diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b615dac58..a56ebe474 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,4 +1,4 @@ -name: Test functions +name: Test Assets permissions: contents: read on: @@ -14,7 +14,7 @@ jobs: steps: - name: Get the current branch name shell: bash - run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}" + run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT id: myref - uses: actions/checkout@v3 @@ -34,14 +34,42 @@ jobs: # This is previous fetch command that stopped working (wile invetsigating added WA bello in run sectiong): git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} # This is old git diff version: git diff ${{ github.base_ref }} --name-only | sed 's,/*[^/]\+/*$,,' | sort | uniq | grep -v '^\.' | sed 's/.*/"&"/' | sed '$!s/.*/&,/' # Based on instructions regarding https://docs.github.com/en/actions/learn-github-actions/contexts#github-context , github.base_ref triggers a workflow run is either pull_request or pull_request_target - run: | - git fetch --no-tags --prune --depth=1 origin ${{ github.base_ref }}:${{ github.base_ref }} - matrix=$(( - echo '{ "package" : [' - git diff ${{ github.base_ref }} --name-only | sed 's,/*[^/]\+/*$,,' | sort | uniq | grep -v '^\.' | sed 's/.*/"&"/' | sed '$!s/.*/&,/' - echo " ]}" - ) | jq 'del(.[][] | select(. 
== ""))' -c) - echo "::set-output name=matrix::$matrix" + run: | + echo "base ref: ${{ github.base_ref }}" + git fetch origin ${{ github.base_ref }}:refs/remotes/origin/${{ github.base_ref }} + base_ref="origin/${{ github.base_ref }}" + + echo "Diffing against $base_ref:" + changed_files="$(git diff "$base_ref" --name-only || true)" + + # Collect candidate package paths from diff + candidates=$( + printf '%s\n' "$changed_files" \ + | awk -F'/' ' + /^functions\/src\// {print $1"/"$2"/"$3} + /^modules\/src\// {print $1"/"$2"/"$3} + ' \ + | sort -u + ) + + # Keep only those that are actual directories + packages="" + for dir in $candidates; do + if [[ -d "$dir" ]]; then + packages+="$dir"$'\n' + fi + done + + if [[ -z "$packages" ]]; then + matrix_json='{"package":[]}' + else + matrix_json=$(printf '%s\n' "$packages" \ + | grep -v '^$' \ + | jq -R . \ + | jq -s '{package: .}' \ + | jq -c) # <-- compact + fi + echo "matrix=$matrix_json" >> "$GITHUB_OUTPUT" outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} @@ -58,8 +86,10 @@ jobs: echo $matrix echo $matrix | jq . echo $matrix | json2yaml + run_monorepo_tests: needs: build_strategy_matrix + if: needs.build_strategy_matrix.outputs.matrix != '{"package":[]}' runs-on: ubuntu-latest strategy: # matrix: [{"package": some package that changed}, {...}, ...] 
@@ -67,8 +97,6 @@ jobs: steps: - name: Checkout current repo uses: actions/checkout@v3 - with: - path: functions # Install python 3.10.17 - name: Install python 3.10.17 uses: actions/setup-python@v4 @@ -79,17 +107,17 @@ jobs: id: cache with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('functions/requirements.txt') }} + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- - name: Install requirements run: | pip install --upgrade pip - pip install -r functions/requirements.txt - - - name: Run py tests - run: python functions/functions.py run-tests -r functions -s py -fn ${{ matrix.package }} - - - name: Clean + pip install -r requirements.txt + - name: Print workspace structure run: | - rm -rf functions + echo "Current directory: $(pwd)" + echo "Tree structure:" + find . -type f | sort + - name: Run py tests + run: python -m cli.cli run-tests -r ${{ matrix.package }} -s py -fn $(basename "${{ matrix.package }}") diff --git a/.github/workflows/test-all.yaml b/.github/workflows/test-all.yaml index f5f90c171..162804863 100644 --- a/.github/workflows/test-all.yaml +++ b/.github/workflows/test-all.yaml @@ -1,4 +1,4 @@ -name: Test all functions, build marketplace +name: Test all assets, build marketplace permissions: contents: read on: @@ -6,6 +6,7 @@ on: branches: - development - master + workflow_dispatch: {} jobs: build_strategy_matrix: @@ -13,7 +14,7 @@ jobs: steps: - name: Get the current branch name shell: bash - run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}" + run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT id: myref - uses: actions/checkout@v4 @@ -32,13 +33,31 @@ jobs: # 3) Save matrix JSON to output # This is old fetch command it cant work cause base_ref is only avaliable on pull request actions: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} run: | - git fetch --no-tags --prune --depth=1 origin +refs/heads/${{ steps.myref.outputs.branch 
}}:refs/remotes/origin/${{ steps.myref.outputs.branch }} - matrix=$(( - echo '{ "package" : [' - git ls-files *[^cli] | grep '/' | sort | uniq| sed 's,/*[^/]\+/*$,,' | sort | uniq | grep -v '^\.' | sed 's/.*/"&"/' | sed '$!s/.*/&,/' - echo " ]}" - ) | jq -c .) - echo "::set-output name=matrix::$matrix" + all_files="$(git ls-files || true)" + # Collect candidate package paths from all tracked files + candidates=$( + printf '%s\n' "$all_files" \ + | awk -F'/' ' + /^functions\/src\// {print $1"/"$2"/"$3} + /^modules\/src\// {print $1"/"$2"/"$3} + ' \ + | sort -u + ) + + # Keep only those that are actual directories + packages="" + for dir in $candidates; do + if [[ -d "$dir" ]]; then + packages+="$dir"$'\n' + fi + done + + if [[ -z "$packages" ]]; then + matrix_json='{"package":[]}' + else + matrix_json=$(printf '%s\n' "$packages" | grep -v '^$' | jq -R . | jq -s '{package: .}' | jq -c) + fi + echo "matrix=$matrix_json" >> "$GITHUB_OUTPUT" outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} @@ -55,6 +74,7 @@ jobs: echo $matrix echo $matrix | jq . 
echo $matrix | json2yaml + run_monorepo_tests: needs: build_strategy_matrix runs-on: ubuntu-latest @@ -65,8 +85,6 @@ jobs: # Source - name: Checkout current repo uses: actions/checkout@v4 - with: - path: functions # Install python 3.10.17 - name: Install python 3.10.17 uses: actions/setup-python@v4 @@ -77,26 +95,58 @@ jobs: id: cache with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('functions/requirements.txt') }} + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- - name: Install requirements run: | pip install --upgrade pip - pip install -r functions/requirements.txt + pip install -r requirements.txt - name: Run py tests - run: python functions/functions.py run-tests -r functions -s py -fn ${{ matrix.package }} + run: python -m cli.cli run-tests -r ${{ matrix.package }} -s py -fn $(basename "${{ matrix.package }}") continue-on-error: true # - name: Run ipynb tests -# run: python functions/functions.py run-tests -r functions -s ipynb - - name: Clean +# run: python functions/cli/cli.py run-tests -r functions -s ipynb + + update_readmes: + needs: build_strategy_matrix + if: github.repository == 'mlrun/functions' || github.repository == 'mlrun/hub-assets' + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install python 3.10.17 + uses: actions/setup-python@v4 + with: + python-version: 3.10.17 + - name: Install requirements + run: | + pip install --upgrade pip + pip install -r requirements.txt + - name: Regenerate README tables + run: python -m cli.cli update-readme --asset functions --asset modules + - name: Commit & push (if changed) + env: + USERNAME: ${{ secrets.USERNAME }} + USEREMAIL: ${{ secrets.USERMAIL }} run: | - rm -rf functions + if git diff --quiet; then + echo "No README changes." 
+ exit 0 + fi + git config --local user.name $USERNAME + git config --local user.email $USEREMAIL + git add functions/README.md modules/README.md || true + git commit -m "chore(readme): auto-update asset tables [skip ci]" + git push build-marketplace: name: Build marketplace - if: github.repository == 'mlrun/functions' && github.event_name != 'pull_request' + if: (github.repository == 'mlrun/functions' || github.repository == 'mlrun/hub-assets') runs-on: ubuntu-latest needs: run_monorepo_tests continue-on-error: false @@ -104,12 +154,10 @@ jobs: steps: - name: Get the current branch name shell: bash - run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}" + run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT id: branch - name: Checkout current repo uses: actions/checkout@v4 - with: - path: functions - name: Checkout Marketplace uses: actions/checkout@v4 with: @@ -121,7 +169,6 @@ jobs: python-version: 3.10.17 - name: Install requirements run: | - cd functions pip install --upgrade pip pip install -r requirements.txt - name: Build marketplace docs @@ -132,12 +179,19 @@ jobs: pwd git pull origin cd .. 
- python functions/functions.py build-marketplace -s functions -m marketplace -c $CHANNEL -v -f + python -m cli.cli build-marketplace -s ./functions/src -sn functions -m marketplace -c $CHANNEL -v -f + python -m cli.cli build-marketplace -s ./modules/src -sn modules -m marketplace -c $CHANNEL -v -f + ## Uncomment the following lines if you want to upload the built marketplace as an artifact +# - name: Upload built marketplace as artifact +# uses: actions/upload-artifact@v4 +# with: +# name: marketplace-build +# path: marketplace/** - name: Publish marketplace release env: GITHUB_TOKEN: ${{ secrets.MARKETPLACE_ACCESS_TOKEN_V3 }} - USERNAME: iguazio-cicd - USEREMAIL: iguaziocicd@gmail.com + USERNAME: ${{ secrets.USERNAME }} + USEREMAIL: ${{ secrets.USERMAIL }} REPO_PATH: marketplace BASE_REPO: mlrun BASE_BRANCH: master diff --git a/.gitignore b/.gitignore index f00b38d9e..4637ce406 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ dmypy.json .pyre/ conda-setup-cpu conda-setup-gpu +.DS_Store diff --git a/README.md b/README.md deleted file mode 100644 index 1136c963d..000000000 --- a/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# Functions hub - -This functions hub is intended to be a centralized location for open source contributions of function components. -These are functions expected to be run as independent mlrun pipeline compnents, and as public contributions, -it is expected that contributors follow certain guidelines/protocols (please chip-in). 
- -## Functions -| function | kind | description | categories | -| --- | --- | --- | --- | -| [aggregate](aggregate/aggregate.ipynb) | job | Rolling aggregation over Metrics and Lables according to specifications | data-prep | -| [bert-embeddings](bert_embeddings/bert_embeddings.ipynb) | nuclio | Get BERT based embeddings for given text | NLP, BERT, embeddings | -| [churn-server](churn_server/churn_server.ipynb) | nuclio | churn classification and predictor | serving, ml | -| [concept-drift](concept_drift/concept_drift.ipynb) | job | Deploy a streaming Concept Drift detector on a labeled stream | ml, serve | -| [concept-drift-streaming](concept_drift_streaming/concept_drift_streaming.ipynb) | nuclio | Deploy a streaming Concept Drift detector on a labeled stream. the nuclio part of the concept_drift function | ml, serve | -| [coxph-test](coxph_test/coxph_test.ipynb) | job | Test cox proportional hazards model | ml, test | -| [coxph-trainer](coxph_trainer/coxph_trainer.ipynb) | job | cox proportional hazards, kaplan meier plots | training, ml | -| [describe](describe/describe.ipynb) | job | describe and visualizes dataset stats | analysis | -| [describe-dask](describe_dask/describe_dask.ipynb) | job | describe and visualizes dataset stats | analysis | -| [describe-spark](describe_spark/describe_spark.ipynb) | job | | | -| [feature-perms](feature_perms/feature_perms.ipynb) | job | estimate feature importances using permutations | analysis | -| [feature-selection](feature_selection/feature_selection.ipynb) | job | Select features through multiple Statistical and Model filters | data-prep, ml | -| [gen-class-data](gen_class_data/gen_class_data.ipynb) | job | Create a binary classification sample dataset and save. 
| data-prep | -| [github-utils](github_utils/github_utils.ipynb) | job | add comments to github pull request | notifications, utils | -| [load-dataset](load_dataset/load_dataset.ipynb) | job | load a toy dataset from scikit-learn | data-source, ml | -| [model-monitoring-batch](model_monitoring_batch/model_monitoring_batch.ipynb) | job | | | -| [model-monitoring-stream](model_monitoring_stream/model_monitoring_stream.ipynb) | nuclio | | | -| [model-server](model_server/model_server.ipynb) | nuclio | generic sklearn model server | serving, ml | -| [model-server-tester](model_server_tester/model_server_tester.ipynb) | job | test model servers | ml, test | -| [pandas-profiling-report](pandas_profiling_report/pandas_profiling_report.ipynb) | job | Create Pandas Profiling Report from Dataset | analysis | -| [project-runner](project_runner/project_runner.ipynb) | nuclio | Nuclio based - Cron scheduler for running your MLRun projects | utils | -| [rnn-serving](rnn_serving/rnn_serving.ipynb) | serving | deploy an rnn based stock analysis model server. 
| model-serving | -| [send-email](send_email/send_email.ipynb) | job | Send Email messages through SMTP server | notifications | -| [sentiment-analysis-serving](sentiment_analysis_serving/sentiment_analysis_serving.ipynb) | serving | BERT based sentiment classification model | serving, NLP, BERT, sentiment analysis | -| [sklearn-classifier](sklearn_classifier/sklearn_classifier.ipynb) | job | train any classifier using scikit-learn's API | ml, training | -| [sklearn-classifier-dask](sklearn_classifier_dask/sklearn_classifier_dask.ipynb) | job | train any classifier using scikit-learn's API over Dask | ml, training, dask | -| [slack-notify](slack_notify/slack_notify.ipynb) | job | Send Slack notification | ops | -| [sql-to-file](sql_to_file/sql_to_file.ipynb) | job | SQL To File - Ingest data using SQL query | data-prep | -| [stream-to-parquet](stream_to_parquet/stream_to_parquet.ipynb) | nuclio | Saves a stream to Parquet and can lunch drift detection task on it | ml, serve | -| [test-classifier](test_classifier/test_classifier.ipynb) | job | test a classifier using held-out or new data | ml, test | -| [tf1-serving](tf1_serving/tf1_serving.ipynb) | nuclio | tf1 image classification server | serving, dl | -| [tf2-serving](tf2_serving/tf2_serving.ipynb) | nuclio | tf2 image classification server | serving, dl | -| [tf2-serving-v2](tf2_serving_v2/tf2_serving_v2.ipynb) | serving | tf2 image classification server v2 | serving, dl | -| [v2-model-server](v2_model_server/v2_model_server.ipynb) | serving | generic sklearn model server | serving, ml | -| [v2-model-tester](v2_model_tester/v2_model_tester.ipynb) | job | test v2 model servers | ml, test | -| [virtual-drift](virtual_drift/virtual_drift.ipynb) | job | Compute drift magnitude between Time-Samples T and U | ml, serve, concept-drift | -| [xgb-custom](xgb_custom/xgb_custom.ipynb) | job | simulate data with outliers. 
| model-testing | -| [xgb-serving](xgb_serving/xgb_serving.ipynb) | nuclio | xgboost test data classification server | model-serving | -| [xgb-test](xgb_test/xgb_test.ipynb) | job | Test one or more classifier models against held-out dataset. | model-test | -| [xgb-trainer](xgb_trainer/xgb_trainer.ipynb) | job | train multiple model types using xgboost. | model-prep | diff --git a/catalog.json b/catalog.json deleted file mode 100644 index 4bcc4022d..000000000 --- a/catalog.json +++ /dev/null @@ -1 +0,0 @@ -{"aggregate": {"description": "Rolling aggregation over Metrics and Lables according to specifications", "categories": ["data-prep"], "kind": "job", "docfile": "aggregate/aggregate.ipynb", "versions": {"latest": "aggregate/function.yaml"}}, "arc-to-parquet": {"description": "retrieve remote archive, open and save as parquet", "categories": ["data-movement", "utils"], "kind": "job", "docfile": "arc_to_parquet/arc_to_parquet.ipynb", "versions": {"latest": "arc_to_parquet/function.yaml"}}, "bert-embeddings": {"description": "Get BERT based embeddings for given text", "categories": ["NLP", "BERT", "embeddings"], "kind": "remote", "docfile": "bert_embeddings/bert_embeddings.ipynb", "versions": {"latest": "bert_embeddings/function.yaml"}}, "churn-server": {"description": "churn classification and predictor", "categories": ["serving", "ml"], "kind": "serving", "docfile": "churn_server/churn_server.ipynb", "versions": {"latest": "churn_server/function.yaml"}}, "concept-drift": {"description": "Deploy a streaming Concept Drift detector on a labeled stream", "categories": ["ml", "serve"], "kind": "job", "docfile": "concept_drift/concept_drift.ipynb", "versions": {"latest": "concept_drift/function.yaml"}}, "concept-drift-streaming": {"description": "Deploy a streaming Concept Drift detector on a labeled stream. 
the nuclio part of the concept_drift function", "categories": ["ml", "serve"], "kind": "remote", "docfile": "concept_drift_streaming/concept_drift_streaming.ipynb", "versions": {"latest": "concept_drift_streaming/function.yaml"}}, "coxph-test": {"description": "Test cox proportional hazards model", "categories": ["ml", "test"], "kind": "job", "docfile": "coxph_test/coxph_test.ipynb", "versions": {"latest": "coxph_test/function.yaml"}}, "coxph-trainer": {"description": "cox proportional hazards, kaplan meier plots", "categories": ["training", "ml"], "kind": "job", "docfile": "coxph_trainer/coxph_trainer.ipynb", "versions": {"latest": "coxph_trainer/function.yaml"}}, "describe": {"description": "describe and visualizes dataset stats", "categories": ["analysis"], "kind": "job", "docfile": "describe/describe.ipynb", "versions": {"latest": "describe/function.yaml"}}, "describe-dask": {"description": "describe and visualizes dataset stats", "categories": ["analysis"], "kind": "job", "docfile": "describe_dask/describe_dask.ipynb", "versions": {"latest": "describe_dask/function.yaml"}}, "describe-spark": {"description": "", "categories": [], "kind": "job", "docfile": "describe_spark/describe_spark.ipynb", "versions": {"latest": "describe_spark/function.yaml"}}, "feature-perms": {"description": "estimate feature importances using permutations", "categories": ["analysis"], "kind": "job", "docfile": "feature_perms/feature_perms.ipynb", "versions": {"latest": "feature_perms/function.yaml"}}, "feature-selection": {"description": "Select features through multiple Statistical and Model filters", "categories": ["data-prep", "ml"], "kind": "job", "docfile": "feature_selection/feature_selection.ipynb", "versions": {"latest": "feature_selection/function.yaml"}}, "gen-class-data": {"description": "Create a binary classification sample dataset and save.", "categories": ["data-prep"], "kind": "job", "docfile": "gen_class_data/gen_class_data.ipynb", "versions": {"latest": 
"gen_class_data/function.yaml"}}, "github-utils": {"description": "add comments to github pull request", "categories": ["notifications", "utils"], "kind": "job", "docfile": "github_utils/github_utils.ipynb", "versions": {"latest": "github_utils/function.yaml"}}, "load-dataset": {"description": "load a toy dataset from scikit-learn", "categories": ["data-source", "ml"], "kind": "job", "docfile": "load_dataset/load_dataset.ipynb", "versions": {"latest": "load_dataset/function.yaml"}}, "model-monitoring-batch": {"description": "", "categories": [], "kind": "job", "docfile": "model_monitoring_batch/model_monitoring_batch.ipynb", "versions": {"latest": "model_monitoring_batch/function.yaml"}}, "model-monitoring-stream": {"description": "", "categories": [], "kind": "remote", "docfile": "model_monitoring_stream/model_monitoring_stream.ipynb", "versions": {"latest": "model_monitoring_stream/function.yaml"}}, "model-server": {"description": "generic sklearn model server", "categories": ["serving", "ml"], "kind": "remote", "docfile": "model_server/model_server.ipynb", "versions": {"latest": "model_server/function.yaml"}}, "model-server-tester": {"description": "test model servers", "categories": ["ml", "test"], "kind": "job", "docfile": "model_server_tester/model_server_tester.ipynb", "versions": {"latest": "model_server_tester/function.yaml"}}, "open-archive": {"description": "Open a file/object archive into a target directory", "categories": ["data-movement", "utils"], "kind": "job", "docfile": "open_archive/open_archive.ipynb", "versions": {"latest": "open_archive/function.yaml"}}, "pandas-profiling-report": {"description": "Create Pandas Profiling Report from Dataset", "categories": ["analysis"], "kind": "job", "docfile": "pandas_profiling_report/pandas_profiling_report.ipynb", "versions": {"latest": "pandas_profiling_report/function.yaml"}}, "project-runner": {"description": "Nuclio based - Cron scheduler for running your MLRun projects", "categories": ["utils"], 
"kind": "remote", "docfile": "project_runner/project_runner.ipynb", "versions": {"latest": "project_runner/function.yaml"}}, "rnn-serving": {"description": "deploy an rnn based stock analysis model server.", "categories": ["model-serving"], "kind": "serving", "docfile": "rnn_serving/rnn_serving.ipynb", "versions": {"latest": "rnn_serving/function.yaml"}}, "send-email": {"description": "Send Email messages through SMTP server", "categories": ["notifications"], "kind": "job", "docfile": "send_email/send_email.ipynb", "versions": {"latest": "send_email/function.yaml"}}, "sentiment-analysis-serving": {"description": "BERT based sentiment classification model", "categories": ["serving", "NLP", "BERT", "sentiment analysis"], "kind": "serving", "docfile": "sentiment_analysis_serving/sentiment_analysis_serving.ipynb", "versions": {"latest": "sentiment_analysis_serving/function.yaml"}}, "sklearn-classifier": {"description": "train any classifier using scikit-learn's API", "categories": ["ml", "training"], "kind": "job", "docfile": "sklearn_classifier/sklearn_classifier.ipynb", "versions": {"latest": "sklearn_classifier/function.yaml"}}, "sklearn-classifier-dask": {"description": "train any classifier using scikit-learn's API over Dask", "categories": ["ml", "training", "dask"], "kind": "job", "docfile": "sklearn_classifier_dask/sklearn_classifier_dask.ipynb", "versions": {"latest": "sklearn_classifier_dask/function.yaml"}}, "slack-notify": {"description": "Send Slack notification", "categories": ["ops"], "kind": "job", "docfile": "slack_notify/slack_notify.ipynb", "versions": {"latest": "slack_notify/function.yaml"}}, "spark-submit": {"description": "", "categories": [], "kind": "job", "docfile": "spark_submit/spark_submit.ipynb", "versions": {"latest": "spark_submit/function.yaml"}}, "sql-to-file": {"description": "SQL To File - Ingest data using SQL query", "categories": ["data-prep"], "kind": "job", "docfile": "sql_to_file/sql_to_file.ipynb", "versions": {"latest": 
"sql_to_file/function.yaml"}}, "stream-to-parquet": {"description": "Saves a stream to Parquet and can lunch drift detection task on it", "categories": ["ml", "serve"], "kind": "remote", "docfile": "stream_to_parquet/stream_to_parquet.ipynb", "versions": {"latest": "stream_to_parquet/function.yaml"}}, "test-classifier": {"description": "test a classifier using held-out or new data", "categories": ["ml", "test"], "kind": "job", "docfile": "test_classifier/test_classifier.ipynb", "versions": {"latest": "test_classifier/function.yaml"}}, "tf1-serving": {"description": "tf1 image classification server", "categories": ["serving", "dl"], "kind": "remote", "docfile": "tf1_serving/tf1_serving.ipynb", "versions": {"latest": "tf1_serving/function.yaml"}}, "tf2-serving": {"description": "tf2 image classification server", "categories": ["serving", "dl"], "kind": "remote", "docfile": "tf2_serving/tf2_serving.ipynb", "versions": {"latest": "tf2_serving/function.yaml"}}, "tf2-serving-v2": {"description": "tf2 image classification server v2", "categories": ["serving", "dl"], "kind": "serving", "docfile": "tf2_serving_v2/tf2_serving_v2.ipynb", "versions": {"latest": "tf2_serving_v2/function.yaml"}}, "v2-model-server": {"description": "generic sklearn model server", "categories": ["serving", "ml"], "kind": "serving", "docfile": "v2_model_server/v2_model_server.ipynb", "versions": {"latest": "v2_model_server/function.yaml"}}, "v2-model-tester": {"description": "test v2 model servers", "categories": ["ml", "test"], "kind": "job", "docfile": "v2_model_tester/v2_model_tester.ipynb", "versions": {"latest": "v2_model_tester/function.yaml"}}, "virtual-drift": {"description": "Compute drift magnitude between Time-Samples T and U", "categories": ["ml", "serve", "concept-drift"], "kind": "job", "docfile": "virtual_drift/virtual_drift.ipynb", "versions": {"latest": "virtual_drift/function.yaml"}}, "xgb-custom": {"description": "simulate data with outliers.", "categories": ["model-testing"], 
"kind": "job", "docfile": "xgb_custom/xgb_custom.ipynb", "versions": {"latest": "xgb_custom/function.yaml"}}, "xgb-serving": {"description": "deploy an XGBoost model server.", "categories": ["model-serving"], "kind": "remote", "docfile": "xgb_serving/xgb_serving.ipynb", "versions": {"latest": "xgb_serving/function.yaml"}}, "xgb-test": {"description": "Test one or more classifier models against held-out dataset.", "categories": ["model-test"], "kind": "job", "docfile": "xgb_test/xgb_test.ipynb", "versions": {"latest": "xgb_test/function.yaml"}}, "xgb-trainer": {"description": "train multiple model types using xgboost.", "categories": ["model-prep"], "kind": "job", "docfile": "xgb_trainer/xgb_trainer.ipynb", "versions": {"latest": "xgb_trainer/function.yaml"}}} \ No newline at end of file diff --git a/catalog.yaml b/catalog.yaml deleted file mode 100644 index f603b1b9b..000000000 --- a/catalog.yaml +++ /dev/null @@ -1,190 +0,0 @@ -aggregate: - categories: - - data-prep - description: Rolling aggregation over Metrics and Lables according to specifications - docfile: aggregate/aggregate.ipynb - kind: job - versions: - latest: aggregate/function.yaml -arc-to-parquet: - categories: - - data-movement - - utils - description: retrieve remote archive, open and save as parquet - docfile: arc_to_parquet/arc_to_parquet.ipynb - kind: job - versions: - latest: arc_to_parquet/function.yaml -describe: - categories: - - analysis - description: describe and visualizes dataset stats - docfile: describe/describe.ipynb - kind: job - versions: - latest: describe/function.yaml -describe-dask: - categories: - - analysis - description: describe and visualizes dataset stats - docfile: describe_dask/describe_dask.ipynb - kind: job - versions: - latest: describe_dask/function.yaml -describe-spark: - categories: [] - description: '' - docfile: describe_spark/describe_spark.ipynb - kind: job - versions: - latest: describe_spark/function.yaml -feature-selection: - categories: - - data-prep - - 
ml - description: Select features through multiple Statistical and Model filters - docfile: feature_selection/feature_selection.ipynb - kind: job - versions: - latest: feature_selection/function.yaml -gen-class-data: - categories: - - data-prep - description: Create a binary classification sample dataset and save. - docfile: gen_class_data/gen_class_data.ipynb - kind: job - versions: - latest: gen_class_data/function.yaml -github-utils: - categories: - - notifications - - utils - description: add comments to github pull request - docfile: github_utils/github_utils.ipynb - kind: job - versions: - latest: github_utils/function.yaml -load-dataset: - categories: - - data-source - - ml - description: load a toy dataset from scikit-learn - docfile: load_dataset/load_dataset.ipynb - kind: job - versions: - latest: load_dataset/function.yaml -model-monitoring-batch: - categories: [] - description: '' - docfile: model_monitoring_batch/model_monitoring_batch.ipynb - kind: job - versions: - latest: model_monitoring_batch/function.yaml -model-server: - categories: - - serving - - ml - description: generic sklearn model server - docfile: model_server/model_server.ipynb - kind: remote - versions: - latest: model_server/function.yaml -model-server-tester: - categories: - - ml - - test - description: test model servers - docfile: model_server_tester/model_server_tester.ipynb - kind: job - versions: - latest: model_server_tester/function.yaml -open-archive: - categories: - - data-movement - - utils - description: Open a file/object archive into a target directory - docfile: open_archive/open_archive.ipynb - kind: job - versions: - latest: open_archive/function.yaml -send-email: - categories: - - notifications - description: Send Email messages through SMTP server - docfile: send_email/send_email.ipynb - kind: job - versions: - latest: send_email/function.yaml -sentiment-analysis-serving: - categories: - - serving - - NLP - - BERT - - sentiment analysis - description: BERT based 
sentiment classification model - docfile: sentiment_analysis_serving/sentiment_analysis_serving.ipynb - kind: serving - versions: - latest: sentiment_analysis_serving/function.yaml -sklearn-classifier: - categories: - - ml - - training - description: train any classifier using scikit-learn's API - docfile: sklearn_classifier/sklearn_classifier.ipynb - kind: job - versions: - latest: sklearn_classifier/function.yaml -sklearn-classifier-dask: - categories: - - ml - - training - - dask - description: train any classifier using scikit-learn's API over Dask - docfile: sklearn_classifier_dask/sklearn_classifier_dask.ipynb - kind: job - versions: - latest: sklearn_classifier_dask/function.yaml -spark-submit: - categories: [] - description: '' - docfile: spark_submit/spark_submit.ipynb - kind: job - versions: - latest: spark_submit/function.yaml -test-classifier: - categories: - - ml - - test - description: test a classifier using held-out or new data - docfile: test_classifier/test_classifier.ipynb - kind: job - versions: - latest: test_classifier/function.yaml -tf2-serving: - categories: - - serving - - dl - description: tf2 image classification server - docfile: tf2_serving/tf2_serving.ipynb - kind: remote - versions: - latest: tf2_serving/function.yaml -v2-model-server: - categories: - - serving - - ml - description: generic sklearn model server - docfile: v2_model_server/v2_model_server.ipynb - kind: serving - versions: - latest: v2_model_server/function.yaml -v2-model-tester: - categories: - - ml - - test - description: test v2 model servers - docfile: v2_model_tester/v2_model_tester.ipynb - kind: job - versions: - latest: v2_model_tester/function.yaml diff --git a/functions.py b/cli/cli.py similarity index 69% rename from functions.py rename to cli/cli.py index 727eca020..8fee9891a 100644 --- a/functions.py +++ b/cli/cli.py @@ -14,27 +14,26 @@ # import click -from cli.create_legacy_catalog import create_legacy_catalog_cli -from cli.function_to_item import 
function_to_item_cli -from cli.item_to_function import item_to_function_cli +from cli.functions.function_to_item import function_to_item_cli +from cli.functions.item_to_function import item_to_function_cli from cli.marketplace.build import build_marketplace_cli -from cli.new_item import new_item -from cli.test_suite import test_suite -from cli.item_yaml import update_functions_yaml - +from cli.functions.new_function_item import new_item as new_function_item +from cli.common.test_suite import test_suite +from cli.common.item_yaml import update_functions_yaml +from cli.common.update_readme import update_readme @click.group() def cli(): pass -cli.add_command(new_item) +cli.add_command(new_function_item) cli.add_command(item_to_function_cli, name="item-to-function") cli.add_command(function_to_item_cli, name="function-to-item") cli.add_command(test_suite, name="run-tests") cli.add_command(build_marketplace_cli, name="build-marketplace") -cli.add_command(create_legacy_catalog_cli, name="create-legacy-catalog") cli.add_command(update_functions_yaml, name="update-functions-yaml") +cli.add_command(update_readme, name="update-readme") if __name__ == "__main__": cli() diff --git a/cli/item_yaml.py b/cli/common/item_yaml.py similarity index 96% rename from cli/item_yaml.py rename to cli/common/item_yaml.py index 7483a9912..a14ea48c2 100644 --- a/cli/item_yaml.py +++ b/cli/common/item_yaml.py @@ -13,8 +13,8 @@ # limitations under the License. 
# import click -from cli.path_iterator import PathIterator -from cli.helpers import is_item_dir +from cli.utils.path_iterator import PathIterator +from cli.utils.helpers import is_item_dir import yaml import datetime diff --git a/cli/test_suite.py b/cli/common/test_suite.py similarity index 98% rename from cli/test_suite.py rename to cli/common/test_suite.py index 93e1428b4..52dc1c5ae 100644 --- a/cli/test_suite.py +++ b/cli/common/test_suite.py @@ -23,14 +23,14 @@ import yaml import re -from cli.helpers import ( +from cli.utils.helpers import ( is_item_dir, install_pipenv, install_python, install_requirements, get_item_yaml_values, ) -from cli.path_iterator import PathIterator +from cli.utils.path_iterator import PathIterator @click.command() @@ -160,7 +160,8 @@ def _run(self, path: Union[str, Path], multiprocess, function_name): print("running tests with {} process".format(process_count)) discovered_functions = self.discover(path) if function_name is not None: - discovered_functions = [fn for fn in discovered_functions if function_name == Path(fn).stem] + click.echo("running test with name {}".format(function_name)) + discovered_functions = [fn for fn in discovered_functions if Path(function_name).stem == Path(fn).stem] for path in discovered_functions: if re.match(".+/test_*", path): discovered_functions.remove(path) @@ -611,7 +612,7 @@ def is_test_valid_by_item(item_posix_path): test_name = data.get("name") if not test_valid: click.echo("==================== Test {} Not valid ====================".format(test_name)) - click.echo("==================== enable tet_valid in item yaml ====================") + click.echo("==================== enable test_valid in item.yaml ====================") return test_valid else: return True diff --git a/cli/common/update_readme.py b/cli/common/update_readme.py new file mode 100644 index 000000000..6bcab8d33 --- /dev/null +++ b/cli/common/update_readme.py @@ -0,0 +1,160 @@ +# Copyright 2025 Iguazio +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import sys +from pathlib import Path +from typing import Iterable, List, Tuple + +import click +import yaml + +MARKER_START = "" +COLUMNS = ("Name", "Description", "Kind", "Categories") + +@click.command("update-readme") +@click.option( + "--asset", + multiple=True, + required=True, + help="Asset types to process (e.g: functions). " + "Pass multiple: --assets functions --assets modules", +) +@click.option("--check", is_flag=True, + help="Do not write; exit non‑zero if README(s) would change.") +def update_readme(asset: Iterable[str], + check: bool) -> None: + """ + Regenerate the README tables for asset types from their item.yaml files. 
+ """ + asset_list = list(asset) + changed_any = False + touched: list[str] = [] + + for t in asset_list: + if check: + # simulate by reading/writing to a temp string, but easiest is: run update and revert if not checking + # Instead: compute would-change by comparing strings without writing: + root = Path(".").resolve() + asset_dir = root / t + readme = asset_dir / "README.md" + rows = _rows_for_asset_type(asset_dir) + table_md = _build_table_md(rows) + old = readme.read_text() if readme.exists() else f"# {t.title()}\n\n" + new = _replace_block(old, table_md) + if new != old: + changed_any = True + touched.append(str(readme)) + else: + if _update_one(t): + changed_any = True + touched.append(str((Path(t) / "README.md").as_posix())) + + if check and changed_any: + click.echo("README tables are out of date for:") + for p in touched: + click.echo(f" - {p}") + sys.exit(1) + + # Normal run prints what it updated (no failure) + if not check: + if changed_any: + click.echo("Updated README(s):") + for p in touched: + click.echo(f" - {p}") + else: + click.echo("No README changes.") + + +def _rows_for_asset_type(asset_dir: Path) -> List[Tuple[str, str, str, str]]: + """Scan /src/*/item.yaml and return table rows.""" + src = asset_dir / "src" + if not src.exists(): + return [] + + rows: List[Tuple[str, str, str, str]] = [] + for item_yaml in sorted(src.glob("*/item.yaml")): + asset_name = item_yaml.parent.name + try: + data = yaml.safe_load(item_yaml.read_text()) or {} + except Exception as e: + raise click.ClickException(f"Failed reading {item_yaml}: {e}") from e + + desc = (data.get("description") or "").strip() + kind = (data.get("spec", {}).get("kind", "")).strip() + cats = data.get("categories") or [] + cats_str = ", ".join(c.strip() for c in cats) if isinstance(cats, list) else str(cats).strip() + # Link the name to its source directory + link = f"[{asset_name}]({(asset_dir / 'src' / asset_name).as_posix()})" + rows.append((link, desc, kind, cats_str)) + + 
rows.sort(key=lambda r: r[0].lower()) + return rows + + +def _build_table_md(rows: List[Tuple[str, str, str, str]]) -> str: + if not rows: + return "_No items found_" + lines = [ + "| " + " | ".join(COLUMNS) + " |", + "| " + " | ".join("---" for _ in COLUMNS) + " |", + ] + for r in rows: + lines.append("| " + " | ".join((cell or "").replace("\n", " ").strip() for cell in r) + " |") + return "\n".join(lines) + + +def _replace_block(readme_text: str, new_block: str) -> str: + si = readme_text.find(MARKER_START) + ei = readme_text.find(MARKER_END) + if si == -1 or ei == -1 or ei < si: + # Append a section if markers are missing + section = ( + f"\n## Catalog\n\n" + f"{MARKER_START} (do not edit below) -->\n" + f"{new_block}\n" + f"{MARKER_END}\n" + ) + return readme_text.rstrip() + "\n\n" + section + + # Ensure we keep the whole START marker line up to "-->" + start_close = readme_text.find("-->", si) + if start_close == -1: + start_close = si + len(MARKER_START) + readme_text = readme_text[:start_close] + " -->" + readme_text[start_close:] + start_close = readme_text.find("-->", si) + start_close += 3 # include the "-->" + + return readme_text[:start_close] + "\n" + new_block + "\n" + readme_text[ei:] + + +def _update_one(asset_type: str) -> bool: + """Generate/replace the table in /README.md. 
Return True if changed.""" + root = Path(".").resolve() + asset_dir = root / asset_type + readme = asset_dir / "README.md" + + rows = _rows_for_asset_type(asset_dir) + table_md = _build_table_md(rows) + old = readme.read_text() if readme.exists() else f"# {asset_type.title()}\n\n" + new = _replace_block(old, table_md) + + if new != old: + readme.parent.mkdir(parents=True, exist_ok=True) + readme.write_text(new) + return True + return False + + diff --git a/cli/create_legacy_catalog.py b/cli/create_legacy_catalog.py deleted file mode 100644 index edb2d016b..000000000 --- a/cli/create_legacy_catalog.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2019 Iguazio -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import json -from pathlib import Path -from typing import Union - -import click as click -import yaml -from mlrun import import_function - -from cli.helpers import PROJECT_ROOT -from cli.path_iterator import PathIterator - - -@click.command() -@click.option( - "-r", "--root-dir", default=PROJECT_ROOT, help="Path to root project directory" -) -def create_legacy_catalog_cli(root_dir: Union[str, Path]): - create_legacy_catalog(root_dir) - -def create_legacy_catalog(root_dir: Union[str, Path]): - root_dir = Path(root_dir) - if not root_dir.is_dir(): - raise RuntimeError("Root directory must be a directory") - - catalog = {} - - file_list = Path(root_dir).glob("**/*.yaml") - - for file in sorted(file_list, key=lambda f: str(f)): - file = file.resolve() - click.echo(f"Now inspecting file: {file}") - - if file.is_file(): - try: - fn = import_function(str(file)) - except Exception as e: - click.echo(f"failed to load func {file}, {e}") - continue - - if not fn.kind or fn.kind in ["", "local", "handler"]: - click.echo(f"illegal function or kind in {file}, kind={fn.kind}") - continue - - if fn.metadata.name in catalog: - entry = catalog[fn.metadata.name] - else: - file_dir = file.parent - notebook_iterator = PathIterator( - root=file_dir, - rule=lambda p: p.name.endswith(".ipynb"), - as_path=True, - ) - notebooks = list(notebook_iterator) - doc_file = file_dir / notebooks[0] - entry = { - "description": fn.spec.description, - "categories": fn.metadata.categories, - "kind": fn.kind, - "docfile": f"{doc_file.parent.name}/{doc_file.name}", - "versions": {}, - } - - entry["versions"][fn.metadata.tag or "latest"] = f"{file.parent.name}/{file.name}" - print(fn.metadata.name, entry) - catalog[fn.metadata.name] = entry - - with open("../catalog.yaml", "w") as fp: - yaml.dump(catalog, fp) - - with open("../catalog.json", "w") as fp: - json.dump(catalog, fp) - - mdheader = """# Functions hub - -This functions hub is intended to be a centralized location for open source 
contributions of function components. -These are functions expected to be run as independent mlrun pipeline compnents, and as public contributions, -it is expected that contributors follow certain guidelines/protocols (please chip-in). - -## Functions -""" - - with open(root_dir / "README.md", "w") as fp: - fp.write(mdheader) - rows = [] - for k, v in catalog.items(): - kind = v["kind"] - if kind == "remote": - kind = "nuclio" - row = [ - f"[{k}]({v['docfile']})", - kind, - v["description"], - ", ".join(v["categories"] or []), - ] - rows.append(row) - - text = gen_md_table(["function", "kind", "description", "categories"], rows) - fp.write(text) - - -def gen_md_table(header, rows=None): - rows = [] if rows is None else rows - - def gen_list(items=None): - items = [] if items is None else items - out = "|" - for i in items: - out += " {} |".format(i) - return out - - out = gen_list(header) + "\n" + gen_list(len(header) * ["---"]) + "\n" - for r in rows: - out += gen_list(r) + "\n" - return out - -if __name__ == '__main__': - create_legacy_catalog("..") \ No newline at end of file diff --git a/cli/function_to_item.py b/cli/functions/function_to_item.py similarity index 98% rename from cli/function_to_item.py rename to cli/functions/function_to_item.py index 0b4cfc0c0..c3c870d75 100644 --- a/cli/function_to_item.py +++ b/cli/functions/function_to_item.py @@ -19,14 +19,14 @@ import click import yaml -from cli.helpers import is_function_dir -from cli.path_iterator import PathIterator +from cli.utils.helpers import is_function_dir +from cli.utils.path_iterator import PathIterator @click.command() @click.option( "-p", - "-path", + "--path", help="Path to one of: specific function.yaml, directory containing function.yaml or a root directory to search function.yamls in", ) def function_to_item_cli(path: str): diff --git a/cli/item_to_function.py b/cli/functions/item_to_function.py similarity index 98% rename from cli/item_to_function.py rename to 
cli/functions/item_to_function.py index a1bb4b168..be84c0dce 100644 --- a/cli/item_to_function.py +++ b/cli/functions/item_to_function.py @@ -22,8 +22,8 @@ from mlrun import code_to_function from yaml import full_load -from cli.helpers import is_item_dir -from cli.path_iterator import PathIterator +from cli.utils.helpers import is_item_dir +from cli.utils.path_iterator import PathIterator @click.command() diff --git a/cli/new_item.py b/cli/functions/new_function_item.py similarity index 100% rename from cli/new_item.py rename to cli/functions/new_function_item.py diff --git a/cli/marketplace/build.py b/cli/marketplace/build.py index d6e9cf68b..249bdcc0a 100644 --- a/cli/marketplace/build.py +++ b/cli/marketplace/build.py @@ -26,10 +26,10 @@ from sphinx.cmd.build import main as sphinx_build_cmd from sphinx.ext.apidoc import main as sphinx_apidoc_cmd -from cli.helpers import (PROJECT_ROOT, get_item_yaml_values, - get_mock_requirements, is_item_dir, render_jinja) +from cli.utils.helpers import (PROJECT_ROOT, get_item_yaml_values, + get_mock_requirements, is_item_dir, render_jinja) from cli.marketplace.changelog import ChangeLog -from cli.path_iterator import PathIterator +from cli.utils.path_iterator import PathIterator _verbose = False @@ -38,7 +38,6 @@ ASSETS = { "example": ("example", "src/{}"), "source": ("spec.filename", "src/{}"), - "function": "src/function.yaml", "docs": "static/documentation.html", } @@ -94,7 +93,7 @@ def build_marketplace_cli( def build_marketplace( source_dir: str, marketplace_dir: str, - source_name: Optional[str] = None, + source_name: str, temp_dir: str = "/tmp", channel: str = "development", verbose: bool = False, @@ -104,7 +103,7 @@ def build_marketplace( :param source_dir: Path to the source directory to build the marketplace from :param marketplace_dir: Path to marketplace directory - :param source_name: Name of source, if not provided, name of source directory will be used instead + :param source_name: Name of source (e.g. 
'functions', 'modules', etc.) :param temp_dir: Path to intermediate directory, used to build marketplace resources, if not provided '/tmp/' will be used :param channel: The name of the marketplace channel to write to @@ -117,7 +116,7 @@ def build_marketplace( # The root of the temporary project root_base = Path(temp_dir) / uuid.uuid4().hex - temp_root = root_base / "functions" + temp_root = root_base / source_name temp_docs = root_base / "docs" click.echo(f"Temporary working directory: {root_base}") @@ -126,9 +125,9 @@ def build_marketplace( source_dir = Path(source_dir).resolve() # The target directory of the marketplace marketplace_root = Path(marketplace_dir).resolve() - marketplace_dir = marketplace_root / (source_name or source_dir.name) / channel + marketplace_dir = marketplace_root / source_name / channel - # Creating directories temp_root/functions, temp_root/docs and marketplace_root/functions/(development or master): + # Creating directories temp_root/source_name, temp_root/docs and marketplace_root/source_name/(development or master): temp_root.mkdir(parents=True) temp_docs.mkdir(parents=True) marketplace_dir.mkdir(parents=True, exist_ok=True) @@ -156,6 +155,7 @@ def build_marketplace( if _verbose: print_file_tree("Temporary project structure", temp_root) + print_file_tree("Temporary docs structure", temp_docs) render_html_files(temp_docs) @@ -164,6 +164,7 @@ def build_marketplace( update_or_create_items( source_dir, + source_name, marketplace_dir, temp_docs, change_log, @@ -237,12 +238,12 @@ def copy_resources(marketplace_dir, temp_docs): def update_or_create_items( - source_dir, marketplace_dir, temp_docs, change_log, force_update: bool = False + source_dir, source_name, marketplace_dir, temp_docs, change_log, force_update: bool = False ): click.echo("Creating items...") for item_dir in PathIterator(root=source_dir, rule=is_item_dir, as_path=True): update_or_create_item( - item_dir, marketplace_dir, temp_docs, change_log, force_update + item_dir, 
source_name, marketplace_dir, temp_docs, change_log, force_update ) @@ -255,7 +256,7 @@ def build_catalog_json( with_assets: bool = False, ): """ - Building JSON catalog with all the details of the functions in marketplace + Building JSON catalog with all the details of the assets in marketplace Each function in the catalog is seperated into different versions of the function, and in each version field, there is all the details that concerned the version. @@ -352,6 +353,7 @@ def add_assets(item_yaml: dict): def update_or_create_item( item_dir: Path, + source_name: str, marketplace_dir: Path, temp_docs: Path, change_log: ChangeLog, @@ -360,7 +362,7 @@ def update_or_create_item( # Copy source directories to target directories, if target already has the directory, archive previous version item_yaml = yaml.full_load(open(item_dir / "item.yaml", "r")) source_version = item_yaml["version"] - relative_path = "../../../" + relative_path = "../../" marketplace_item = marketplace_dir / item_dir.stem target_latest = marketplace_item / "latest" @@ -459,19 +461,24 @@ def update_or_create_item( {"source_code": source_code}, ) - with open((item_dir / "function.yaml"), "r") as f: - source_code = f.read() + # render the yaml of the specific asset type if exists (e.g: function.yaml) + asset_name = source_name[:-1] + asset_yaml_path = item_dir / f"{asset_name}.yaml" - render_jinja( - templates / "yaml.html", - latest_static / "function.html", - {"source_code": source_code}, - ) - render_jinja( - templates / "yaml.html", - version_static / "function.html", - {"source_code": source_code}, - ) + if asset_yaml_path.exists(): + with open(asset_yaml_path, "r") as f: + source_code = f.read() + render_jinja( + templates / "python.html", + latest_static / "source.html", + {"source_code": source_code}, + ) + render_jinja( + templates / "python.html", + version_static / "source.html", + {"source_code": source_code}, + ) + ASSETS[asset_name] = f"src/{asset_name}.yaml" pass @@ -593,7 +600,7 @@ 
def collect_values_from_items( Collecting all tags values from item.yaml files. If the `with_requirements` flag is on than also collecting requirements from ite.yaml and requirements.txt files. - :param source_dir: The source directory that contains all the MLRun functions. + :param source_dir: The source directory that contains the assets (e.g: src/functions). :param tags_set: Set of tags to collect from item.yaml files. :returns: A dictionary contains the tags and requirements. @@ -676,7 +683,7 @@ def build_temp_docs(temp_root, temp_docs, source_dir): from generation. Note: By default this script will not overwrite already created files. - :param temp_root: The project's temporary functions root. + :param temp_root: The project's temporary root. :param temp_docs: The project's temporary docs root. :param source_dir: Path to the source directory to build the marketplace from """ @@ -687,16 +694,5 @@ def build_temp_docs(temp_root, temp_docs, source_dir): sphinx_apidoc_cmd(cmd.split(" ")) - shutil.copytree(source_dir / "cli" / "marketplace" / "_static" / "css", temp_docs / '_static/css') + shutil.copytree(PROJECT_ROOT / "cli" / "marketplace" / "_static" / "css", temp_docs / '_static/css') click.echo("[Sphinx] Done autodoc") - - -if __name__ == "__main__": - # build_marketplace_cli() - build_marketplace( - source_dir="../../../functions", - marketplace_dir="../../../marketplace", - verbose=True, - channel="development", - force_update_items=True, - ) diff --git a/cli/marketplace/conf.template b/cli/marketplace/conf.template index 714a885ce..93c83c9d3 100644 --- a/cli/marketplace/conf.template +++ b/cli/marketplace/conf.template @@ -16,6 +16,7 @@ import sys import os sys.path.insert(0, "{{sphinx_docs_target}}") +sys.path.insert(0, os.path.abspath(os.path.join("{{sphinx_docs_target}}", "../functions"))) # -- Project information ----------------------------------------------------- @@ -73,6 +74,8 @@ source_suffix = { ".md": "myst-nb", } +myst_heading_anchors = 6 + # If 
you want to mock imports during autodoc autodoc_mock_imports = [{{mock_imports}}] diff --git a/cli/helpers.py b/cli/utils/helpers.py similarity index 99% rename from cli/helpers.py rename to cli/utils/helpers.py index 10b800069..fdeae82c0 100644 --- a/cli/helpers.py +++ b/cli/utils/helpers.py @@ -22,7 +22,7 @@ import yaml from jinja2 import Template -PROJECT_ROOT = Path(__file__).parent.parent.absolute() +PROJECT_ROOT = Path(__file__).parent.parent.parent.absolute() def is_item_dir(path: Path) -> bool: diff --git a/cli/item_template.yaml b/cli/utils/item_template.yaml similarity index 100% rename from cli/item_template.yaml rename to cli/utils/item_template.yaml diff --git a/cli/path_iterator.py b/cli/utils/path_iterator.py similarity index 100% rename from cli/path_iterator.py rename to cli/utils/path_iterator.py diff --git a/describe/describe.ipynb b/describe/describe.ipynb deleted file mode 100644 index a468d1e9c..000000000 --- a/describe/describe.ipynb +++ /dev/null @@ -1,1395 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Describe\n", - "\n", - " This function will analysis the data and outputs the following artifacts per\n", - " column within the data frame (based on data types):\n", - "\n", - " describe csv\n", - " histogram matrix\n", - " violin chart\n", - " correlation-matrix chart\n", - " correlation-matrix csv\n", - " imbalance pie chart\n", - " imbalance-weights-vec csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "## analyse\n", - "\n", - "### Docs\n", - "\n", - "#### Parameters:\n", - "* **`context`**: `mlrun.MLClientCtx` - The MLRun function execution context\n", - "* **`name`**: `str` - Key of the dataset to database (\"dataset\" for default).\n", - "* **`table`**: `DataItem = None` - MLRun input pointing to pandas dataframe (csv/parquet file path)\n", - "* **`label_column`**: `str = None` - Ground truth column label\n", - "* **`plots_dest`**: `str = 
\"plots\"` - Destination folder of summary plots (relative to artifact_path)\n", - "* **`random_state`**: `int = 1` - When the table has more than 500,000 samples, we sample randomly 500,000 samples.\n", - "* **`dask_key`**: `string = datasets` - key of dataframe in dask client \"datasets\" attribute.\n", - "* **`dask_function`**: `str = None` - dask function url (db://..).\n", - "* **`dask_client`**: `str = None` - dask client object." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### DEMO\n", - "#### Set-up" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import mlrun\n", - "import os\n", - "from sklearn.datasets import make_classification" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:25:24,033 [info] loaded project new-describe-project from MLRun DB\n" - ] - } - ], - "source": [ - "# Set our project's name:\n", - "project_name = \"new-describe-project\"\n", - "\n", - "# Create the project:\n", - "project = mlrun.get_or_create_project(name=project_name, context=\"./\", user_project=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Loading random dataset\n", - "We will use make_classification to generate random dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "n_features=5\n", - "X, y = make_classification(n_samples=100, n_features=n_features, n_classes=3, random_state = 18,\n", - " class_sep=2, n_informative=3)\n", - "df = pd.DataFrame(X, columns=[f\"feature_{i}\" for i in range(n_features)])\n", - "df['label'] = y\n", - "try:\n", - " os.mkdir('artifacts')\n", - "except:\n", - " pass\n", - 
"df.to_parquet(\"artifacts/random_dataset.parquet\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Import the describe MLRun function with analysis handler" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "describe_func = mlrun.import_function(\"hub://describe\")\n", - "describe_func.apply(mlrun.platforms.auto_mount())" - ] - }, - { - "cell_type": "markdown", - "source": [ - "#### Run the function on new data set\n", - "Run describe function\n", - "\n", - "After we run the function you can see the created artifacts by click on the run uid and go -> artifacts" - ], - "metadata": { - "collapsed": false - } - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:25:24,124 [info] starting run task-describe uid=4290cd324f784a60b226461f22750fe1 DB=http://mlrun-api:8080\n", - "> 2022-04-26 07:25:30,557 [info] The data set is logged to the project under dataset name\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
new-describe-project-davids0Apr 26 07:25:24completedtask-describe
v3io_user=davids
kind=
owner=davids
host=jupyter-davids-5d6fdc4597-4tpss
table
label_column=label
describe-csv
histograms-matrix
histograms
violin
imbalance
imbalance-weights-vec
correlation-matrix-csv
correlation
dataset
\n", - "
\n", - "
\n", - "
\n", - " Title\n", - " ×\n", - "
\n", - " \n", - "
\n", - "
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - " > to track results use the .show() or .logs() methods or click here to open in UI" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:25:30,768 [info] run executed, status=completed\n" - ] - } - ], - "source": [ - "describe_run = describe_func.run(\n", - " name=\"task-describe\",\n", - " handler='analyze',\n", - " inputs={\"table\": os.path.abspath(\"artifacts/random_dataset.parquet\")},\n", - " params={\"label_column\": \"label\"},\n", - " local=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "describe_run.artifact('imbalance').show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "describe_run.artifact('scatter-2d').show()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Run the function on alredy loaded data set\n", - "\n", - "log new data set to the project\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context = mlrun.get_or_create_ctx(project_name)\n", - "df = pd.read_parquet(os.path.abspath(\"artifacts/random_dataset.parquet\"))\n", - "context.log_dataset(key=\"dataset\", db_key=\"dataset1\", stats=True, df=df)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:25:31,096 [info] starting run task-describe uid=0789bdac0aa54605bc4cc298060affa6 DB=http://mlrun-api:8080\n", - "> 2022-04-26 07:25:33,154 [info] The data set is logged to the project under dataset1 name\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
new-describe-project-davids0Apr 26 07:25:31completedtask-describe
v3io_user=davids
kind=
owner=davids
host=jupyter-davids-5d6fdc4597-4tpss
table
name=dataset1
label_column=label
describe-csv
histograms-matrix
histograms
violin
imbalance
imbalance-weights-vec
correlation-matrix-csv
correlation
dataset
\n", - "
\n", - "
\n", - "
\n", - " Title\n", - " ×\n", - "
\n", - " \n", - "
\n", - "
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - " > to track results use the .show() or .logs() methods or click here to open in UI" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:25:33,340 [info] run executed, status=completed\n" - ] - } - ], - "source": [ - "describe_run = describe_func.run(\n", - " name=\"task-describe\",\n", - " handler='analyze',\n", - " inputs={\"table\": os.path.abspath(\"artifacts/random_dataset.parquet\")},\n", - " params={\"name\": \"dataset1\", \"label_column\": \"label\"},\n", - " local=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "describe_run.artifact('correlation').show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "describe_run.artifact('histograms').show()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "metadata": { - "jupyter": { - "outputs_hidden": false - }, - "pycharm": { - "name": "#%% md\n" - } - }, - "source": [ - "### Run the function with dask ### \n", - "create a dask test cluster (dask function)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "pycharm": { - "name": "#%%\n", - "is_executing": true - } - }, - "outputs": [], - "source": [ - "dask_cluster = mlrun.new_function('dask_tests', kind='dask', image='mlrun/ml-models')\n", - "dask_cluster.apply(mlrun.mount_v3io())\n", - "dask_cluster.spec.remote = True\n", - "dask_cluster.with_requests(mem='2G')\n", - "dask_cluster_name = dask_cluster.save()" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Run the describe function.\n", - "After we run the function you can see the created artifacts by click on the run uid and go -> artifacts" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:25:37,924 [info] starting run task-describe uid=9ffc9f61b9c745248a39301e0d9c8a8a DB=http://mlrun-api:8080\n", - "> 2022-04-26 07:25:55,516 [info] to get a dashboard link, use NodePort service_type\n", - "> 2022-04-26 07:25:55,517 [info] trying dask client at: 
tcp://mlrun-dask-tests-e2bed324-4.default-tenant:8786\n", - "> 2022-04-26 07:25:55,572 [info] using remote dask scheduler (mlrun-dask-tests-e2bed324-4) at: tcp://mlrun-dask-tests-e2bed324-4.default-tenant:8786\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/User/.pythonlibs/jupyter-davids/lib/python3.7/site-packages/distributed/client.py:1131: VersionMismatchWarning:\n", - "\n", - "Mismatched versions found\n", - "\n", - "+---------+--------+-----------+---------+\n", - "| Package | client | scheduler | workers |\n", - "+---------+--------+-----------+---------+\n", - "| blosc | 1.7.0 | 1.10.6 | None |\n", - "| lz4 | 3.1.0 | 3.1.10 | None |\n", - "| msgpack | 1.0.0 | 1.0.3 | None |\n", - "| toolz | 0.11.1 | 0.11.2 | None |\n", - "| tornado | 6.0.4 | 6.1 | None |\n", - "+---------+--------+-----------+---------+\n", - "Notes: \n", - "- msgpack: Variation is ok, as long as everything is above 0.6\n", - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:26:00,340 [info] The data set is logged to the project under dataset1 name\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
new-describe-project-davids0Apr 26 07:25:38completedtask-describe
v3io_user=davids
kind=
owner=davids
host=jupyter-davids-5d6fdc4597-4tpss
table
name=dataset1
label_column=label
dask_function=db://new-describe-project-davids/dask_tests
describe-csv
histograms-matrix
histograms
violin
imbalance
imbalance-weights-vec
correlation-matrix-csv
correlation
dataset
\n", - "
\n", - "
\n", - "
\n", - " Title\n", - " ×\n", - "
\n", - " \n", - "
\n", - "
\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - " > to track results use the .show() or .logs() methods or click here to open in UI" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2022-04-26 07:26:00,657 [info] run executed, status=completed\n" - ] - } - ], - "source": [ - "describe_run = describe_func.run(\n", - " name=\"task-describe\",\n", - " handler='analyze',\n", - " inputs={\"table\": os.path.abspath(\"artifacts/random_dataset.parquet\")},\n", - " params={\"name\": \"dataset1\", \"label_column\": \"label\", \"dask_function\": dask_cluster_name},\n", - " local=True\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "
\n", - "
\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "describe_run.artifact('violin').show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/describe_dask/function.yaml b/describe_dask/function.yaml deleted file mode 100644 index 451fb639b..000000000 --- a/describe_dask/function.yaml +++ /dev/null @@ -1,61 +0,0 @@ -kind: job -metadata: - name: describe-dask - tag: '' - hash: 1ac500a28694c381499f7cc866921fd32c0ab643 - project: '' - labels: - author: Iguazio - categories: - - data-analysis -spec: - command: '' - args: [] - image: mlrun/ml-models - env: [] - default_handler: summarize - entry_points: - summarize: - name: summarize - doc: 'Summarize a table - - - Connects to dask client through the function context, or through an optional - - user-supplied scheduler.' - parameters: - - name: context - doc: the function context - default: '' - - name: dask_key - type: str - doc: key of dataframe in dask client "datasets" attribute - default: dask_key - - name: dataset - type: DataItem - default: null - - name: label_column - type: str - doc: ground truth column label - default: label - - name: plots_dest - type: str - doc: destination folder of summary plots (relative to artifact_path) - default: plots - - name: dask_function - type: str - doc: dask function url (db://..) 
- default: null - - name: dask_client - doc: dask client object - default: null - outputs: - - default: '' - lineno: 14 - description: describe and visualizes dataset stats - build: - functionSourceCode: aW1wb3J0IG1scnVuCmltcG9ydCB3YXJuaW5ncwp3YXJuaW5ncy5zaW1wbGVmaWx0ZXIoYWN0aW9uPSJpZ25vcmUiLCBjYXRlZ29yeT1GdXR1cmVXYXJuaW5nKQppbXBvcnQgcGFuZGFzIGFzIHBkCmltcG9ydCBtYXRwbG90bGliLnB5cGxvdCBhcyBwbHQKaW1wb3J0IHNlYWJvcm4gYXMgc25zCmZyb20gbWxydW4uYXJ0aWZhY3RzIGltcG9ydCBQbG90QXJ0aWZhY3QsIFRhYmxlQXJ0aWZhY3QKZnJvbSBtbHJ1bi5tbHV0aWxzLnBsb3RzIGltcG9ydCBnY2ZfY2xlYXIKaW1wb3J0IG51bXB5IGFzIG5wCgoKcGQuc2V0X29wdGlvbigiZGlzcGxheS5mbG9hdF9mb3JtYXQiLCBsYW1iZGEgeDogIiUuMmYiICUgeCkKCmRlZiBzdW1tYXJpemUoCiAgICBjb250ZXh0LAogICAgZGFza19rZXk6IHN0ciA9ICJkYXNrX2tleSIsCiAgICBkYXRhc2V0OiBtbHJ1bi5EYXRhSXRlbSA9IE5vbmUsCiAgICBsYWJlbF9jb2x1bW46IHN0ciA9ICJsYWJlbCIsCiAgICBwbG90c19kZXN0OiBzdHIgPSAicGxvdHMiLAogICAgZGFza19mdW5jdGlvbjogc3RyID0gTm9uZSwKICAgIGRhc2tfY2xpZW50PU5vbmUsCikgLT4gTm9uZToKICAgICIiIlN1bW1hcml6ZSBhIHRhYmxlCiAgICAKICAgIENvbm5lY3RzIHRvIGRhc2sgY2xpZW50IHRocm91Z2ggdGhlIGZ1bmN0aW9uIGNvbnRleHQsIG9yIHRocm91Z2ggYW4gb3B0aW9uYWwKICAgIHVzZXItc3VwcGxpZWQgc2NoZWR1bGVyLgoKICAgIDpwYXJhbSBjb250ZXh0OiAgICAgICAgIHRoZSBmdW5jdGlvbiBjb250ZXh0CiAgICA6cGFyYW0gZGFza19rZXk6ICAgICAgICBrZXkgb2YgZGF0YWZyYW1lIGluIGRhc2sgY2xpZW50ICJkYXRhc2V0cyIgYXR0cmlidXRlCiAgICA6cGFyYW0gbGFiZWxfY29sdW1uOiAgICBncm91bmQgdHJ1dGggY29sdW1uIGxhYmVsCiAgICA6cGFyYW0gcGxvdHNfZGVzdDogICAgICBkZXN0aW5hdGlvbiBmb2xkZXIgb2Ygc3VtbWFyeSBwbG90cyAocmVsYXRpdmUgdG8gYXJ0aWZhY3RfcGF0aCkKICAgIDpwYXJhbSBkYXNrX2Z1bmN0aW9uOiAgIGRhc2sgZnVuY3Rpb24gdXJsIChkYjovLy4uKQogICAgOnBhcmFtIGRhc2tfY2xpZW50OiAgICAgZGFzayBjbGllbnQgb2JqZWN0CiAgICAiIiIKICAgIGlmIGRhc2tfZnVuY3Rpb246CiAgICAgICAgY2xpZW50ID0gbWxydW4uaW1wb3J0X2Z1bmN0aW9uKGRhc2tfZnVuY3Rpb24pLmNsaWVudAogICAgZWxpZiBkYXNrX2NsaWVudDoKICAgICAgICBjbGllbnQgPSBkYXNrX2NsaWVudAogICAgZWxzZToKICAgICAgICByYWlzZSBWYWx1ZUVycm9yKCdkYXNrIGNsaWVudCB3YXMgbm90IHByb3ZpZGVkJykKICAgICAgICAKICAgIGlmIGRhc2tfa2V5IGluIGNsaWVudC5kYXRhc2V0czoKICAg
ICAgICB0YWJsZSA9IGNsaWVudC5nZXRfZGF0YXNldChkYXNrX2tleSkKICAgIGVsaWYgZGF0YXNldDoKICAgICAgICAjdGFibGUgPSBkYXRhc2V0LmFzX2RmKGRmX21vZHVsZT1kZCkKICAgICAgICB0YWJsZSA9IGRhdGFzZXQuYXNfZGYoKQogICAgZWxzZToKICAgICAgICBjb250ZXh0LmxvZ2dlci5pbmZvKGYib25seSB0aGVzZSBkYXRhc2V0cyBhcmUgYXZhaWxhYmxlIHtjbGllbnQuZGF0YXNldHN9IGluIGNsaWVudCB7Y2xpZW50fSIpCiAgICAgICAgcmFpc2UgRXhjZXB0aW9uKCJkYXRhc2V0IG5vdCBmb3VuZCBvbiBkYXNrIGNsdXN0ZXIiKQogICAgZGYgPSB0YWJsZQogICAgaGVhZGVyID0gZGYuY29sdW1ucy52YWx1ZXMKICAgIGV4dHJhX2RhdGEgPSB7fQoKICAgIHRyeToKICAgICAgICBnY2ZfY2xlYXIocGx0KQogICAgICAgIHNuc3BsdCA9IHNucy5wYWlycGxvdChkZiwgaHVlPWxhYmVsX2NvbHVtbikgICMgLCBkaWFnX2t3cz17ImJ3IjogMS41fSkKICAgICAgICBleHRyYV9kYXRhWyJoaXN0b2dyYW1zIl0gPSBjb250ZXh0LmxvZ19hcnRpZmFjdCgKICAgICAgICAgICAgUGxvdEFydGlmYWN0KCJoaXN0b2dyYW1zIiwgYm9keT1wbHQuZ2NmKCkpLAogICAgICAgICAgICBsb2NhbF9wYXRoPWYie3Bsb3RzX2Rlc3R9L2hpc3QuaHRtbCIsCiAgICAgICAgICAgIGRiX2tleT1GYWxzZSwKICAgICAgICApCiAgICBleGNlcHQgRXhjZXB0aW9uIGFzIGU6CiAgICAgICAgY29udGV4dC5sb2dnZXIuZXJyb3IoZiJGYWlsZWQgdG8gY3JlYXRlIHBhaXJwbG90IGhpc3RvZ3JhbXMgZHVlIHRvOiB7ZX0iKQoKICAgIHRyeToKICAgICAgICBnY2ZfY2xlYXIocGx0KQogICAgICAgIHBsb3RfY29scyA9IDMKICAgICAgICBwbG90X3Jvd3MgPSBpbnQoKGxlbihoZWFkZXIpIC0gMSkgLyBwbG90X2NvbHMpICsgMQogICAgICAgIGZpZywgYXggPSBwbHQuc3VicGxvdHMocGxvdF9yb3dzLCBwbG90X2NvbHMsIGZpZ3NpemU9KDE1LCA0KSkKICAgICAgICBmaWcudGlnaHRfbGF5b3V0KHBhZD0yLjApCiAgICAgICAgZm9yIGkgaW4gcmFuZ2UocGxvdF9yb3dzICogcGxvdF9jb2xzKToKICAgICAgICAgICAgaWYgaSA8IGxlbihoZWFkZXIpOgogICAgICAgICAgICAgICAgc25zLnZpb2xpbnBsb3QoCiAgICAgICAgICAgICAgICAgICAgeD1kZltoZWFkZXJbaV1dLAogICAgICAgICAgICAgICAgICAgIGF4PWF4W2ludChpIC8gcGxvdF9jb2xzKV1baSAlIHBsb3RfY29sc10sCiAgICAgICAgICAgICAgICAgICAgb3JpZW50PSJoIiwKICAgICAgICAgICAgICAgICAgICB3aWR0aD0wLjcsCiAgICAgICAgICAgICAgICAgICAgaW5uZXI9InF1YXJ0aWxlIiwKICAgICAgICAgICAgICAgICkKICAgICAgICAgICAgZWxzZToKICAgICAgICAgICAgICAgIGZpZy5kZWxheGVzKGF4W2ludChpIC8gcGxvdF9jb2xzKV1baSAlIHBsb3RfY29sc10pCiAgICAgICAgICAgIGkgKz0gMQogICAgICAgIGV4dHJhX2RhdGFbInZpb2xpbiJdID0gY29udGV4dC5sb2dfYXJ0aWZhY3Qo
CiAgICAgICAgICAgIFBsb3RBcnRpZmFjdCgidmlvbGluIiwgYm9keT1wbHQuZ2NmKCksIHRpdGxlPSJWaW9saW4gUGxvdCIpLAogICAgICAgICAgICBsb2NhbF9wYXRoPWYie3Bsb3RzX2Rlc3R9L3Zpb2xpbi5odG1sIiwKICAgICAgICAgICAgZGJfa2V5PUZhbHNlLAogICAgICAgICkKICAgIGV4Y2VwdCBFeGNlcHRpb24gYXMgZToKICAgICAgICBjb250ZXh0LmxvZ2dlci53YXJuKGYiRmFpbGVkIHRvIGNyZWF0ZSB2aW9saW4gZGlzdHJpYnV0aW9uIHBsb3RzIGR1ZSB0bzoge2V9IikKCiAgICBpZiBsYWJlbF9jb2x1bW46CiAgICAgICAgbGFiZWxzID0gZGYucG9wKGxhYmVsX2NvbHVtbikKICAgICAgICBpbWJ0YWJsZSA9IGxhYmVscy52YWx1ZV9jb3VudHMobm9ybWFsaXplPVRydWUpLnNvcnRfaW5kZXgoKQogICAgICAgIHRyeToKICAgICAgICAgICAgZ2NmX2NsZWFyKHBsdCkKICAgICAgICAgICAgYmFsYW5jZWJhciA9IGltYnRhYmxlLnBsb3Qoa2luZD0iYmFyIiwgdGl0bGU9ImNsYXNzIGltYmFsYW5jZSAtIGxhYmVscyIpCiAgICAgICAgICAgIGJhbGFuY2ViYXIuc2V0X3hsYWJlbCgiY2xhc3MiKQogICAgICAgICAgICBiYWxhbmNlYmFyLnNldF95bGFiZWwoInByb3BvcnRpb24gb2YgdG90YWwiKQogICAgICAgICAgICBleHRyYV9kYXRhWyJpbWJhbGFuY2UiXSA9IGNvbnRleHQubG9nX2FydGlmYWN0KAogICAgICAgICAgICAgICAgUGxvdEFydGlmYWN0KCJpbWJhbGFuY2UiLCBib2R5PXBsdC5nY2YoKSksCiAgICAgICAgICAgICAgICBsb2NhbF9wYXRoPWYie3Bsb3RzX2Rlc3R9L2ltYmFsYW5jZS5odG1sIiwKICAgICAgICAgICAgKQogICAgICAgIGV4Y2VwdCBFeGNlcHRpb24gYXMgZToKICAgICAgICAgICAgY29udGV4dC5sb2dnZXIud2FybihmIkZhaWxlZCB0byBjcmVhdGUgY2xhc3MgaW1iYWxhbmNlIHBsb3QgZHVlIHRvOiB7ZX0iKQogICAgICAgIGNvbnRleHQubG9nX2FydGlmYWN0KAogICAgICAgICAgICBUYWJsZUFydGlmYWN0KAogICAgICAgICAgICAgICAgImltYmFsYW5jZS13ZWlnaHRzLXZlYyIsIGRmPXBkLkRhdGFGcmFtZSh7IndlaWdodHMiOiBpbWJ0YWJsZX0pCiAgICAgICAgICAgICksCiAgICAgICAgICAgIGxvY2FsX3BhdGg9ZiJ7cGxvdHNfZGVzdH0vaW1iYWxhbmNlLXdlaWdodHMtdmVjLmNzdiIsCiAgICAgICAgICAgIGRiX2tleT1GYWxzZSwKICAgICAgICApCgogICAgdGJsY29yciA9IGRmLmNvcnIoKQogICAgbWFzayA9IG5wLnplcm9zX2xpa2UodGJsY29yciwgZHR5cGU9bnAuYm9vbCkKICAgIG1hc2tbbnAudHJpdV9pbmRpY2VzX2Zyb20obWFzayldID0gVHJ1ZQoKICAgIGRmY29yciA9IHBkLkRhdGFGcmFtZShkYXRhPXRibGNvcnIsIGNvbHVtbnM9aGVhZGVyLCBpbmRleD1oZWFkZXIpCiAgICBkZmNvcnIgPSBkZmNvcnJbbnAuYXJhbmdlKGRmY29yci5zaGFwZVswXSlbOiwgTm9uZV0gPiBucC5hcmFuZ2UoZGZjb3JyLnNoYXBlWzFdKV0KICAgIGNvbnRleHQubG9nX2FydGlmYWN0KAogICAgICAg
IFRhYmxlQXJ0aWZhY3QoImNvcnJlbGF0aW9uLW1hdHJpeCIsIGRmPXRibGNvcnIsIHZpc2libGU9VHJ1ZSksCiAgICAgICAgbG9jYWxfcGF0aD1mIntwbG90c19kZXN0fS9jb3JyZWxhdGlvbi1tYXRyaXguY3N2IiwKICAgICAgICBkYl9rZXk9RmFsc2UsCiAgICApCgogICAgdHJ5OgogICAgICAgIGdjZl9jbGVhcihwbHQpCiAgICAgICAgYXggPSBwbHQuYXhlcygpCiAgICAgICAgc25zLmhlYXRtYXAodGJsY29yciwgYXg9YXgsIG1hc2s9bWFzaywgYW5ub3Q9RmFsc2UsIGNtYXA9cGx0LmNtLlJlZHMpCiAgICAgICAgYXguc2V0X3RpdGxlKCJmZWF0dXJlcyBjb3JyZWxhdGlvbiIpCiAgICAgICAgZXh0cmFfZGF0YVsiY29ycmVsYXRpb24iXSA9IGNvbnRleHQubG9nX2FydGlmYWN0KAogICAgICAgICAgICBQbG90QXJ0aWZhY3QoImNvcnJlbGF0aW9uIiwgYm9keT1wbHQuZ2NmKCksIHRpdGxlPSJDb3JyZWxhdGlvbiBNYXRyaXgiKSwKICAgICAgICAgICAgbG9jYWxfcGF0aD1mIntwbG90c19kZXN0fS9jb3JyLmh0bWwiLAogICAgICAgICAgICBkYl9rZXk9RmFsc2UsCiAgICAgICAgKQogICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgIGNvbnRleHQubG9nZ2VyLndhcm4oZiJGYWlsZWQgdG8gY3JlYXRlIGZlYXR1cmVzIGNvcnJlbGF0aW9uIHBsb3QgZHVlIHRvOiB7ZX0iKQoKICAgIGdjZl9jbGVhcihwbHQpCg== - commands: [] - code_origin: https://github.com/daniels290813/functions.git#55a79c32be5d233cc11efcf40cd3edbe309bfdef:/home/kali/functions/describe_dask/describe_dask.py - affinity: null -verbose: false diff --git a/CONTRIBUTING.md b/functions/CONTRIBUTING.md similarity index 100% rename from CONTRIBUTING.md rename to functions/CONTRIBUTING.md diff --git a/functions/README.md b/functions/README.md new file mode 100644 index 000000000..08b1c7ad9 --- /dev/null +++ b/functions/README.md @@ -0,0 +1,48 @@ +# Functions hub + +This functions hub is intended to be a centralized location for open source contributions of function components. +These are functions expected to be run as independent mlrun pipeline compnents, and as public contributions, +it is expected that contributors follow certain guidelines/protocols (please chip-in). 
+ +## Catalog + + +| Name | Description | Kind | Categories | +| --- | --- | --- | --- | +| [aggregate](/home/runner/work/functions/functions/functions/src/aggregate) | Rolling aggregation over Metrics and Lables according to specifications | job | data-preparation | +| [arc_to_parquet](/home/runner/work/functions/functions/functions/src/arc_to_parquet) | retrieve remote archive, open and save as parquet | job | utils | +| [auto_trainer](/home/runner/work/functions/functions/functions/src/auto_trainer) | Automatic train, evaluate and predict functions for the ML frameworks - Scikit-Learn, XGBoost and LightGBM. | job | machine-learning, model-training | +| [azureml_serving](/home/runner/work/functions/functions/functions/src/azureml_serving) | AzureML serving function | serving | machine-learning, model-serving | +| [azureml_utils](/home/runner/work/functions/functions/functions/src/azureml_utils) | Azure AutoML integration in MLRun, including utils functions for training models on Azure AutoML platfrom. | job | model-serving, utils | +| [batch_inference](/home/runner/work/functions/functions/functions/src/batch_inference) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving | +| [batch_inference_v2](/home/runner/work/functions/functions/functions/src/batch_inference_v2) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. 
| job | model-serving | +| [describe](/home/runner/work/functions/functions/functions/src/describe) | describe and visualizes dataset stats | job | data-analysis | +| [describe_dask](/home/runner/work/functions/functions/functions/src/describe_dask) | describe and visualizes dataset stats | job | data-analysis | +| [describe_spark](/home/runner/work/functions/functions/functions/src/describe_spark) | | job | data-analysis | +| [feature_selection](/home/runner/work/functions/functions/functions/src/feature_selection) | Select features through multiple Statistical and Model filters | job | data-preparation, machine-learning | +| [gen_class_data](/home/runner/work/functions/functions/functions/src/gen_class_data) | Create a binary classification sample dataset and save. | job | data-generation | +| [github_utils](/home/runner/work/functions/functions/functions/src/github_utils) | add comments to github pull request | job | utils | +| [hugging_face_serving](/home/runner/work/functions/functions/functions/src/hugging_face_serving) | Generic Hugging Face model server. | serving | genai, model-serving | +| [load_dataset](/home/runner/work/functions/functions/functions/src/load_dataset) | load a toy dataset from scikit-learn | job | data-preparation | +| [mlflow_utils](/home/runner/work/functions/functions/functions/src/mlflow_utils) | Mlflow model server, and additional utils. 
| serving | model-serving, utils | +| [model_server](/home/runner/work/functions/functions/functions/src/model_server) | generic sklearn model server | nuclio:serving | model-serving, machine-learning | +| [model_server_tester](/home/runner/work/functions/functions/functions/src/model_server_tester) | test model servers | job | monitoring, model-serving | +| [noise_reduction](/home/runner/work/functions/functions/functions/src/noise_reduction) | Reduce noise from audio files | job | data-preparation, audio | +| [onnx_utils](/home/runner/work/functions/functions/functions/src/onnx_utils) | ONNX intigration in MLRun, some utils functions for the ONNX framework, optimizing and converting models from different framework to ONNX using MLRun. | job | utils, deep-learning | +| [open_archive](/home/runner/work/functions/functions/functions/src/open_archive) | Open a file/object archive into a target directory | job | utils | +| [pii_recognizer](/home/runner/work/functions/functions/functions/src/pii_recognizer) | This function is used to recognize PII in a directory of text files | job | data-preparation, NLP | +| [pyannote_audio](/home/runner/work/functions/functions/functions/src/pyannote_audio) | pyannote's speech diarization of audio files | job | deep-learning, audio | +| [question_answering](/home/runner/work/functions/functions/functions/src/question_answering) | GenAI approach of question answering on a given data | job | genai | +| [send_email](/home/runner/work/functions/functions/functions/src/send_email) | Send Email messages through SMTP server | job | utils | +| [silero_vad](/home/runner/work/functions/functions/functions/src/silero_vad) | Silero VAD (Voice Activity Detection) functions. 
| job | deep-learning, audio | +| [sklearn_classifier](/home/runner/work/functions/functions/functions/src/sklearn_classifier) | train any classifier using scikit-learn's API | job | machine-learning, model-training | +| [sklearn_classifier_dask](/home/runner/work/functions/functions/functions/src/sklearn_classifier_dask) | train any classifier using scikit-learn's API over Dask | job | machine-learning, model-training | +| [structured_data_generator](/home/runner/work/functions/functions/functions/src/structured_data_generator) | GenAI approach of generating structured data according to a given schema | job | data-generation, genai | +| [test_classifier](/home/runner/work/functions/functions/functions/src/test_classifier) | test a classifier using held-out or new data | job | machine-learning, model-testing | +| [text_to_audio_generator](/home/runner/work/functions/functions/functions/src/text_to_audio_generator) | Generate audio file from text using different speakers | job | data-generation, audio | +| [tf2_serving](/home/runner/work/functions/functions/functions/src/tf2_serving) | tf2 image classification server | nuclio:serving | model-serving, machine-learning | +| [transcribe](/home/runner/work/functions/functions/functions/src/transcribe) | Transcribe audio files into text files | job | audio, genai | +| [translate](/home/runner/work/functions/functions/functions/src/translate) | Translate text files from one language to another | job | genai, NLP | +| [v2_model_server](/home/runner/work/functions/functions/functions/src/v2_model_server) | generic sklearn model server | serving | model-serving, machine-learning | +| [v2_model_tester](/home/runner/work/functions/functions/functions/src/v2_model_tester) | test v2 model servers | job | model-testing, machine-learning | + diff --git a/aggregate/README.md b/functions/src/aggregate/README.md similarity index 100% rename from aggregate/README.md rename to functions/src/aggregate/README.md diff --git 
a/aggregate/aggregate.ipynb b/functions/src/aggregate/aggregate.ipynb similarity index 96% rename from aggregate/aggregate.ipynb rename to functions/src/aggregate/aggregate.ipynb index 5923bb20c..d71ba8128 100644 --- a/aggregate/aggregate.ipynb +++ b/functions/src/aggregate/aggregate.ipynb @@ -20,7 +20,7 @@ "source": [ "### **Steps**\n", "\n", - "1. [Data exploration](#Data-exploration)\n", + "1. [Data exploration](#data-exploration)\n", "2. [Importing the function](#Importing-the-function)\n", "3. [Running the function locally](#Running-the-function-locally)\n", "4. [Running the function remotely](#Running-the-function-remotely)" @@ -29,9 +29,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "### **Data exploration**" - ] + "source": "### **Data exploration** {#data-exploration}" }, { "cell_type": "markdown", @@ -52,9 +50,12 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-11T12:24:41.598043Z", + "start_time": "2025-08-11T12:24:41.596166Z" + } + }, "source": [ "# upload environment variables from env file if exists\n", "import os,mlrun\n", @@ -64,36 +65,76 @@ " \n", "if os.path.exists(path):\n", " env_dict = mlrun.set_env_from_file(path, return_dict=True)\n" - ] + ], + "outputs": [], + "execution_count": 16 }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-11T12:24:41.629080Z", + "start_time": "2025-08-11T12:24:41.624195Z" + } + }, + "source": [ + "# create the new project\n", + "project_name = 'aggregate-example'\n", + "\n", + "# Initialize the MLRun project object\n", + "project = mlrun.get_or_create_project(project_name, context=\"./\", user_project=True)" + ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "> 2022-11-30 13:51:49,512 [info] loaded project aggregate-example from MLRun DB\n" + "> 2025-08-11 15:24:41,624 [warning] Could not detect path to 
API server, not connected to API server!\n", + "> 2025-08-11 15:24:41,625 [warning] MLRUN_DBPATH is misconfigured. Set this environment variable to the URL of the API server in order to connect\n", + "> 2025-08-11 15:24:41,625 [info] Loading project from path: {\"path\":\"./\",\"project_name\":\"aggregate-example\",\"user_project\":true}\n", + "> 2025-08-11 15:24:41,627 [warning] Could not detect path to API server, not connected to API server!\n", + "> 2025-08-11 15:24:41,627 [warning] MLRUN_DBPATH is misconfigured. Set this environment variable to the URL of the API server in order to connect\n", + "> 2025-08-11 15:24:41,628 [info] Project loaded successfully: {\"path\":\"./\",\"project_name\":\"aggregate-example-daniel-perez\",\"stored_in_db\":true}\n" ] } ], - "source": [ - "# create the new project\n", - "project_name = 'aggregate-example'\n", - "\n", - "# Initialize the MLRun project object\n", - "project = mlrun.get_or_create_project(project_name, context=\"./\", user_project=True)" - ] + "execution_count": 17 }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-11T12:24:43.245969Z", + "start_time": "2025-08-11T12:24:41.647984Z" + } + }, + "source": [ + "import pandas as pd\n", + "\n", + "data_path = 'https://s3.wasabisys.com/iguazio/data/function-marketplace-data/aggregate/train_room_occupancy.csv'\n", + "df = pd.read_csv(data_path).set_index('date',drop=False)\n", + "df.head()" + ], "outputs": [ { "data": { + "text/plain": [ + " date Temperature Humidity Light \\\n", + "date \n", + "2015-02-04 17:51:00 2015-02-04 17:51:00 23.18 27.2720 426.0 \n", + "2015-02-04 17:51:59 2015-02-04 17:51:59 23.15 27.2675 429.5 \n", + "2015-02-04 17:53:00 2015-02-04 17:53:00 23.15 27.2450 426.0 \n", + "2015-02-04 17:54:00 2015-02-04 17:54:00 23.15 27.2000 426.0 \n", + "2015-02-04 17:55:00 2015-02-04 17:55:00 23.10 27.2000 426.0 \n", + "\n", + " CO2 HumidityRatio Occupancy \n", + "date \n", + "2015-02-04 
17:51:00 721.25 0.004793 1 \n", + "2015-02-04 17:51:59 714.00 0.004783 1 \n", + "2015-02-04 17:53:00 713.50 0.004779 1 \n", + "2015-02-04 17:54:00 708.25 0.004772 1 \n", + "2015-02-04 17:55:00 704.50 0.004757 1 " + ], "text/html": [ "
\n", "