diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e1098dc..18eac7a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- Support demos, which are end-to-end demonstrations of the usage of the Stackable Data Platform ([#66](https://github.com/stackabletech/stackablectl/pull/66)) + ## [0.3.0] - 2022-08-09 ### Added diff --git a/Cargo.lock b/Cargo.lock index 66c61809..aef63675 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1352,7 +1352,7 @@ dependencies = [ [[package]] name = "stackablectl" -version = "0.2.0" +version = "0.3.0" dependencies = [ "cached", "clap", diff --git a/demos/demos-v1.yaml b/demos/demos-v1.yaml new file mode 100644 index 00000000..091fcc6b --- /dev/null +++ b/demos/demos-v1.yaml @@ -0,0 +1,16 @@ +--- +demos: + trino-taxi-data: + description: Demo loading 2.5 years of New York taxi data into S3 bucket, creating a Trino table and a Superset dashboard + documentation: https://docs.stackable.tech/stackablectl/stable/demos/trino-taxi-data.html + stackableStack: trino-superset-s3 + labels: + - trino + - superset + - minio + - s3 + - ny-taxi-data + manifests: + - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/trino-taxi-data/load-test-data.yaml + - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/trino-taxi-data/create-table-in-trino.yaml + - plainYaml: https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/trino-taxi-data/setup-superset.yaml diff --git a/demos/trino-taxi-data/create-table-in-trino.yaml b/demos/trino-taxi-data/create-table-in-trino.yaml new file mode 100644 index 00000000..3f01261f --- /dev/null +++ b/demos/trino-taxi-data/create-table-in-trino.yaml @@ -0,0 +1,105 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: create-ny-taxi-data-table-in-trino +spec: + template: + spec: + containers: + - name: create-ny-taxi-data-table-in-trino + image: python:3.10-slim + command: ["bash", "-c", "pip install 
trino==0.314.0 && python /tmp/script/script.py"] + volumeMounts: + - name: script + mountPath: /tmp/script + restartPolicy: OnFailure + volumes: + - name: script + configMap: + name: create-ny-taxi-data-table-in-trino-script + restartPolicy: Never + backoffLimit: 50 # It can take some time until Trino is ready +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: create-ny-taxi-data-table-in-trino-script +data: + script.py: | + import sys + import trino + + if not sys.warnoptions: + import warnings + warnings.simplefilter("ignore") + + def get_connection(): + connection = trino.dbapi.connect( + host="trino-coordinator", + port=8443, + user="demo", + http_scheme='https', + auth=trino.auth.BasicAuthentication("demo", "demo"), + ) + connection._http_session.verify = False + return connection + + def run_query(connection, query): + print(f"[DEBUG] Executing query {query}") + cursor = connection.cursor() + cursor.execute(query) + return cursor.fetchall() + + connection = get_connection() + + assert run_query(connection, "CREATE SCHEMA IF NOT EXISTS hive.demo WITH (location = 's3a://demo/')")[0][0] is True + assert run_query(connection, """ + CREATE TABLE IF NOT EXISTS hive.demo.ny_taxi_data_raw ( + VendorID BIGINT, + tpep_pickup_datetime TIMESTAMP, + tpep_dropoff_datetime TIMESTAMP, + passenger_count DOUBLE, + trip_distance DOUBLE, + payment_type BIGINT, + Fare_amount DOUBLE, + Tip_amount DOUBLE, + Total_amount DOUBLE + ) WITH ( + external_location = 's3a://demo/ny-taxi-data/raw/', + format = 'parquet' + ) + """)[0][0] is True + + loaded_rows = run_query(connection, "SELECT COUNT(*) FROM hive.demo.ny_taxi_data_raw")[0][0] + print(f"Loaded {loaded_rows} rows") + assert loaded_rows > 0 + + print("Analyzing table ny_taxi_data_raw") + analyze_rows = run_query(connection, """ANALYZE hive.demo.ny_taxi_data_raw""")[0][0] + assert analyze_rows == loaded_rows + stats = run_query(connection, """show stats for hive.demo.ny_taxi_data_raw""") + print("Produced the following 
stats:") + print(*stats, sep="\n") + + assert run_query(connection, """ + create or replace view hive.demo.ny_taxi_data as + select + vendorid, + tpep_pickup_datetime, + tpep_dropoff_datetime, + date_diff('minute', tpep_pickup_datetime, tpep_dropoff_datetime) as duration_min, + passenger_count, + trip_distance, + case payment_type when 1 then 'Credit card' when 2 then 'Cash' when 3 then 'No charge' when 4 then 'Dispute' when 6 then 'Voided trino' else 'Unknown' end as payment_type, + fare_amount, + tip_amount, + total_amount + from hive.demo.ny_taxi_data_raw + where tpep_pickup_datetime >= from_iso8601_timestamp('2019-12-01T00:00:00') + and tpep_pickup_datetime <= from_iso8601_timestamp('2022-05-31T00:00:00') + """)[0][0] is True + + rows_in_view = run_query(connection, "SELECT COUNT(*) FROM hive.demo.ny_taxi_data")[0][0] + print(f"{rows_in_view} rows in view") + assert rows_in_view > 0 diff --git a/demos/trino-taxi-data/load-test-data.yaml b/demos/trino-taxi-data/load-test-data.yaml new file mode 100644 index 00000000..566b8929 --- /dev/null +++ b/demos/trino-taxi-data/load-test-data.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: load-ny-taxi-data +spec: + template: + spec: + containers: + - name: load-ny-taxi-data + image: "bitnami/minio:2022-debian-10" + command: ["bash", "-c", "cd /tmp && for month in 2020-01 2020-02 2020-03 2020-04 2020-05 2020-06 2020-07 2020-08 2020-09 2020-10 2020-11 2020-12 2021-01 2021-02 2021-03 2021-04 2021-05 2021-06 2021-07 2021-08 2021-09 2021-10 2021-11 2021-12 2022-01 2022-02 2022-03 2022-04; do curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_$month.parquet && mc --insecure alias set minio http://minio-trino:9000/ demo demodemo && mc cp yellow_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/; done"] + restartPolicy: OnFailure diff --git a/demos/trino-taxi-data/setup-superset.yaml b/demos/trino-taxi-data/setup-superset.yaml new file mode 100644 index 
00000000..d7819268 --- /dev/null +++ b/demos/trino-taxi-data/setup-superset.yaml @@ -0,0 +1,85 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: setup-superset +spec: + template: + spec: + containers: + - name: setup-superset + image: python:3.10-slim + command: ["bash", "-c", " apt update && apt install -y curl && curl -o superset-assets.zip https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/trino-taxi-data/superset-assets.zip && pip install requests==2.22.0 && python /tmp/script/script.py"] + volumeMounts: + - name: script + mountPath: /tmp/script + restartPolicy: OnFailure + volumes: + - name: script + configMap: + name: setup-superset-script + restartPolicy: Never + backoffLimit: 50 # It can take some time until Superset is ready +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: setup-superset-script +data: + script.py: | + import logging + import requests + + base_url = "http://superset-external:8088" + # base_url = "http://172.18.0.4:31024" + username = "admin" + password = "admin" + + logging.basicConfig(level=logging.INFO) + logging.info("Starting setup of Superset") + + logging.info("Getting access token from /api/v1/security/login") + session = requests.session() + access_token = session.post(f"{base_url}/api/v1/security/login", json={"username": username, "password": password, "provider": "db", "refresh": True}).json()['access_token'] + # print(f"access_token: {access_token}") + + logging.info("Getting csrf token from /api/v1/security/csrf_token") + csrf_token = session.get(f"{base_url}/api/v1/security/csrf_token", headers={"Authorization": f"Bearer {access_token}"}).json()["result"] + # print(f"csrf_token: {csrf_token}") + + headers = { + "accept": "application/json", + "Authorization": f"Bearer {access_token}", + "X-CSRFToken": csrf_token, + } + + # To retrieve all of the assets (datasources, datasets, charts and dashboards) run the following commands + # logging.info("Exporting all assets") + # result = 
session.get(f"{base_url}/api/v1/assets/export", headers=headers) + # assert result.status_code == 200 + # with open("superset-assets.zip", "wb") as f: + # f.write(result.content) + + + ######################### + # IMPORTANT + ######################### + # The exported zip file had to be modified, otherwise we get: + # + # {"errors": [{"message": "Error importing assets", "error_type": "GENERIC_COMMAND_ERROR", "level": "warning", "extra": {"databases/Trino.yaml": {"extra": {"disable_data_preview": ["Unknown field."]}}, "issue_codes": [{"code": 1010, "message": "Issue 1010 - Superset encountered an error while running a command."}]}}]} + # + # The file databases/Trino.yaml was modified and the attribute "extra.disable_data_preview" was removed + ######################### + logging.info("Importing all assets") + files = { + "bundle": ("superset-assets.zip", open("superset-assets.zip", "rb")), + } + data = { + "passwords": '{"databases/Trino.yaml": "demo"}' + } + result = session.post(f"{base_url}/api/v1/assets/import", headers=headers, files=files, data=data) + print(result) + print(result.text) + assert result.status_code == 200 + + logging.info("Finished setup of Superset") diff --git a/demos/trino-taxi-data/superset-assets.zip b/demos/trino-taxi-data/superset-assets.zip new file mode 100644 index 00000000..0fdb31e5 Binary files /dev/null and b/demos/trino-taxi-data/superset-assets.zip differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/minio_1.png b/docs/modules/ROOT/images/demo-trino-taxi-data/minio_1.png new file mode 100644 index 00000000..83251ab3 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/minio_1.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/minio_2.png b/docs/modules/ROOT/images/demo-trino-taxi-data/minio_2.png new file mode 100644 index 00000000..32febce0 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/minio_2.png differ diff --git 
a/docs/modules/ROOT/images/demo-trino-taxi-data/minio_3.png b/docs/modules/ROOT/images/demo-trino-taxi-data/minio_3.png new file mode 100644 index 00000000..3d03588a Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/minio_3.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/overview.png b/docs/modules/ROOT/images/demo-trino-taxi-data/overview.png new file mode 100644 index 00000000..c6fbcb3c Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/overview.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/superset_1.png b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_1.png new file mode 100644 index 00000000..cc6f80a8 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_1.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/superset_2.png b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_2.png new file mode 100644 index 00000000..3e1641b6 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_2.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/superset_3.png b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_3.png new file mode 100644 index 00000000..b30275e3 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_3.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/superset_4.png b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_4.png new file mode 100644 index 00000000..8e9b495d Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_4.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/superset_5.png b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_5.png new file mode 100644 index 00000000..7ffeedd6 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_5.png differ diff --git 
a/docs/modules/ROOT/images/demo-trino-taxi-data/superset_6.png b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_6.png new file mode 100644 index 00000000..14f0eca5 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_6.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/superset_7.png b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_7.png new file mode 100644 index 00000000..7163b3c5 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/superset_7.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/trino_1.png b/docs/modules/ROOT/images/demo-trino-taxi-data/trino_1.png new file mode 100644 index 00000000..0564258d Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/trino_1.png differ diff --git a/docs/modules/ROOT/images/demo-trino-taxi-data/trino_2.png b/docs/modules/ROOT/images/demo-trino-taxi-data/trino_2.png new file mode 100644 index 00000000..75a10bf0 Binary files /dev/null and b/docs/modules/ROOT/images/demo-trino-taxi-data/trino_2.png differ diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index 9a6f25b8..e4e4ff44 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -6,5 +6,7 @@ ** xref:commands/release.adoc[] ** xref:commands/services.adoc[] ** xref:commands/stack.adoc[] +* Demos +** xref:demos/trino-taxi-data.adoc[] * xref:customization.adoc[] * xref:troubleshooting.adoc[] diff --git a/docs/modules/ROOT/pages/commands/demo.adoc b/docs/modules/ROOT/pages/commands/demo.adoc index f8e1def5..e1d5bd61 100644 --- a/docs/modules/ROOT/pages/commands/demo.adoc +++ b/docs/modules/ROOT/pages/commands/demo.adoc @@ -1,3 +1,130 @@ = Demo -Not implemented yet +A demo is an end-to-end demonstration of the usage of the Stackable data platform. +It is tied to a specific stack of the Stackable data platform, which will provide the required products for the demo. 
+ +== Browse available demos +To list the available demos, run the following command: + +[source,console] +---- +$ stackablectl demo list +DEMO STACKABLE STACK DESCRIPTION +trino-taxi-data trino-superset-s3 Demo loading 2.5 years of New York taxi data into S3 bucket, creating a Trino table and a Superset dashboard +---- + +Detailed information of a demo can be queried with the `describe` command: + +[source,console] +---- +$ stackablectl demo describe trino-taxi-data +Demo: trino-taxi-data +Description: Demo loading 2.5 years of New York taxi data into S3 bucket, creating a Trino table and a Superset dashboard +Documentation: https://docs.stackable.tech/stackablectl/stable/demos/trino-taxi-data.html +Stackable stack: trino-superset-s3 +Labels: trino, superset, minio, s3, ny-taxi-data +---- + +Future versions of `stackablectl` will also allow to search for demos based on the labels. + +== Install demo +=== Using existing Kubernetes cluster +If you want to access a Kubernetes cluster, make sure your https://kubernetes.io/docs/tasks/tools/#kubectl[`kubectl`] Kubernetes client is configured to interact with the Kubernetes cluster. 
+After that run the following command + +[source,console] +---- +$ stackablectl demo install trino-taxi-data +[INFO ] Installing demo trino-taxi-data +[INFO ] Installing stack trino-superset-s3 +[INFO ] Installing release 22.06 +[INFO ] Installing airflow operator in version 0.4.0 +[INFO ] Installing commons operator in version 0.2.0 +[INFO ] Installing druid operator in version 0.6.0 +[INFO ] Installing hbase operator in version 0.3.0 +[INFO ] Installing hdfs operator in version 0.4.0 +[INFO ] Installing hive operator in version 0.6.0 +[INFO ] Installing kafka operator in version 0.6.0 +[INFO ] Installing nifi operator in version 0.6.0 +[INFO ] Installing opa operator in version 0.9.0 +[INFO ] Installing secret operator in version 0.5.0 +[INFO ] Installing spark-k8s operator in version 0.3.0 +[INFO ] Installing superset operator in version 0.5.0 +[INFO ] Installing trino operator in version 0.4.0 +[INFO ] Installing zookeeper operator in version 0.10.0 +[INFO ] Installing components of stack trino-superset-s3 +[INFO ] Installed stack trino-superset-s3 +[INFO ] Installing components of demo trino-taxi-data +[INFO ] Installed demo trino-taxi-data. Use "stackablectl services list" to list the installed services +---- + +=== Using local kind cluster +If you don't have a Kubernetes cluster available, `stackablectl` can spin up a https://kind.sigs.k8s.io/[kind] Kubernetes cluster for you. +Make sure you have `kind` installed and run the following command: + +[source,console] +---- +$ stackablectl demo install trino-taxi-data --kind-cluster +[INFO ] Creating kind cluster stackable-data-platform +Creating cluster "stackable-data-platform" ... 
+ ✓ Ensuring node image (kindest/node:v1.21.1) 🖼 + ✓ Preparing nodes 📦 📦 📦 📦 + ✓ Writing configuration 📜 + ✓ Starting control-plane 🕹️ + ✓ Installing CNI 🔌 + ✓ Installing StorageClass 💾 + ✓ Joining worker nodes 🚜 +Set kubectl context to "kind-stackable-data-platform" +You can now use your cluster with: + +kubectl cluster-info --context kind-stackable-data-platform + +Have a nice day! 👋 +[INFO ] Installing demo trino-taxi-data +[INFO ] Installing stack trino-superset-s3 +[INFO ] Installing release 22.06 +[INFO ] Installing airflow operator in version 0.4.0 +[INFO ] Installing commons operator in version 0.2.0 +[INFO ] Installing druid operator in version 0.6.0 +[INFO ] Installing hbase operator in version 0.3.0 +[INFO ] Installing hdfs operator in version 0.4.0 +[INFO ] Installing hive operator in version 0.6.0 +[INFO ] Installing kafka operator in version 0.6.0 +[INFO ] Installing nifi operator in version 0.6.0 +[INFO ] Installing opa operator in version 0.9.0 +[INFO ] Installing secret operator in version 0.5.0 +[INFO ] Installing spark-k8s operator in version 0.3.0 +[INFO ] Installing superset operator in version 0.5.0 +[INFO ] Installing trino operator in version 0.4.0 +[INFO ] Installing zookeeper operator in version 0.10.0 +[INFO ] Installing components of stack trino-superset-s3 +[INFO ] Installed stack trino-superset-s3 +[INFO ] Installing components of demo trino-taxi-data +[INFO ] Installed demo trino-taxi-data. 
Use "stackablectl services list" to list the installed services +---- + +=== List deployed services +After installing your demo you can use the xref:commands/services.adoc[] command to list the installed services as follows + +[source,console] +---- +$ stackablectl services list --all-namespaces + PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS + + hive hive default hive 172.18.0.4:32658 + metrics 172.18.0.4:30745 + + opa opa default http http://172.18.0.2:31324 + + superset superset default external-superset http://172.18.0.2:32716 Admin user: admin, password: admin + + trino trino default coordinator-http http://172.18.0.5:32128 + coordinator-metrics 172.18.0.5:31199 + coordinator-https https://172.18.0.5:32721 + + minio minio-trino default http http://172.18.0.4:31026 Third party service + console-http http://172.18.0.4:30354 Admin user: root, password: rootroot +---- + +== Uninstall demo +Currently there is no support for uninstalling a demo again. diff --git a/docs/modules/ROOT/pages/commands/services.adoc b/docs/modules/ROOT/pages/commands/services.adoc index 16b060c8..32a2d951 100644 --- a/docs/modules/ROOT/pages/commands/services.adoc +++ b/docs/modules/ROOT/pages/commands/services.adoc @@ -1,4 +1,5 @@ = Services + In this context a (Stackable) service is a running instance of a data product. This is different from the meaning of a Kubernetes service which is an abstract way to expose an application running on a set of pods as a network service. 
== List running services @@ -9,15 +10,22 @@ An example invocation looks as follows: [source,console] ---- -$ stackablectl services list -PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS -airflow airflow default webserver-airflow: http://172.18.0.5:32290 Admin user: airflow, password: airflow -druid druid default router-http: http://172.18.0.2:30245 - coordinator-http: http://172.18.0.4:30506 -superset superset default external-superset: http://172.18.0.2:31891 Admin user: admin, password: admin -zookeeper druid-zookeeper default zk: 172.18.0.5:30890 -minio minio-druid default http: http://172.18.0.4:32173 Third party service - console-http: http://172.18.0.4:30982 Admin user: root, password: rootroot +$ stackablectl services list --all-namespaces + PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS + + hive hive default hive 172.18.0.4:32658 + metrics 172.18.0.4:30745 + + opa opa default http http://172.18.0.2:31324 + + superset superset default external-superset http://172.18.0.2:32716 Admin user: admin, password: admin + + trino trino default coordinator-http http://172.18.0.5:32128 + coordinator-metrics 172.18.0.5:31199 + coordinator-https https://172.18.0.5:32721 + + minio minio-trino default http http://172.18.0.4:31026 Third party service + console-http http://172.18.0.4:30354 Admin user: root, password: rootroot ---- You can also @@ -31,14 +39,20 @@ To achieve this you can use the following command: [source,console] ---- $ stackablectl services list --all-namespaces --redact-credentials --show-versions -PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS -airflow airflow default webserver-airflow: http://172.18.0.5:32290 Admin user: airflow, password: - version 2.2.5-python39-stackable0.3.0 -druid druid default router-http: http://172.18.0.2:30245 version 0.23.0-stackable0.1.0 - coordinator-http: http://172.18.0.4:30506 -superset superset default external-superset: http://172.18.0.2:31891 Admin user: admin, password: - version 1.5.1-stackable0.2.0 -zookeeper druid-zookeeper 
default zk: 172.18.0.5:30890 version 3.8.0-stackable0.7.1 -minio minio-druid default http: http://172.18.0.4:32173 Third party service - console-http: http://172.18.0.4:30982 Admin user: root, password: + PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS + + hive hive default hive 172.18.0.5:30298 version 2.3.9-stackable0.4.0 + metrics 172.18.0.5:31633 + + opa opa default http http://172.18.0.5:30316 version 0.41.0-stackable0.1.0 + + superset superset default external-superset http://172.18.0.4:32295 Admin user: admin, password: + version 1.5.1-stackable0.2.0 + + trino trino default coordinator-http http://172.18.0.3:30167 version 387-stackable0.1.0 + coordinator-metrics 172.18.0.3:31818 + coordinator-https https://172.18.0.3:30141 + + minio minio-trino default http http://172.18.0.3:31062 Third party service + console-http http://172.18.0.3:30503 Admin user: root, password: ---- diff --git a/docs/modules/ROOT/pages/commands/stack.adoc b/docs/modules/ROOT/pages/commands/stack.adoc index b2bffe6c..35a8e1e1 100644 --- a/docs/modules/ROOT/pages/commands/stack.adoc +++ b/docs/modules/ROOT/pages/commands/stack.adoc @@ -1,6 +1,7 @@ = Stack + A stack is a collection of ready-to-use Stackable data products as well as required third-party services like Postgresql or MinIO. -It is tied to a specific release of the Stackable Data Platform, which will provide the required operators for the Stack. +It is tied to a specific release of the Stackable data platform, which will provide the required operators for the stack. == Browse available stacks To list the available stacks, run the following command: @@ -24,15 +25,17 @@ Stackable release: 22.06 Labels: druid, superset, minio, s3 ---- -Future version of `stackablectl` will allow to search for stacks based on the labels. +Future versions of `stackablectl` will also allow to search for stacks based on the labels. 
== Install stack +=== Using existing Kubernetes cluster If you want to access a Kubernetes cluster, make sure your https://kubernetes.io/docs/tasks/tools/#kubectl[`kubectl`] Kubernetes client is configured to interact with the Kubernetes cluster. After that run the following command [source,console] ---- $ stackablectl stack install druid-superset-s3 +[INFO ] Installing stack druid-superset-s3 [INFO ] Installing release 22.06 [INFO ] Installing airflow operator in version 0.4.0 [INFO ] Installing commons operator in version 0.2.0 @@ -52,6 +55,7 @@ $ stackablectl stack install druid-superset-s3 [INFO ] Installed stack druid-superset-s3 ---- +=== Using local kind cluster If you don't have a Kubernetes cluster available, `stackablectl` can spin up a https://kind.sigs.k8s.io/[kind] Kubernetes cluster for you. Make sure you have `kind` installed and run the following command: @@ -73,6 +77,7 @@ You can now use your cluster with: kubectl cluster-info --context kind-stackable-data-platform Have a nice day! 👋 +[INFO ] Installing stack druid-superset-s3 [INFO ] Installing release 22.06 [INFO ] Installing airflow operator in version 0.4.0 [INFO ] Installing commons operator in version 0.2.0 @@ -92,20 +97,28 @@ Have a nice day! 
👋 [INFO ] Installed stack druid-superset-s3 ---- -After installing the stack, we can access the running services using the xref:commands/services.adoc[] command: +=== List deployed services +After installing your stack you can use the xref:commands/services.adoc[] command to list the installed services as follows [source,console] ---- -$ stackablectl services list -PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS -druid druid default router-http: http://172.18.0.2:30245 - coordinator-http: http://172.18.0.4:30506 -superset superset default external-superset: http://172.18.0.2:31891 Admin user: admin, password: admin -zookeeper druid-zookeeper default zk: 172.18.0.5:30890 -minio minio-druid default http: http://172.18.0.4:32173 Third party service - console-http: http://172.18.0.4:30982 Admin user: root, password: rootroot +$ stackablectl services list --all-namespaces + PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS + + hive hive default hive 172.18.0.4:32658 + metrics 172.18.0.4:30745 + + opa opa default http http://172.18.0.2:31324 + + superset superset default external-superset http://172.18.0.2:32716 Admin user: admin, password: admin + + trino trino default coordinator-http http://172.18.0.5:32128 + coordinator-metrics 172.18.0.5:31199 + coordinator-https https://172.18.0.5:32721 + + minio minio-trino default http http://172.18.0.4:31026 Third party service + console-http http://172.18.0.4:30354 Admin user: root, password: rootroot ---- == Uninstall stack Currently there is no support for uninstalling a stack again. -Maybe a solution would be to uninstall the components of the stack but leave the release running. 
diff --git a/docs/modules/ROOT/pages/customization.adoc b/docs/modules/ROOT/pages/customization.adoc index f58e2d99..e8c33a43 100644 --- a/docs/modules/ROOT/pages/customization.adoc +++ b/docs/modules/ROOT/pages/customization.adoc @@ -1,7 +1,8 @@ = Customization + If you're working for a large company, chances are that there are multiple teams using the Stackable Data Platform. A single team can also operate multiple Stackable Data Platforms. -`stackablectl` is build in a way customers or even single developers can define their own release, stack and even demo! +`stackablectl` is built in a way customers or developers can define their own releases, stacks and demos! This way it is possible to cover the following use-cases. Any additional demos/stacks/releases you specify, will be added to the already existing ones provided by Stackable. diff --git a/docs/modules/ROOT/pages/demos/trino-taxi-data.adoc b/docs/modules/ROOT/pages/demos/trino-taxi-data.adoc new file mode 100644 index 00000000..54b51b5b --- /dev/null +++ b/docs/modules/ROOT/pages/demos/trino-taxi-data.adoc @@ -0,0 +1,311 @@ += trino-taxi-data + +This demo will + +* Install the required Stackable operators +* Spin up the following data products +** Superset: A modern data exploration and visualization platform. This demo uses it to execute SQL queries and build dashboards +** Trino: A fast distributed SQL query engine for big data analytics that helps you explore your data universe. This demo uses it to enable SQL access to the data +** MinIO: An S3 compatible object store. This demo uses it as persistent storage to store all the data used +** Hive metastore: A service that stores metadata related to Apache Hive and other services. This demo uses it as metadata storage for Trino +* Load testdata into S3. 
It contains 2.5 years of New York City taxi trips +* Make data accessible via SQL in Trino +* Create Superset dashboards for visualization + +You can see the deployed products as well as their relationship in the following diagram: + +image::demo-trino-taxi-data/overview.png[] + +== Inspect data in S3 +The S3 provided by MinIO is used as persistent storage to store all the data used. +You can have a look at the testdata within the MinIO Webinterface by opening the given `minio` endpoint `console-http` from the `stackablectl services list` command. +You have to use the endpoint from your command output, in this case it is http://172.18.0.3:30503. +Open it with your favorite browser. + +image::demo-trino-taxi-data/minio_1.png[] + +Log in with the credentials username `root`, password `rootroot`. + +image::demo-trino-taxi-data/minio_2.png[] + +Click on the blue button `Browse` on the bucket `demo` and open the folders `ny-taxi-data` -> `raw`. + +image::demo-trino-taxi-data/minio_3.png[] + +As you can see the demo uploaded 1GB of parquet files, one file per month. +The data contain taxi rides in New York City. +You can see the file size (and therefore the number of rides) decrease drastically because of the Covid-19 pandemic starting from `2020-03`. +https://parquet.apache.org/[Parquet] is an open source, column-oriented data file format designed for efficient data storage and retrieval. + +== Use Trino webinterface +Trino offers SQL access to the data within S3. +Open the `trino` endpoint `coordinator-https` in your browser (`https://172.18.0.3:30141` in this case). +If you get a warning regarding the self signed certificate (e.g. `Warning: Potential Security Risk Ahead`) you have to tell your browser to trust the website and continue. + +image::demo-trino-taxi-data/trino_1.png[] + +Log in with the credentials username `admin`, password `admin`. 
+ +image::demo-trino-taxi-data/trino_2.png[] + +When you start executing SQL queries you will see the queries getting processed here. + +== Use Superset webinterface +Superset gives the ability to execute SQL queries and build dashboards. +Open the `superset` endpoint `external-superset` in your browser (`http://172.18.0.4:32295` in this case). + +image::demo-trino-taxi-data/superset_1.png[] + +Log in with the credentials username `admin`, password `admin`. + +image::demo-trino-taxi-data/superset_2.png[] + +=== Inspect dashboard +On the top click on the tab `Dashboards`. + +image::demo-trino-taxi-data/superset_3.png[] + +Click on the dashboard called `Taxi data`. +It might take some time until the dashboard renders all the included charts. + +image::demo-trino-taxi-data/superset_4.png[] + +You can clearly see the impact of Covid-19 on the taxi business. + +=== Execute arbitrary SQL statements +Within Superset you can not only create dashboards but also run arbitrary SQL statements. +On the top click on the tab `SQL Lab` -> `SQL Editor`. + +image::demo-trino-taxi-data/superset_5.png[] + +On the left select the database `Trino`, the schema `demo` and set `See table schema` to `ny_taxi_data`. + +image::demo-trino-taxi-data/superset_6.png[] + +On the right textbox enter the desired SQL statement. +If you do not want to make one up you can use the following: + +[source,sql] +---- +select + format_datetime(tpep_pickup_datetime, 'YYYY/MM') as month, + count(*) as trips, + sum(total_amount) as sales, + avg(date_diff('minute', tpep_pickup_datetime, tpep_dropoff_datetime)) as avg_duration_min +from ny_taxi_data +group by 1 +order by 1 +---- + +image::demo-trino-taxi-data/superset_7.png[] + +== Summary +The demo loaded 2.5 years of taxi trip data from New York City with 68 million records and a total size of 1GB in parquet files. +The data was put into the S3 storage. +Trino enables you to query the data via SQL. 
+Superset was used as a web-based frontend to execute SQL statements and build dashboards. + +== Where to go from here +There are multiple paths to go from here. +The following sections can give you some ideas on what to explore next. +You can find the description of the taxi data https://www1.nyc.gov/assets/tlc/downloads/pdf/data_dictionary_trip_records_yellow.pdf[on the New York City website]. + +=== Execute arbitrary SQL statements +Within Superset you can execute arbitrary SQL statements to explore the taxi data. +Can you answer the following questions by executing SQL statements? +The https://trino.io/docs/current/language.html[Trino documentation on their SQL language] might help you. + +How many taxi trips were there in the year 2021? + +.See the answer +[%collapsible] +==== + +[source,sql] +---- +select + count(*) as trips +from ny_taxi_data +where year(tpep_pickup_datetime) = 2021 +---- + +returns 30.903.982 trips. +==== + +What was the maximum number of passengers? + +.See the answer +[%collapsible] +==== + +[source,sql] +---- +select + max(passenger_count) as max_passenger_count +from ny_taxi_data; +---- + +returns 112 passengers. + +Well that's weird. +Let's examine the passenger distribution. + +[source,sql] +---- +select + passenger_count, + count(*) as frequency +from ny_taxi_data +group by 1 +order by 1 desc +limit 100 +---- + +returns + +[source] +---- + passenger_count | frequency +-----------------+----------- + 112.0 | 1 + 96.0 | 1 + 9.0 | 98 + 8.0 | 156 + 7.0 | 229 + 6.0 | 1089568 + 5.0 | 1715439 + 4.0 | 1052834 + 3.0 | 2504112 + 2.0 | 9575299 + 1.0 | 48133494 + 0.0 | 1454268 + NULL | 2698591 +---- + +We can see that one trip had 112 and another one 96 passengers. +All the other trips start with a more "realistic" number of 9 passengers. + +As a bonus question: What *exactly* did the large passenger do? 
+ +[source,sql] +---- +select * +from ny_taxi_data +where passenger_count > 50 +---- + +returns + +[source] +---- + vendorid | tpep_pickup_datetime | tpep_dropoff_datetime | duration_min | passenger_count | trip_distance | payment_type | fare_amount | tip_amount | total_amount +----------+-------------------------+-------------------------+--------------+-----------------+---------------+--------------+-------------+------------+-------------- + 2 | 2021-08-01 19:47:43.000 | 2021-08-01 19:57:54.000 | 10 | 112.0 | 1.8 | Credit card | 9.0 | 2.46 | 14.76 + 2 | 2021-08-03 11:51:58.000 | 2021-08-03 12:09:29.000 | 17 | 96.0 | 1.56 | Credit card | 11.5 | 2.22 | 17.02 +---- +Pretty cheap for that number of people! +These are probably invalid records. +==== + +What was the highest tip (measured in percentage of the original fee) ever given? + +.See the answer +[%collapsible] +==== + +[source,sql] +---- +select + total_amount as fee, + tip_amount as tip, + tip_amount / total_amount * 100 as tip_percentage +from ny_taxi_data +where total_amount > 0 +order by 3 desc +limit 5 +---- + +returns + +[source] +---- + fee | tip | tip_percentage +------+------+-------------------- + 4.2 | 10.0 | 238.0952380952381 + 18.2 | 25.0 | 137.36263736263737 + 8.24 | 9.24 | 112.13592233009709 + 0.66 | 0.66 | 100.0 + 0.01 | 0.01 | 100.0 +---- +==== + +=== Create additional dashboards +You also have the possibility to create additional charts and bundle them together in a dashboard. +Have a look at https://superset.apache.org/docs/creating-charts-dashboards/creating-your-first-dashboard#creating-charts-in-explore-view[the Superset documentation] on how to do that. + +=== Load additional data +You can use the MinIO web interface to upload any data. +As an alternative you can use the S3 API with an S3 client, e.g. https://s3tools.org/s3cmd[s3cmd]. +It is recommended to put the data into a folder (prefix) in the `demo` bucket.
+ +Have a look at the defined tables inside the `hive`.`demo` schema on how to inform Trino about the newly available data. + +.Table definitions +[%collapsible] +==== + +[source,sql] +---- +show create table hive.demo.ny_taxi_data_raw +---- + +produces something like + +[source,sql] +---- +CREATE TABLE IF NOT EXISTS hive.demo.ny_taxi_data_raw ( + VendorID BIGINT, + tpep_pickup_datetime TIMESTAMP, + tpep_dropoff_datetime TIMESTAMP, + passenger_count DOUBLE, + trip_distance DOUBLE, + payment_type BIGINT, + Fare_amount DOUBLE, + Tip_amount DOUBLE, + Total_amount DOUBLE +) WITH ( + external_location = 's3a://demo/ny-taxi-data/raw/', + format = 'parquet' +) +---- + +If you want to transform or filter your data in any way before using it, e.g. in Superset, you can create a view as follows: +[source,sql] +---- +show create view hive.demo.ny_taxi_data +---- + +produces something like + +[source,sql] +---- +create or replace view hive.demo.ny_taxi_data as +select + vendorid, + tpep_pickup_datetime, + tpep_dropoff_datetime, + date_diff('minute', tpep_pickup_datetime, tpep_dropoff_datetime) as duration_min, + passenger_count, + trip_distance, + case payment_type when 1 then 'Credit card' when 2 then 'Cash' when 3 then 'No charge' when 4 then 'Dispute' when 6 then 'Voided trino' else 'Unknown' end as payment_type, + fare_amount, + tip_amount, + total_amount +from hive.demo.ny_taxi_data_raw +where tpep_pickup_datetime >= from_iso8601_timestamp('2019-12-01T00:00:00') +and tpep_pickup_datetime <= from_iso8601_timestamp('2022-05-31T00:00:00') +---- +==== + +=== Connect to Trino via CLI, Python or DBeaver +If you prefer running your SQL statements via the command line, a Python script or a graphical database manager like DBeaver, please have a look at https://trino.io/docs/current/client.html[the Trino documentation] on how to do that.
diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc index 21fc4aa4..a179fcf8 100644 --- a/docs/modules/ROOT/pages/index.adoc +++ b/docs/modules/ROOT/pages/index.adoc @@ -22,7 +22,7 @@ image::layers.png[Layers of the deployed services] == Operators This layer consists of Stackable operators managing the individual data products. -They can either be installed one by one with the command `stackablectl operator` or from a release with `stackablectl release` which is preferred. +They can either be installed one by one with the command `stackablectl operator` or from a release with `stackablectl release`, which is recommended. A release is a well-playing bundle of operators that get released approximately every 2 months. == Stacks @@ -33,7 +33,7 @@ A stack needs a release (of Stackable operators) to run on. To achieve this a stacks has a dependency on a release which gets automatically installed when a stack is installed. == Demos -A demo is an end-to-end demonstration of the usage of the Stackable Data Platform. +A demo is an end-to-end demonstration of the usage of the Stackable data platform. It contains . Installing a Stackable release diff --git a/docs/modules/ROOT/pages/installation.adoc b/docs/modules/ROOT/pages/installation.adoc index 785ddbb4..2267b58f 100644 --- a/docs/modules/ROOT/pages/installation.adoc +++ b/docs/modules/ROOT/pages/installation.adoc @@ -8,11 +8,12 @@ If the binary does not work for you, you can always <<_build_stackablectl_from_s === Linux -Download the `stackablectl-x86_64-unknown-linux-gnu` binary file from the link:https://github.com/stackabletech/stackablectl/releases/latest[latest release], then rename the file to `stackabelctl`: +Download the `stackablectl-x86_64-unknown-linux-gnu` binary file from the link:https://github.com/stackabletech/stackablectl/releases/latest[latest release], then rename the file to `stackablectl`.
+You can also use the following command: [source,console] ---- -$ mv stackablectl-x86_64-unknown-linux-gnu stackablectl +$ curl -L -o stackablectl https://github.com/stackabletech/stackablectl/releases/latest/download/stackablectl-x86_64-unknown-linux-gnu ---- and mark it as executable: diff --git a/docs/modules/ROOT/pages/quickstart.adoc b/docs/modules/ROOT/pages/quickstart.adoc index 5f2b3e16..7f5e3a15 100644 --- a/docs/modules/ROOT/pages/quickstart.adoc +++ b/docs/modules/ROOT/pages/quickstart.adoc @@ -1,5 +1,144 @@ = Quickstart -This pages wait's until the xref:commands/demo.adoc[] is ready. +== Goal +In this Quickstart guide you will install a xref:commands/demo.adoc[], which is an end-to-end demonstration of the usage of the Stackable data platform. -When the demo command is available we will browse the demos and install a demo together. +== Install stackablectl +Please follow the xref:installation.adoc[Installation documentation] to install `stackablectl`. + +== Browse available demos +Stackable provides a set of ready-to-use demos. +As of writing (2022/09/10) only a single demo is available, but further demos will be added in the future. +They will automatically show up, as `stackablectl` fetches the available list of demos via the Internet. +To list the available demos run the following command: + +[source,console] +---- +$ stackablectl demo list +DEMO STACKABLE STACK DESCRIPTION +trino-taxi-data trino-superset-s3 Demo loading 2.5 years of New York taxi data into S3 bucket, creating a Trino table and a Superset dashboard +---- + +[NOTE] +==== +When you are on a Windows system you have to replace the `stackablectl` command with `stackablectl.exe`, e.g. `stackablectl.exe demo list`. +This applies to all commands below. +==== + +For this guide we will use the xref:demos/trino-taxi-data.adoc[] demo. +The installation of other available demos should work the same way.
+You simply need to use the name of the chosen demo instead of `trino-taxi-data` in the following commands. + +== Install demo +The installation depends on whether you already have a Kubernetes cluster available to run the Stackable data platform on. + +=== Using existing Kubernetes cluster +If you want to access a Kubernetes cluster, make sure your https://kubernetes.io/docs/tasks/tools/#kubectl[`kubectl`] Kubernetes client is configured to interact with the Kubernetes cluster. +After that, run the following command: + +[source,console] +---- +$ stackablectl demo install trino-taxi-data +[INFO ] Installing demo trino-taxi-data +[INFO ] Installing stack trino-superset-s3 +[INFO ] Installing release 22.06 +[INFO ] Installing airflow operator in version 0.4.0 +[INFO ] Installing commons operator in version 0.2.0 +[INFO ] Installing druid operator in version 0.6.0 +[INFO ] Installing hbase operator in version 0.3.0 +[INFO ] Installing hdfs operator in version 0.4.0 +[INFO ] Installing hive operator in version 0.6.0 +[INFO ] Installing kafka operator in version 0.6.0 +[INFO ] Installing nifi operator in version 0.6.0 +[INFO ] Installing opa operator in version 0.9.0 +[INFO ] Installing secret operator in version 0.5.0 +[INFO ] Installing spark-k8s operator in version 0.3.0 +[INFO ] Installing superset operator in version 0.5.0 +[INFO ] Installing trino operator in version 0.4.0 +[INFO ] Installing zookeeper operator in version 0.10.0 +[INFO ] Installing components of stack trino-superset-s3 +[INFO ] Installed stack trino-superset-s3 +[INFO ] Installing components of demo trino-taxi-data +[INFO ] Installed demo trino-taxi-data. Use "stackablectl services list" to list the installed services +---- + +=== Using local kind cluster +If you don't have a Kubernetes cluster available, `stackablectl` can spin up a https://kind.sigs.k8s.io/[kind] Kubernetes cluster for you.
+Make sure you have `kind` installed and run the following command: + +[source,console] +---- +$ stackablectl demo install trino-taxi-data --kind-cluster +[INFO ] Creating kind cluster stackable-data-platform +Creating cluster "stackable-data-platform" ... + ✓ Ensuring node image (kindest/node:v1.21.1) 🖼 + ✓ Preparing nodes 📦 📦 📦 📦 + ✓ Writing configuration 📜 + ✓ Starting control-plane 🕹️ + ✓ Installing CNI 🔌 + ✓ Installing StorageClass 💾 + ✓ Joining worker nodes 🚜 +Set kubectl context to "kind-stackable-data-platform" +You can now use your cluster with: + +kubectl cluster-info --context kind-stackable-data-platform + +Have a nice day! 👋 +[INFO ] Installing demo trino-taxi-data +[INFO ] Installing stack trino-superset-s3 +[INFO ] Installing release 22.06 +[INFO ] Installing airflow operator in version 0.4.0 +[INFO ] Installing commons operator in version 0.2.0 +[INFO ] Installing druid operator in version 0.6.0 +[INFO ] Installing hbase operator in version 0.3.0 +[INFO ] Installing hdfs operator in version 0.4.0 +[INFO ] Installing hive operator in version 0.6.0 +[INFO ] Installing kafka operator in version 0.6.0 +[INFO ] Installing nifi operator in version 0.6.0 +[INFO ] Installing opa operator in version 0.9.0 +[INFO ] Installing secret operator in version 0.5.0 +[INFO ] Installing spark-k8s operator in version 0.3.0 +[INFO ] Installing superset operator in version 0.5.0 +[INFO ] Installing trino operator in version 0.4.0 +[INFO ] Installing zookeeper operator in version 0.10.0 +[INFO ] Installing components of stack trino-superset-s3 +[INFO ] Installed stack trino-superset-s3 +[INFO ] Installing components of demo trino-taxi-data +[INFO ] Installed demo trino-taxi-data. Use "stackablectl services list" to list the installed services +---- + +== List deployed Stackable services +The `stackablectl demo install` command installed all the needed bits and pieces to make the demo work.
+Depending on your Internet connectivity it can take quite some time to download all the needed software (in the form of Docker images). +So it might be the case that you have installed the data products but they aren't ready yet. + +To list the installed Stackable services run the following command: + +[source,console] +---- +$ stackablectl services list --all-namespaces + PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS + + hive hive default hive 172.18.0.5:30298 + metrics 172.18.0.5:31633 + + opa opa default http http://172.18.0.5:30316 + + superset superset default external-superset http://172.18.0.4:32295 Admin user: admin, password: admin + + trino trino default coordinator-http http://172.18.0.3:30167 + coordinator-metrics 172.18.0.3:31818 + coordinator-https https://172.18.0.3:30141 + + minio minio-trino default http http://172.18.0.3:31062 Third party service + console-http http://172.18.0.3:30503 Admin user: root, password: rootroot +---- +[NOTE] +==== +When a product doesn't have an endpoint yet, the product has not started yet. +Starting all of the products might take a considerable amount of time. +In case the product is not ready yet a warning might be shown. +==== + +== Proceed with the demo +Please read the documentation on the demo xref:demos/trino-taxi-data.adoc[] on how to proceed with the demo. diff --git a/src/arguments.rs b/src/arguments.rs index 3eb59dc9..37ad5993 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -1,6 +1,6 @@ use crate::{ - operator::CliCommandOperator, release::CliCommandRelease, services::CliCommandServices, - stack::CliCommandStack, + demo::CliCommandDemo, operator::CliCommandOperator, release::CliCommandRelease, + services::CliCommandServices, stack::CliCommandStack, }; use clap::{ArgEnum, Command, Parser, ValueHint}; use clap_complete::{generate, Generator, Shell}; @@ -71,6 +71,15 @@ pub struct CliArgs { /// Can be specified multiple times.
#[clap(long, multiple_occurrences(true), value_hint = ValueHint::FilePath)] pub additional_stacks_file: Vec, + + /// Adds a YAML file containing custom demos + /// + /// If you do not have access to the Stackable repositories on GitHub or if you want to maintain your own demos, you can specify additional YAML files containing demo information. + /// Have a look at for the structure. + /// Can either be a URL or a path to a file, e.g. `https://my.server/my-demos.yaml`, '/etc/my-demos.yaml' or `C:\Users\Bob\my-demos.yaml`. + /// Can be specified multiple times. + #[clap(long, multiple_occurrences(true), value_hint = ValueHint::FilePath)] + pub additional_demos_file: Vec, } #[derive(Parser)] @@ -83,7 +92,7 @@ pub enum CliCommand { #[clap(subcommand, alias("r"), alias("re"))] Release(CliCommandRelease), - /// This subcommand interacts with stacks which are ready-to-use combinations of products. + /// This subcommand interacts with stacks, which are ready-to-use combinations of products. #[clap(subcommand, alias("s"), alias("st"))] Stack(CliCommandStack), @@ -91,6 +100,10 @@ pub enum CliCommand { #[clap(subcommand, alias("svc"))] Services(CliCommandServices), + /// This command interacts with demos, which are end-to-end demonstrations of the usage of the Stackable data platform. + #[clap(subcommand, alias("d"), alias("de"))] + Demo(CliCommandDemo), + /// Output shell completion code for the specified shell. Completion(CliCommandCompletion), } diff --git a/src/demo.rs b/src/demo.rs new file mode 100644 index 00000000..bbdf2755 --- /dev/null +++ b/src/demo.rs @@ -0,0 +1,206 @@ +use crate::{ + arguments::OutputType, + helpers, kind, + stack::{self, StackManifest}, + CliArgs, +}; +use cached::proc_macro::cached; +use clap::{Parser, ValueHint}; +use indexmap::IndexMap; +use lazy_static::lazy_static; +use log::{info, warn}; +use serde::{Deserialize, Serialize}; +use std::{error::Error, ops::Deref, sync::Mutex}; + +lazy_static! 
{ + pub static ref DEMO_FILES: Mutex> = Mutex::new(vec![ + "https://raw.githubusercontent.com/stackabletech/stackablectl/main/demos/demos-v1.yaml" + .to_string(), + ]); +} + +#[derive(Parser)] +pub enum CliCommandDemo { + /// List all the available demos + #[clap(alias("ls"))] + List { + #[clap(short, long, arg_enum, default_value = "text")] + output: OutputType, + }, + /// Show details of a specific demo + #[clap(alias("desc"))] + Describe { + /// Name of the demo to describe + #[clap(required = true, value_hint = ValueHint::Other)] + demo: String, + + #[clap(short, long, arg_enum, default_value = "text")] + output: OutputType, + }, + /// Install a specific demo + #[clap(alias("in"))] + Install { + /// Name of the demo to install + #[clap(required = true, value_hint = ValueHint::Other)] + demo: String, + + /// If specified, a local Kubernetes cluster consisting of 4 nodes (1 for control-plane and 3 workers) for testing purposes will be created. + /// Kind is a tool to spin up a local Kubernetes cluster running on Docker on your machine. + /// You need to have `docker` and `kind` installed. 
+ /// Have a look at our documentation on how to install `kind` at + #[clap(short, long)] + kind_cluster: bool, + + /// Name of the kind cluster created if `--kind-cluster` is specified + #[clap( + long, + default_value = "stackable-data-platform", + requires = "kind-cluster", + value_hint = ValueHint::Other, + )] + kind_cluster_name: String, + }, +} + +impl CliCommandDemo { + pub async fn handle(&self) -> Result<(), Box> { + match self { + CliCommandDemo::List { output } => list_demos(output).await?, + CliCommandDemo::Describe { demo, output } => describe_demo(demo, output).await?, + CliCommandDemo::Install { + demo, + kind_cluster, + kind_cluster_name, + } => { + kind::handle_cli_arguments(*kind_cluster, kind_cluster_name)?; + install_demo(demo).await?; + } + } + Ok(()) + } +} + +pub fn handle_common_cli_args(args: &CliArgs) { + let mut demo_files = DEMO_FILES.lock().unwrap(); + demo_files.extend_from_slice(&args.additional_demos_file); +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct Demos { + demos: IndexMap, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +struct Demo { + description: String, + documentation: Option, + stackable_stack: String, + labels: Vec, + manifests: Vec, +} + +async fn list_demos(output_type: &OutputType) -> Result<(), Box> { + let output = get_demos().await; + match output_type { + OutputType::Text => { + println!("DEMO STACKABLE STACK DESCRIPTION"); + for (demo_name, demo) in output.demos.iter() { + println!( + "{:35} {:25} {}", + demo_name, demo.stackable_stack, demo.description, + ); + } + } + OutputType::Json => { + println!("{}", serde_json::to_string_pretty(&output)?); + } + OutputType::Yaml => { + println!("{}", serde_yaml::to_string(&output)?); + } + } + + Ok(()) +} + +async fn describe_demo(demo_name: &str, output_type: &OutputType) -> Result<(), Box> { + #[derive(Serialize)] + #[serde(rename_all = "camelCase")] + struct Output { + demo: 
String, + description: String, + documentation: Option, + stackable_stack: String, + labels: Vec, + } + + let demo = get_demo(demo_name).await?; + let output = Output { + demo: demo_name.to_string(), + description: demo.description, + documentation: demo.documentation, + stackable_stack: demo.stackable_stack, + labels: demo.labels, + }; + + match output_type { + OutputType::Text => { + println!("Demo: {}", output.demo); + println!("Description: {}", output.description); + if let Some(documentation) = output.documentation { + println!("Documentation: {}", documentation); + } + println!("Stackable stack: {}", output.stackable_stack); + println!("Labels: {}", output.labels.join(", ")); + } + OutputType::Json => { + println!("{}", serde_json::to_string_pretty(&output).unwrap()); + } + OutputType::Yaml => { + println!("{}", serde_yaml::to_string(&output).unwrap()); + } + } + + Ok(()) +} + +async fn install_demo(demo_name: &str) -> Result<(), Box> { + info!("Installing demo {demo_name}"); + let demo = get_demo(demo_name).await?; + stack::install_stack(&demo.stackable_stack).await?; + info!("Installing components of demo {demo_name}"); + stack::install_manifests(&demo.manifests).await?; + + info!("Installed demo {demo_name}. 
Use \"stackablectl services list\" to list the installed services"); + Ok(()) +} + +/// Cached because of potential slow network calls +#[cached] +async fn get_demos() -> Demos { + let mut all_demos = IndexMap::new(); + let demo_files = DEMO_FILES.lock().unwrap().deref().clone(); + for demo_file in demo_files { + let yaml = helpers::read_from_url_or_file(&demo_file).await; + match yaml { + Ok(yaml) => match serde_yaml::from_str::(&yaml) { + Ok(demos) => all_demos.extend(demos.demos), + Err(err) => warn!("Failed to parse demo list from {demo_file}: {err}"), + }, + Err(err) => { + warn!("Could not read from demo file \"{demo_file}\": {err}"); + } + } + } + + Demos { demos: all_demos } +} + +async fn get_demo(demo_name: &str) -> Result> { + get_demos() + .await + .demos + .remove(demo_name) // We need to remove to take ownership + .ok_or_else(|| format!("Demo {demo_name} not found. Use `stackablectl demo list` to list the available demos.").into()) +} diff --git a/src/main.rs b/src/main.rs index 0e4bb281..8bd413cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ use log::error; use std::{error::Error, process::exit, sync::Mutex}; mod arguments; +mod demo; mod helm; mod helpers; mod kind; @@ -52,11 +53,13 @@ async fn main() -> Result<(), Box> { helm::handle_common_cli_args(&args); release::handle_common_cli_args(&args); stack::handle_common_cli_args(&args); + demo::handle_common_cli_args(&args); let result = match &args.cmd { CliCommand::Operator(command) => command.handle().await, CliCommand::Release(command) => command.handle().await, CliCommand::Stack(command) => command.handle().await, + CliCommand::Demo(command) => command.handle().await, CliCommand::Services(command) => command.handle().await, CliCommand::Completion(command) => { let mut cmd = CliArgs::command(); diff --git a/src/release.rs b/src/release.rs index dbd94e02..890878a3 100644 --- a/src/release.rs +++ b/src/release.rs @@ -228,7 +228,7 @@ async fn uninstall_release(release_name: &str) { /// 
Cached because of potential slow network calls #[cached] async fn get_releases() -> Releases { - let mut all_releases: IndexMap = IndexMap::new(); + let mut all_releases = IndexMap::new(); let release_files = RELEASE_FILES.lock().unwrap().deref().clone(); for release_file in release_files { let yaml = helpers::read_from_url_or_file(&release_file).await; diff --git a/src/stack.rs b/src/stack.rs index b4a8670f..2e9ec089 100644 --- a/src/stack.rs +++ b/src/stack.rs @@ -16,7 +16,7 @@ lazy_static! { #[derive(Parser)] pub enum CliCommandStack { - /// List all the available stack + /// List all the available stacks #[clap(alias("ls"))] List { #[clap(short, long, arg_enum, default_value = "text")] @@ -97,7 +97,7 @@ struct Stack { #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] -enum StackManifest { +pub enum StackManifest { #[serde(rename_all = "camelCase")] HelmChart { release_name: String, @@ -111,7 +111,7 @@ enum StackManifest { #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] -struct HelmChartRepo { +pub struct HelmChartRepo { name: String, url: String, } @@ -175,14 +175,21 @@ async fn describe_stack(stack_name: &str, output_type: &OutputType) -> Result<() Ok(()) } -async fn install_stack(stack_name: &str) -> Result<(), Box> { +pub async fn install_stack(stack_name: &str) -> Result<(), Box> { info!("Installing stack {stack_name}"); let stack = get_stack(stack_name).await?; release::install_release(&stack.stackable_release, &[], &[]).await?; info!("Installing components of stack {stack_name}"); - for manifest in stack.manifests { + install_manifests(&stack.manifests).await?; + + info!("Installed stack {stack_name}"); + Ok(()) +} + +pub async fn install_manifests(manifests: &[StackManifest]) -> Result<(), Box> { + for manifest in manifests { match manifest { StackManifest::HelmChart { release_name, @@ -192,21 +199,23 @@ async fn install_stack(stack_name: &str) -> Result<(), Box> { options, } => { 
debug!("Installing helm chart {name} as {release_name}"); - HELM_REPOS.lock()?.insert(repo.name.clone(), repo.url); + HELM_REPOS + .lock()? + .insert(repo.name.clone(), repo.url.clone()); let values_yaml = serde_yaml::to_string(&options)?; helm::install_helm_release_from_repo( - &release_name, - &release_name, + release_name, + release_name, &repo.name, - &name, - Some(&version), + name, + Some(version), Some(&values_yaml), )? } StackManifest::PlainYaml(yaml_url_or_file) => { debug!("Installing yaml manifest from {yaml_url_or_file}"); - let manifests = helpers::read_from_url_or_file(&yaml_url_or_file) + let manifests = helpers::read_from_url_or_file(yaml_url_or_file) .await .map_err(|err| { format!( @@ -218,14 +227,13 @@ async fn install_stack(stack_name: &str) -> Result<(), Box> { } } - info!("Installed stack {stack_name}"); Ok(()) } /// Cached because of potential slow network calls #[cached] async fn get_stacks() -> Stacks { - let mut all_stacks: IndexMap = IndexMap::new(); + let mut all_stacks = IndexMap::new(); let stack_files = STACK_FILES.lock().unwrap().deref().clone(); for stack_file in stack_files { let yaml = helpers::read_from_url_or_file(&stack_file).await;