From 74780c5b1e8fa0f4599ff89afefd61424366c456 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 4 Apr 2024 11:27:32 +0900 Subject: [PATCH 1/3] Add an environment variable for testing remote pure Python library --- python/pyspark/testing/connectutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/testing/connectutils.py b/python/pyspark/testing/connectutils.py index bfe54b33f569b..da6b861e925a5 100644 --- a/python/pyspark/testing/connectutils.py +++ b/python/pyspark/testing/connectutils.py @@ -174,7 +174,7 @@ def conf(cls): @classmethod def master(cls): - return "local[4]" + return os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]") @classmethod def setUpClass(cls): From 8e68e6ab6f657dae70e8e443e73b8f655c31bffb Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 4 Apr 2024 12:13:50 +0900 Subject: [PATCH 2/3] test --- .github/workflows/build_main.yml | 2 +- .github/workflows/build_python_connect.yml | 101 +++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build_python_connect.yml diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 9ef52f326375b..f235d19580dc1 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -22,7 +22,7 @@ name: "Build" on: push: branches: - - '**' + - 'nonexistent' jobs: call-build-and-test: diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml new file mode 100644 index 0000000000000..17b3e1987aa54 --- /dev/null +++ b/.github/workflows/build_python_connect.yml @@ -0,0 +1,101 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: Build / Spark Connect Python-only (master, Python 3.11) + +on: + push: + branches: + - '**' + +jobs: + # Build: build Spark and run the tests for specified modules using maven + build: + name: "Build modules: pyspark-connect" + runs-on: ubuntu-latest + timeout-minutes: 300 + if: github.repository == 'apache/spark' + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + - name: Cache Scala, SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/scala-* + build/*.jar + ~/.sbt + key: build-spark-connect-python-only-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build-spark-connect-python-only- + - name: Cache Maven local repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: m2-build-spark-connect-python-only-${{ hashFiles('**/pom.xml') }} + restore-keys: | + m2-build-spark-connect-python-only- + - name: Install Java 17 + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: 17 + - name: Install Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + architecture: x64 + - name: Build Spark + run: | + export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + export JAVA_VERSION=17 + ./build/mvn $MAVEN_CLI_OPTS -Phive -DskipTests package + - name: Build Spark Connect pure Python package (pyspark-connect) + run: | + cd python + python packaging/connect/setup.py sdist + cd dist + pip install pyspark-connect-4.0.0.dev0.tar.gz + - name: Run tests + env: + SPARK_CONNECT_TESTING_REMOTE: sc://localhost + run: | + # Start a Spark Connect server + ./sbin/start-connect-server.sh --jars `ls connector/connect/server/target/**/spark-connect*SNAPSHOT.jar` + # Remove packages, Py4J and PySpark zipped library to make sure. + ./build/mvn clean + rm python/lib + ./python/run-tests --python-executables=python3 --modules pyspark-connect + - name: Clean up local Maven repository + run: | + rm -rf ~/.m2/repository/org/apache/spark + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-spark-connect-python-only + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v4 + with: + name: unit-tests-log-spark-connect-python-only + path: "**/target/unit-tests.log" From f0621b3a70e48ac64579f6c511238125adcef83b Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 4 Apr 2024 12:14:37 +0900 Subject: [PATCH 3/3] Revert "test" This reverts commit 8e68e6ab6f657dae70e8e443e73b8f655c31bffb. --- .github/workflows/build_main.yml | 2 +- .github/workflows/build_python_connect.yml | 101 --------------------- 2 files changed, 1 insertion(+), 102 deletions(-) delete mode 100644 .github/workflows/build_python_connect.yml diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index f235d19580dc1..9ef52f326375b 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -22,7 +22,7 @@ name: "Build" on: push: branches: - - 'nonexistent' + - '**' jobs: call-build-and-test: diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml deleted file mode 100644 index 17b3e1987aa54..0000000000000 --- a/.github/workflows/build_python_connect.yml +++ /dev/null @@ -1,101 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: Build / Spark Connect Python-only (master, Python 3.11) - -on: - push: - branches: - - '**' - -jobs: - # Build: build Spark and run the tests for specified modules using maven - build: - name: "Build modules: pyspark-connect" - runs-on: ubuntu-latest - timeout-minutes: 300 - if: github.repository == 'apache/spark' - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-spark-connect-python-only-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build-spark-connect-python-only- - - name: Cache Maven local repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: m2-build-spark-connect-python-only-${{ hashFiles('**/pom.xml') }} - restore-keys: | - m2-build-spark-connect-python-only- - - name: Install Java 17 - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: 17 - - name: Install Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: '3.11' - architecture: x64 - - name: Build Spark - run: | - export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - export JAVA_VERSION=17 - ./build/mvn $MAVEN_CLI_OPTS -Phive -DskipTests package - - name: Build Spark Connect pure Python package (pyspark-connect) - run: | - cd python - python packaging/connect/setup.py sdist - cd dist - pip install pyspark-connect-4.0.0.dev0.tar.gz - - name: Run tests - env: - SPARK_CONNECT_TESTING_REMOTE: sc://localhost - run: | - # Start a Spark Connect server - ./sbin/start-connect-server.sh --jars `ls connector/connect/server/target/**/spark-connect*SNAPSHOT.jar` - # Remove packages, Py4J and PySpark zipped library to make sure. - ./build/mvn clean - rm python/lib - ./python/run-tests --python-executables=python3 --modules pyspark-connect - - name: Clean up local Maven repository - run: | - rm -rf ~/.m2/repository/org/apache/spark - - name: Upload test results to report - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-spark-connect-python-only - path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() - uses: actions/upload-artifact@v4 - with: - name: unit-tests-log-spark-connect-python-only - path: "**/target/unit-tests.log"