From d503128be104350296d2a370b62d0b533915cff7 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Thu, 22 Nov 2018 16:08:20 +0800 Subject: [PATCH 01/34] Run and generate test coverage report from Python via Jenkins --- README.md | 2 ++ dev/run-tests.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f3b90ceb923ec..77095435722c3 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,8 @@ Please see the guidance on how to There is also a Kubernetes integration test, see resource-managers/kubernetes/integration-tests/README.md +For PySpark test coverage, see [this coverage report for the latest commit](https://spark-test.github.io/pyspark-coverage-site/). + ## A Note About Hadoop Versions Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported diff --git a/dev/run-tests.py b/dev/run-tests.py index e1ed2744d78b3..573ef36e8e79c 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -410,6 +410,60 @@ def run_python_tests(test_modules, parallelism): run_cmd(command) +def run_python_tests_with_coverage(test_modules, parallelism): + set_title_and_block("Running PySpark tests with coverage report", "BLOCK_PYSPARK_UNIT_TESTS") + + command = [os.path.join(SPARK_HOME, "python", "run-tests-with-coverage")] + if test_modules != [modules.root]: + command.append("--modules=%s" % ','.join(m.name for m in test_modules)) + command.append("--parallelism=%i" % parallelism) + run_cmd(command) + post_python_tests_results() + + +def post_python_tests_results(): + if "SPARK_TEST_KEY" not in os.environ: + print("[error] 'SPARK_TEST_KEY' environment variable was not set. Unable to post" + "PySpark coverage results.") + sys.exit(1) + spark_test_key = os.environ.get("SPARK_TEST_KEY") + with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] + commands = [[ + # Clone PySpark coverage site. + "git", + "clone", + "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key], + + # Copy generated coverage HTML. + ["cp", "-r", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME, "pyspark-coverage-site/"], + + # Check out to a temporary branch. + with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"], + + # Add all the files. + with_pyspark_coverage_site + ["git", "add", "-A"], + + # Commit current test coverage results. + with_pyspark_coverage_site + [ + "git", + "commit", + "-am", + '"Coverage report at latest commit in Apache Spark"', + '--author="Apache Spark Test Account "'], + + # Delete the old branch. + with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"], + + # Rename the temporary branch to master. + with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"], + + # Finally, force update to our repository. + with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]] + + for command in commands: + run_cmd(command) + + def run_python_packaging_tests(): set_title_and_block("Running PySpark packaging tests", "BLOCK_PYSPARK_PIP_TESTS") command = [os.path.join(SPARK_HOME, "dev", "run-pip-tests")] @@ -567,7 +621,18 @@ def main(): modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: - run_python_tests(modules_with_python_tests, opts.parallelism) + # We only run PySpark tests with coverage report in one specific job with + # Spark master with SBT in Jenkins. + is_sbt_master_job = ( + os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "") == "hadoop2.7" + and os.environ.get("SPARK_BRANCH", "") == "master" + and os.environ.get("AMPLAB_JENKINS", "") == "true" + and os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "") == "sbt") + is_sbt_master_job = True # Will remove this right before getting merged. + if is_sbt_master_job: + run_python_tests_with_coverage(modules_with_python_tests, opts.parallelism) + else: + run_python_tests(modules_with_python_tests, opts.parallelism) run_python_packaging_tests() if any(m.should_run_r_tests for m in test_modules): run_sparkr_tests() From 135e7aec2ff45d05a2f9a949eefa454f3b0234c2 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Thu, 22 Nov 2018 16:38:50 +0800 Subject: [PATCH 02/34] Add a logic to remove existing reports --- dev/run-tests.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dev/run-tests.py b/dev/run-tests.py index 573ef36e8e79c..96c3d501ad983 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -434,6 +434,9 @@ def post_python_tests_results(): "clone", "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key], + # Remove existing reports + ["rm", "-fr", "pyspark-coverage-site/*"], + # Copy generated coverage HTML. ["cp", "-r", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME, "pyspark-coverage-site/"], From 08ab7421d53fea1f037f92504888cd32b9c5036e Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 27 Nov 2018 23:57:00 +0900 Subject: [PATCH 03/34] Fold the functions --- dev/run-tests.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 96c3d501ad983..31e48a013d4e3 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -400,25 +400,21 @@ def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags): run_scala_tests_sbt(test_modules, test_profiles) -def run_python_tests(test_modules, parallelism): +def run_python_tests(test_modules, parallelism, with_coverage=False): set_title_and_block("Running PySpark tests", "BLOCK_PYSPARK_UNIT_TESTS") - command = [os.path.join(SPARK_HOME, "python", "run-tests")] + if with_coverage: + script = "run-tests-with-coverage" + else: + script = "run-tests" + command = [os.path.join(SPARK_HOME, "python", script)] if test_modules != [modules.root]: command.append("--modules=%s" % ','.join(m.name for m in test_modules)) command.append("--parallelism=%i" % parallelism) run_cmd(command) - -def run_python_tests_with_coverage(test_modules, parallelism): - set_title_and_block("Running PySpark tests with coverage report", "BLOCK_PYSPARK_UNIT_TESTS") - - command = [os.path.join(SPARK_HOME, "python", "run-tests-with-coverage")] - if test_modules != [modules.root]: - command.append("--modules=%s" % ','.join(m.name for m in test_modules)) - command.append("--parallelism=%i" % parallelism) - run_cmd(command) - post_python_tests_results() + if with_coverage: + post_python_tests_results() def post_python_tests_results(): @@ -632,10 +628,8 @@ def main(): and os.environ.get("AMPLAB_JENKINS", "") == "true" and os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "") == "sbt") is_sbt_master_job = True # Will remove this right before getting merged. - if is_sbt_master_job: - run_python_tests_with_coverage(modules_with_python_tests, opts.parallelism) - else: - run_python_tests(modules_with_python_tests, opts.parallelism) + run_python_tests( + modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job) run_python_packaging_tests() if any(m.should_run_r_tests for m in test_modules): run_sparkr_tests() From 125019d16a6e64537e493c62a8eb62c5c726b1fc Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Sun, 20 Jan 2019 20:08:15 +0800 Subject: [PATCH 04/34] Debug #1 --- dev/run-tests.py | 4 +++- python/run-tests-with-coverage | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 31e48a013d4e3..8e61bf0538685 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -411,7 +411,9 @@ def run_python_tests(test_modules, parallelism, with_coverage=False): if test_modules != [modules.root]: command.append("--modules=%s" % ','.join(m.name for m in test_modules)) command.append("--parallelism=%i" % parallelism) + print("Running %s" % command) run_cmd(command) + print("Finished %s" % command) if with_coverage: post_python_tests_results() @@ -616,7 +618,7 @@ def main(): build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks) # run the test suites - run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) + # run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: diff --git a/python/run-tests-with-coverage b/python/run-tests-with-coverage index 457821037d43c..572e5acb9b327 100755 --- a/python/run-tests-with-coverage +++ b/python/run-tests-with-coverage @@ -52,6 +52,8 @@ export COVERAGE_PROCESS_START="$FWDIR/.coveragerc" ./run-tests "$@" +echo "+++++++++++++++ Tests finished" + # Don't run coverage for the coverage command itself unset COVERAGE_PROCESS_START From e1a3f9db781e997732a463e6d52e82848fc2228d Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Jan 2019 11:49:44 +0800 Subject: [PATCH 05/34] Debug 2 --- python/run-tests-with-coverage | 2 +- python/run-tests.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/run-tests-with-coverage b/python/run-tests-with-coverage index 572e5acb9b327..5aebd8d7f0a78 100755 --- a/python/run-tests-with-coverage +++ b/python/run-tests-with-coverage @@ -50,8 +50,8 @@ export SPARK_CONF_DIR="$COVERAGE_DIR/conf" # This environment variable enables the coverage. export COVERAGE_PROCESS_START="$FWDIR/.coveragerc" +echo "+++++++++++++++ Tests started" ./run-tests "$@" - echo "+++++++++++++++ Tests finished" # Don't run coverage for the coverage command itself diff --git a/python/run-tests.py b/python/run-tests.py index 7456170ba2d56..c14afd28a833f 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -302,7 +302,9 @@ def process_queue(task_queue): worker.daemon = True worker.start() try: + print_red("Join start") task_queue.join() + print_red("Join finished") except (KeyboardInterrupt, SystemExit): print_red("Exiting due to interrupt") sys.exit(-1) From 3eb761155a68be963d24fba4e03f3c019628addf Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Jan 2019 11:53:50 +0800 Subject: [PATCH 06/34] Debug 3 --- python/run-tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index c14afd28a833f..16c64579bca25 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -142,7 +142,8 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): key = (pyspark_python, test_name) SKIPPED_TESTS[key] = skipped_tests per_test_output.close() - except: + except Exception as e: + LOGGER.info("Exception thrown %s" % e) import traceback print_red("\nGot an exception while trying to store " "skipped test output:\n%s" % traceback.format_exc()) From 88954cd217875db00f3445b9f019583f43dac4d2 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Jan 2019 14:55:58 +0800 Subject: [PATCH 07/34] Revert "Debug #1" This reverts commit 7188b0580889cc2dd8d9cdc07ed41f2d2eba26fa. --- dev/run-tests.py | 4 +--- python/run-tests-with-coverage | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 8e61bf0538685..31e48a013d4e3 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -411,9 +411,7 @@ def run_python_tests(test_modules, parallelism, with_coverage=False): if test_modules != [modules.root]: command.append("--modules=%s" % ','.join(m.name for m in test_modules)) command.append("--parallelism=%i" % parallelism) - print("Running %s" % command) run_cmd(command) - print("Finished %s" % command) if with_coverage: post_python_tests_results() @@ -618,7 +616,7 @@ def main(): build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks) # run the test suites - # run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) + run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: diff --git a/python/run-tests-with-coverage b/python/run-tests-with-coverage index 5aebd8d7f0a78..457821037d43c 100755 --- a/python/run-tests-with-coverage +++ b/python/run-tests-with-coverage @@ -50,9 +50,7 @@ export SPARK_CONF_DIR="$COVERAGE_DIR/conf" # This environment variable enables the coverage. export COVERAGE_PROCESS_START="$FWDIR/.coveragerc" -echo "+++++++++++++++ Tests started" ./run-tests "$@" -echo "+++++++++++++++ Tests finished" # Don't run coverage for the coverage command itself unset COVERAGE_PROCESS_START From 37fcd3c4de66f5304c41de2aac9207e740800a18 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Jan 2019 14:56:33 +0800 Subject: [PATCH 08/34] Revert "Debug 2" This reverts commit 07b96fa768f8f5e04b632e94913e34afe0f178f4. --- python/run-tests.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 16c64579bca25..254369e050e5a 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -303,9 +303,7 @@ def process_queue(task_queue): worker.daemon = True worker.start() try: - print_red("Join start") task_queue.join() - print_red("Join finished") except (KeyboardInterrupt, SystemExit): print_red("Exiting due to interrupt") sys.exit(-1) From dafe7de318f85368aaef657f1107e44cf30c3250 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Jan 2019 14:56:39 +0800 Subject: [PATCH 09/34] Revert "Debug 3" This reverts commit 0dcc98ad26d775400603d7a9169ad9be39f4f0b0. --- python/run-tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 254369e050e5a..7456170ba2d56 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -142,8 +142,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): key = (pyspark_python, test_name) SKIPPED_TESTS[key] = skipped_tests per_test_output.close() - except Exception as e: - LOGGER.info("Exception thrown %s" % e) + except: import traceback print_red("\nGot an exception while trying to store " "skipped test output:\n%s" % traceback.format_exc()) From 643246bf7b1ada9b6a5d361ff02e78ea299797cf Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Jan 2019 14:56:45 +0800 Subject: [PATCH 10/34] Disable DStream tests when PyPy is used with coverage --- python/pyspark/streaming/tests/test_dstream.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/pyspark/streaming/tests/test_dstream.py b/python/pyspark/streaming/tests/test_dstream.py index d14e346b7a688..61a8161604909 100644 --- a/python/pyspark/streaming/tests/test_dstream.py +++ b/python/pyspark/streaming/tests/test_dstream.py @@ -22,12 +22,16 @@ import unittest from functools import reduce from itertools import chain +import platform from pyspark import SparkConf, SparkContext, RDD from pyspark.streaming import StreamingContext from pyspark.testing.streamingutils import PySparkStreamingTestCase +@unittest.skipIf( + "pypy" in platform.python_implementation().lower() and "COVERAGE_PROCESS_START" in os.environ, + "PyPy implementation causes to hang DStream tests forever when Coverage report is used.") class BasicOperationTests(PySparkStreamingTestCase): def test_map(self): @@ -389,6 +393,9 @@ def failed_func(i): self.fail("a failed func should throw an error") +@unittest.skipIf( + "pypy" in platform.python_implementation().lower() and "COVERAGE_PROCESS_START" in os.environ, + "PyPy implementation causes to hang DStream tests forever when Coverage report is used.") class WindowFunctionTests(PySparkStreamingTestCase): timeout = 15 @@ -466,6 +473,9 @@ def func(dstream): self._test_func(input, func, expected) +@unittest.skipIf( + "pypy" in platform.python_implementation().lower() and "COVERAGE_PROCESS_START" in os.environ, + "PyPy implementation causes to hang DStream tests forever when Coverage report is used.") class CheckpointTests(unittest.TestCase): setupCalled = False From 8133a08ab1650b5a5dffc549cefdc73e16566ee9 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Jan 2019 21:53:26 +0800 Subject: [PATCH 11/34] Update run-tests.py --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 31e48a013d4e3..f095c7281d004 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -419,7 +419,7 @@ def run_python_tests(test_modules, parallelism, with_coverage=False): def post_python_tests_results(): if "SPARK_TEST_KEY" not in os.environ: - print("[error] 'SPARK_TEST_KEY' environment variable was not set. Unable to post" + print("[error] 'SPARK_TEST_KEY' environment variable was not set. Unable to post " "PySpark coverage results.") sys.exit(1) spark_test_key = os.environ.get("SPARK_TEST_KEY") From 6d46f8c1344e46f13ceb963a4685b50d538f3ed6 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 10:15:01 +0800 Subject: [PATCH 12/34] Make the tests less flaky --- dev/run-tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/run-tests.py b/dev/run-tests.py index f095c7281d004..c71b14c6e6430 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -404,6 +404,10 @@ def run_python_tests(test_modules, parallelism, with_coverage=False): set_title_and_block("Running PySpark tests", "BLOCK_PYSPARK_UNIT_TESTS") if with_coverage: + # Coverage makes the PySpark tests flaky due to heavy parallelism. + # When we run PySpark tests with coverage, it uses 4 for now as + # workaround. + parallelism = 4 script = "run-tests-with-coverage" else: script = "run-tests" From ad006e7ca1d0c408d89700c7b60f945ae512f863 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 10:16:13 +0800 Subject: [PATCH 13/34] Skip scala tests for now (debug) --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index c71b14c6e6430..1b8b8146c4fd8 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -620,7 +620,7 @@ def main(): build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks) # run the test suites - run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) + # run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: From 92d74f0a3a4bd6344615b17b21a576c8e1b82226 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 10:18:41 +0800 Subject: [PATCH 14/34] Use SPARK_MATER_SBT_HADOOP_2_7 --- dev/run-tests.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 1b8b8146c4fd8..c162bae208054 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -626,11 +626,7 @@ def main(): if modules_with_python_tests: # We only run PySpark tests with coverage report in one specific job with # Spark master with SBT in Jenkins. - is_sbt_master_job = ( - os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "") == "hadoop2.7" - and os.environ.get("SPARK_BRANCH", "") == "master" - and os.environ.get("AMPLAB_JENKINS", "") == "true" - and os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "") == "sbt") + is_sbt_master_job = "SPARK_MATER_SBT_HADOOP_2_7" in os.environ is_sbt_master_job = True # Will remove this right before getting merged. run_python_tests( modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job) From ac6efefa8d72548fd17536e1e9494abff928c7ab Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 11:59:18 +0800 Subject: [PATCH 15/34] debug 2 --- dev/run-tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/run-tests.py b/dev/run-tests.py index c162bae208054..42472fe495cb8 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -436,6 +436,7 @@ def post_python_tests_results(): # Remove existing reports ["rm", "-fr", "pyspark-coverage-site/*"], + ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], # Copy generated coverage HTML. ["cp", "-r", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME, "pyspark-coverage-site/"], From f34fd8dcb2b1fa40b73fa21ea4355638449ab72c Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 13:06:09 +0800 Subject: [PATCH 16/34] debug 3 --- dev/run-tests.py | 3 ++- python/run-tests-with-coverage | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 42472fe495cb8..c3b74cf867205 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -429,6 +429,8 @@ def post_python_tests_results(): spark_test_key = os.environ.get("SPARK_TEST_KEY") with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] commands = [[ + ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], + # Clone PySpark coverage site. "git", "clone", @@ -436,7 +438,6 @@ def post_python_tests_results(): # Remove existing reports ["rm", "-fr", "pyspark-coverage-site/*"], - ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], # Copy generated coverage HTML. ["cp", "-r", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME, "pyspark-coverage-site/"], diff --git a/python/run-tests-with-coverage b/python/run-tests-with-coverage index 457821037d43c..553e29d5b8a32 100755 --- a/python/run-tests-with-coverage +++ b/python/run-tests-with-coverage @@ -64,4 +64,6 @@ $COV_EXEC report --include "pyspark/*" echo "Generating HTML files for PySpark coverage under $COVERAGE_DIR/htmlcov" $COV_EXEC html --ignore-errors --include "pyspark/*" --directory "$COVERAGE_DIR/htmlcov" +ls -al "$COVERAGE_DIR/htmlcov" + popd From 9ea948d743275f1462e7f46ac2ed2690f99d17b6 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 14:25:23 +0800 Subject: [PATCH 17/34] debug 4 --- dev/run-tests.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index c3b74cf867205..66ff9a9f90a3a 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -428,13 +428,13 @@ def post_python_tests_results(): sys.exit(1) spark_test_key = os.environ.get("SPARK_TEST_KEY") with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] - commands = [[ + commands = [ ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], # Clone PySpark coverage site. - "git", - "clone", - "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key], + ["git", "clone", + "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" + % spark_test_key], # Remove existing reports ["rm", "-fr", "pyspark-coverage-site/*"], From 8afab85259b4c238f02c64bdc9a0c3c2d4ac3627 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 15:25:37 +0800 Subject: [PATCH 18/34] Revert "debug 4" This reverts commit e113216c25e20dfe67b86238cff8e87ff846a8c0. --- dev/run-tests.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 66ff9a9f90a3a..c3b74cf867205 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -428,13 +428,13 @@ def post_python_tests_results(): sys.exit(1) spark_test_key = os.environ.get("SPARK_TEST_KEY") with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] - commands = [ + commands = [[ ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], # Clone PySpark coverage site. - ["git", "clone", - "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" - % spark_test_key], + "git", + "clone", + "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key], # Remove existing reports ["rm", "-fr", "pyspark-coverage-site/*"], From 9b4a5b89ec8e1a4183560711454232c2a0389142 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 15:25:47 +0800 Subject: [PATCH 19/34] Revert "debug 3" This reverts commit cf623716b71fd179fd8390dc49780e63c28de6e6. --- dev/run-tests.py | 3 +-- python/run-tests-with-coverage | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index c3b74cf867205..42472fe495cb8 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -429,8 +429,6 @@ def post_python_tests_results(): spark_test_key = os.environ.get("SPARK_TEST_KEY") with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] commands = [[ - ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], - # Clone PySpark coverage site. "git", "clone", @@ -438,6 +436,7 @@ def post_python_tests_results(): # Remove existing reports ["rm", "-fr", "pyspark-coverage-site/*"], + ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], # Copy generated coverage HTML. ["cp", "-r", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME, "pyspark-coverage-site/"], diff --git a/python/run-tests-with-coverage b/python/run-tests-with-coverage index 553e29d5b8a32..457821037d43c 100755 --- a/python/run-tests-with-coverage +++ b/python/run-tests-with-coverage @@ -64,6 +64,4 @@ $COV_EXEC report --include "pyspark/*" echo "Generating HTML files for PySpark coverage under $COVERAGE_DIR/htmlcov" $COV_EXEC html --ignore-errors --include "pyspark/*" --directory "$COVERAGE_DIR/htmlcov" -ls -al "$COVERAGE_DIR/htmlcov" - popd From 23398d34f9572fb451c5fd941e803d2cb19c7886 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 15:25:55 +0800 Subject: [PATCH 20/34] Revert "debug 2" This reverts commit 4f3939ec4d9ab125db5eacd5378a6292a51986a8. --- dev/run-tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 42472fe495cb8..c162bae208054 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -436,7 +436,6 @@ def post_python_tests_results(): # Remove existing reports ["rm", "-fr", "pyspark-coverage-site/*"], - ["ls", "-al", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME], # Copy generated coverage HTML. ["cp", "-r", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME, "pyspark-coverage-site/"], From f3c7b71a5d92c6bd459e4b8187b6f1deb9e25c03 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 16:06:14 +0800 Subject: [PATCH 21/34] Avoid shell interpretation --- dev/run-tests.py | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index c162bae208054..383642c69239f 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -25,6 +25,8 @@ import re import sys import subprocess +import glob +import shutil from collections import namedtuple from sparktestsupport import SPARK_HOME, USER_HOME, ERROR_CODES @@ -428,43 +430,41 @@ def post_python_tests_results(): sys.exit(1) spark_test_key = os.environ.get("SPARK_TEST_KEY") with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] - commands = [[ + run_cmd([ # Clone PySpark coverage site. "git", "clone", - "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key], + "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key]) - # Remove existing reports - ["rm", "-fr", "pyspark-coverage-site/*"], + # Remove existing reports + run_cmd(["rm", "-fr"] + glob.glob("pyspark-coverage-site/*")) - # Copy generated coverage HTML. - ["cp", "-r", "%s/python/test_coverage/htmlcov/*" % SPARK_HOME, "pyspark-coverage-site/"], + # Copy generated coverage HTML. + for f in glob.glob("%s/python/test_coverage/htmlcov/*" % SPARK_HOME): + shutil.copy(f, "pyspark-coverage-site/") - # Check out to a temporary branch. - with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"], + # Check out to a temporary branch. + run_cmd(with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"]) - # Add all the files. - with_pyspark_coverage_site + ["git", "add", "-A"], + # Add all the files. + run_cmd(with_pyspark_coverage_site + ["git", "add", "-A"]) - # Commit current test coverage results. - with_pyspark_coverage_site + [ - "git", - "commit", - "-am", - '"Coverage report at latest commit in Apache Spark"', - '--author="Apache Spark Test Account "'], - - # Delete the old branch. - with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"], + # Commit current test coverage results. + run_cmd(with_pyspark_coverage_site + [ + "git", + "commit", + "-am", + '"Coverage report at latest commit in Apache Spark"', + '--author="Apache Spark Test Account "']) - # Rename the temporary branch to master. - with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"], + # Delete the old branch. + run_cmd(with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"]) - # Finally, force update to our repository. - with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]] + # Rename the temporary branch to master. + run_cmd(with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"]) - for command in commands: - run_cmd(command) + # Finally, force update to our repository. + run_cmd(with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]) def run_python_packaging_tests(): From a88c16faa6aa1574d1654e643f40b8cc57692c15 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 16:08:52 +0800 Subject: [PATCH 22/34] newlines pretty --- dev/run-tests.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 383642c69239f..d6084d28d0b54 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -435,20 +435,15 @@ def post_python_tests_results(): "git", "clone", "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key]) - # Remove existing reports run_cmd(["rm", "-fr"] + glob.glob("pyspark-coverage-site/*")) - # Copy generated coverage HTML. for f in glob.glob("%s/python/test_coverage/htmlcov/*" % SPARK_HOME): shutil.copy(f, "pyspark-coverage-site/") - # Check out to a temporary branch. run_cmd(with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"]) - # Add all the files. run_cmd(with_pyspark_coverage_site + ["git", "add", "-A"]) - # Commit current test coverage results. run_cmd(with_pyspark_coverage_site + [ "git", @@ -456,13 +451,10 @@ def post_python_tests_results(): "-am", '"Coverage report at latest commit in Apache Spark"', '--author="Apache Spark Test Account "']) - # Delete the old branch. run_cmd(with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"]) - # Rename the temporary branch to master. run_cmd(with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"]) - # Finally, force update to our repository. run_cmd(with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]) From b7d3cef99107ba6aa087a3a84455f88e29d43291 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 16:10:36 +0800 Subject: [PATCH 23/34] Pretty comment --- dev/run-tests.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index d6084d28d0b54..47562d4639ce8 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -430,32 +430,34 @@ def post_python_tests_results(): sys.exit(1) spark_test_key = os.environ.get("SPARK_TEST_KEY") with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] + + # The steps below uploads HTMLs to 'github.com/spark-test/pyspark-coverage-site'. + # 1. Clone PySpark coverage site. run_cmd([ - # Clone PySpark coverage site. "git", "clone", "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key]) - # Remove existing reports + # 2. Remove existing reports run_cmd(["rm", "-fr"] + glob.glob("pyspark-coverage-site/*")) - # Copy generated coverage HTML. + # 3. Copy generated coverage HTML. for f in glob.glob("%s/python/test_coverage/htmlcov/*" % SPARK_HOME): shutil.copy(f, "pyspark-coverage-site/") - # Check out to a temporary branch. + # 4. Check out to a temporary branch. run_cmd(with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"]) - # Add all the files. + # 5. Add all the files. run_cmd(with_pyspark_coverage_site + ["git", "add", "-A"]) - # Commit current test coverage results. + # 6. Commit current test coverage results. run_cmd(with_pyspark_coverage_site + [ "git", "commit", "-am", '"Coverage report at latest commit in Apache Spark"', '--author="Apache Spark Test Account "']) - # Delete the old branch. + # 7. Delete the old branch. run_cmd(with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"]) - # Rename the temporary branch to master. + # 8. Rename the temporary branch to master. run_cmd(with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"]) - # Finally, force update to our repository. + # 9. Finally, force update to our repository. run_cmd(with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]) From c09ddb8f1bcbd144d814c42d01755cd8be0eae13 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 17:46:02 +0800 Subject: [PATCH 24/34] Avoid shell interpreting --- dev/run-tests.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 47562d4639ce8..66e2f0a6c72f7 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -443,22 +443,26 @@ def post_python_tests_results(): for f in glob.glob("%s/python/test_coverage/htmlcov/*" % SPARK_HOME): shutil.copy(f, "pyspark-coverage-site/") # 4. Check out to a temporary branch. - run_cmd(with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"]) - # 5. Add all the files. - run_cmd(with_pyspark_coverage_site + ["git", "add", "-A"]) - # 6. Commit current test coverage results. - run_cmd(with_pyspark_coverage_site + [ - "git", - "commit", - "-am", - '"Coverage report at latest commit in Apache Spark"', - '--author="Apache Spark Test Account "']) - # 7. Delete the old branch. - run_cmd(with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"]) - # 8. Rename the temporary branch to master. - run_cmd(with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"]) - # 9. Finally, force update to our repository. - run_cmd(with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]) + os.chdir("pyspark-coverage-site") + try: + run_cmd(with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"]) + # 5. Add all the files. + run_cmd(with_pyspark_coverage_site + ["git", "add", "-A"]) + # 6. Commit current test coverage results. + run_cmd(with_pyspark_coverage_site + [ + "git", + "commit", + "-am", + '"Coverage report at latest commit in Apache Spark"', + '--author="Apache Spark Test Account "']) + # 7. Delete the old branch. + run_cmd(with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"]) + # 8. Rename the temporary branch to master. + run_cmd(with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"]) + # 9. Finally, force update to our repository. + run_cmd(with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]) + finally: + os.chdir("..") def run_python_packaging_tests(): From 0a66669e6d263258bb58fdeafdbb4716ed23c4ff Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 20:26:26 +0800 Subject: [PATCH 25/34] D'oh! --- dev/run-tests.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 66e2f0a6c72f7..6e4ed0fad1fab 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -429,8 +429,6 @@ def post_python_tests_results(): "PySpark coverage results.") sys.exit(1) spark_test_key = os.environ.get("SPARK_TEST_KEY") - with_pyspark_coverage_site = ["cd", "pyspark-coverage-site", "&&"] - # The steps below uploads HTMLs to 'github.com/spark-test/pyspark-coverage-site'. # 1. Clone PySpark coverage site. run_cmd([ @@ -445,22 +443,22 @@ def post_python_tests_results(): # 4. Check out to a temporary branch. os.chdir("pyspark-coverage-site") try: - run_cmd(with_pyspark_coverage_site + ["git", "checkout", "--orphan", "latest_branch"]) + run_cmd(["git", "checkout", "--orphan", "latest_branch"]) # 5. Add all the files. - run_cmd(with_pyspark_coverage_site + ["git", "add", "-A"]) + run_cmd(["git", "add", "-A"]) # 6. Commit current test coverage results. - run_cmd(with_pyspark_coverage_site + [ + run_cmd([ "git", "commit", "-am", '"Coverage report at latest commit in Apache Spark"', '--author="Apache Spark Test Account "']) # 7. Delete the old branch. - run_cmd(with_pyspark_coverage_site + ["git", "branch", "-D", "gh-pages"]) + run_cmd(["git", "branch", "-D", "gh-pages"]) # 8. Rename the temporary branch to master. - run_cmd(with_pyspark_coverage_site + ["git", "branch", "-m", "gh-pages"]) + run_cmd(["git", "branch", "-m", "gh-pages"]) # 9. Finally, force update to our repository. - run_cmd(with_pyspark_coverage_site + ["git", "push", "-f", "origin", "gh-pages"]) + run_cmd(["git", "push", "-f", "origin", "gh-pages"]) finally: os.chdir("..") From c2412f6af2add2716923d610461637b415fa69f6 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 22:34:31 +0800 Subject: [PATCH 26/34] Work around by `symbolic-ref` --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 6e4ed0fad1fab..e34c307f289c1 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -443,7 +443,7 @@ def post_python_tests_results(): # 4. Check out to a temporary branch. os.chdir("pyspark-coverage-site") try: - run_cmd(["git", "checkout", "--orphan", "latest_branch"]) + run_cmd(["git", "symbolic-ref", "HEAD", "refs/heads/latest_branch"]) # 5. Add all the files. run_cmd(["git", "add", "-A"]) # 6. Commit current test coverage results. From c660dcf5a7c5779882d82b08189acf4ccc74b03b Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Jan 2019 22:35:56 +0800 Subject: [PATCH 27/34] Fix some comments accordingly --- dev/run-tests.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index e34c307f289c1..fb421ea069cc8 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -435,18 +435,18 @@ def post_python_tests_results(): "git", "clone", "https://spark-test:%s@github.com/spark-test/pyspark-coverage-site.git" % spark_test_key]) - # 2. Remove existing reports + # 2. Remove existing HTMLs. run_cmd(["rm", "-fr"] + glob.glob("pyspark-coverage-site/*")) - # 3. Copy generated coverage HTML. + # 3. Copy generated coverage HTMLs. for f in glob.glob("%s/python/test_coverage/htmlcov/*" % SPARK_HOME): shutil.copy(f, "pyspark-coverage-site/") - # 4. Check out to a temporary branch. os.chdir("pyspark-coverage-site") try: + # 4. Check out to a temporary branch. run_cmd(["git", "symbolic-ref", "HEAD", "refs/heads/latest_branch"]) # 5. Add all the files. run_cmd(["git", "add", "-A"]) - # 6. Commit current test coverage results. + # 6. Commit current HTMLs. run_cmd([ "git", "commit", From 5132334586da1af7858bbe39278fc899084fc2e2 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 30 Jan 2019 10:16:07 +0800 Subject: [PATCH 28/34] Fix comments and save Py4J access --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index fb421ea069cc8..39f2616cbf6e5 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -451,7 +451,7 @@ def post_python_tests_results(): "git", "commit", "-am", - '"Coverage report at latest commit in Apache Spark"', + "Coverage report at latest commit in Apache Spark", '--author="Apache Spark Test Account "']) # 7. Delete the old branch. run_cmd(["git", "branch", "-D", "gh-pages"]) From 0a1216a3734e41ac113652fd9c13f00afc6d5087 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 30 Jan 2019 12:57:55 +0800 Subject: [PATCH 29/34] Remove workarounds to speed up tests --- dev/run-tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 39f2616cbf6e5..6e095d14cd88a 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -616,14 +616,13 @@ def main(): build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks) # run the test suites - # run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) + run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: # We only run PySpark tests with coverage report in one specific job with # Spark master with SBT in Jenkins. is_sbt_master_job = "SPARK_MATER_SBT_HADOOP_2_7" in os.environ - is_sbt_master_job = True # Will remove this right before getting merged. run_python_tests( modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job) run_python_packaging_tests() From efb0299c13599bd5cc5c4b3fd61fa5dce3a6d567 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 30 Jan 2019 12:59:35 +0800 Subject: [PATCH 30/34] typo --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 6e095d14cd88a..80f0d7cd921f4 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -429,7 +429,7 @@ def post_python_tests_results(): "PySpark coverage results.") sys.exit(1) spark_test_key = os.environ.get("SPARK_TEST_KEY") - # The steps below uploads HTMLs to 'github.com/spark-test/pyspark-coverage-site'. + # The steps below upload HTMLs to 'github.com/spark-test/pyspark-coverage-site'. # 1. Clone PySpark coverage site. run_cmd([ "git", From d4e30f4694f546d714b1ff3d818f709023d24d4f Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 30 Jan 2019 13:57:10 +0800 Subject: [PATCH 31/34] Add a badge for PySpark coverage --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 77095435722c3..97e580ee9a89e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7) [![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark) +[![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=green&style=plastic)](https://spark-test.github.io/pyspark-coverage-site) Spark is a fast and general cluster computing system for Big Data. It provides high-level APIs in Scala, Java, Python, and R, and an optimized engine that @@ -86,8 +87,6 @@ Please see the guidance on how to There is also a Kubernetes integration test, see resource-managers/kubernetes/integration-tests/README.md -For PySpark test coverage, see [this coverage report for the latest commit](https://spark-test.github.io/pyspark-coverage-site/). - ## A Note About Hadoop Versions Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported From f5384109ae65650439daf324024a98ff26ac962d Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 30 Jan 2019 14:01:07 +0800 Subject: [PATCH 32/34] Match it --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 97e580ee9a89e..271f2f5f5b1c3 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7) [![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark) -[![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=green&style=plastic)](https://spark-test.github.io/pyspark-coverage-site) +[![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site) Spark is a fast and general cluster computing system for Big Data. It provides high-level APIs in Scala, Java, Python, and R, and an optimized engine that From a1c0601ab3bf4db019ded2b40cd30a1d3b697e17 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Tue, 20 Nov 2018 12:38:40 -0800 Subject: [PATCH 33/34] Adding Shanke as co-author Authored-by: shane knapp Signed-off-by: shane knapp From 426ef1182dffb3bd01561af7d714d8ce3bb6ddde Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 31 Jan 2019 10:14:10 +0800 Subject: [PATCH 34/34] Fix a typo --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 80f0d7cd921f4..edd89c9f08903 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -622,7 +622,7 @@ def main(): if modules_with_python_tests: # We only run PySpark tests with coverage report in one specific job with # Spark master with SBT in Jenkins. - is_sbt_master_job = "SPARK_MATER_SBT_HADOOP_2_7" in os.environ + is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ run_python_tests( modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job) run_python_packaging_tests()