From 45bba4cb7cbbc9dc86e0d6e45c985c31d002a8e1 Mon Sep 17 00:00:00 2001 From: beliefer Date: Wed, 8 Apr 2020 23:01:51 +0800 Subject: [PATCH 1/3] Optimize code --- .../resources/sql-tests/results/limit.sql.out | 16 ++++---- .../sql-tests/results/show-tables.sql.out | 12 ------ .../sql-tests/results/show-views.sql.out | 24 ------------ .../apache/spark/sql/SQLQueryTestSuite.scala | 37 +++++++++++++++---- 4 files changed, 38 insertions(+), 51 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 281326e22a97a..1e35f8b66ea32 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -7,8 +7,8 @@ SELECT * FROM testdata LIMIT 2 -- !query schema struct -- !query output -1 1 -2 2 +51 51 +52 52 -- !query @@ -34,9 +34,9 @@ SELECT * FROM testdata LIMIT 2 + 1 -- !query schema struct -- !query output -1 1 -2 2 -3 3 +51 51 +52 52 +53 53 -- !query @@ -44,7 +44,7 @@ SELECT * FROM testdata LIMIT CAST(1 AS int) -- !query schema struct -- !query output -1 1 +51 51 -- !query @@ -70,7 +70,7 @@ SELECT * FROM testdata LIMIT CAST(1 AS INT) -- !query schema struct -- !query output -1 1 +51 51 -- !query @@ -88,7 +88,7 @@ SELECT * FROM testdata LIMIT key > 3 struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); +The limit expression must evaluate to a constant value, but got (spark_catalog.default.testdata.`key` > 3); -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index dd2e7b168698e..a95b02c7f7743 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -63,15 +63,9 @@ SHOW TABLES -- !query schema struct -- !query output -aggtest -arraydata -mapdata -onek show_t1 show_t2 show_t3 -tenk1 -testdata -- !query @@ -79,15 +73,9 @@ SHOW TABLES IN showdb -- !query schema struct -- !query output -aggtest -arraydata -mapdata -onek show_t1 show_t2 show_t3 -tenk1 -testdata -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-views.sql.out b/sql/core/src/test/resources/sql-tests/results/show-views.sql.out index ee8a7474e6d5d..d88790d8b5ec8 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-views.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-views.sql.out @@ -63,12 +63,6 @@ SHOW VIEWS -- !query schema struct -- !query output -aggtest -arraydata -mapdata -onek -tenk1 -testdata view_1 view_2 view_4 @@ -79,12 +73,6 @@ SHOW VIEWS FROM showdb -- !query schema struct -- !query output -aggtest -arraydata -mapdata -onek -tenk1 -testdata view_1 view_2 view_4 @@ -95,12 +83,6 @@ SHOW VIEWS IN showdb -- !query schema struct -- !query output -aggtest -arraydata -mapdata -onek -tenk1 -testdata view_1 view_2 view_4 @@ -111,12 +93,6 @@ SHOW VIEWS IN global_temp -- !query schema struct -- !query output -aggtest -arraydata -mapdata -onek -tenk1 -testdata view_3 view_4 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index cb11f9bb9fe96..a83743c4657f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -363,7 +363,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { // Create a local SparkSession to have stronger isolation between different test cases. // This does not isolate catalog changes. val localSparkSession = spark.newSession() - loadTestData(localSparkSession) testCase match { case udfTestCase: UDFTest => @@ -575,11 +574,16 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { private def loadTestData(session: SparkSession): Unit = { import session.implicits._ - (1 to 100).map(i => (i, i.toString)).toDF("key", "value").createOrReplaceTempView("testdata") + (1 to 100).map(i => (i, i.toString)).toDF("key", "value") + .write + .format("parquet") + .saveAsTable("testdata") ((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) .toDF("arraycol", "nestedarraycol") - .createOrReplaceTempView("arraydata") + .write + .format("parquet") + .saveAsTable("arraydata") (Tuple1(Map(1 -> "a1", 2 -> "b1", 3 -> "c1", 4 -> "d1", 5 -> "e1")) :: Tuple1(Map(1 -> "a2", 2 -> "b2", 3 -> "c2", 4 -> "d2")) :: @@ -587,7 +591,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { Tuple1(Map(1 -> "a4", 2 -> "b4")) :: Tuple1(Map(1 -> "a5")) :: Nil) .toDF("mapcol") - .createOrReplaceTempView("mapdata") + .write + .format("parquet") + .saveAsTable("mapdata") session .read @@ -595,7 +601,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { .options(Map("delimiter" -> "\t", "header" -> "false")) .schema("a int, b float") .load(testFile("test-data/postgresql/agg.data")) - .createOrReplaceTempView("aggtest") + .write + .format("parquet") + .saveAsTable("aggtest") session .read @@ -621,7 +629,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { |string4 string """.stripMargin) .load(testFile("test-data/postgresql/onek.data")) - .createOrReplaceTempView("onek") + .write + .format("parquet") + .saveAsTable("onek") session .read @@ -647,7 +657,18 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { |string4 string """.stripMargin) .load(testFile("test-data/postgresql/tenk.data")) - .createOrReplaceTempView("tenk1") + .write + .format("parquet") + .saveAsTable("tenk1") + } + + private def unloadTestData(session: SparkSession): Unit = { + session.sql("DROP TABLE IF EXISTS testdata") + session.sql("DROP TABLE IF EXISTS arraydata") + session.sql("DROP TABLE IF EXISTS mapdata") + session.sql("DROP TABLE IF EXISTS aggtest") + session.sql("DROP TABLE IF EXISTS onek") + session.sql("DROP TABLE IF EXISTS tenk1") } private val originalTimeZone = TimeZone.getDefault @@ -655,6 +676,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { override def beforeAll(): Unit = { super.beforeAll() + loadTestData(spark) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) // Add Locale setting @@ -668,6 +690,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { try { TimeZone.setDefault(originalTimeZone) Locale.setDefault(originalLocale) + unloadTestData(spark) // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) From 40bde646da82ab4cb9fd11f9788bcb5a848a239b Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 9 Apr 2020 13:41:45 +0800 Subject: [PATCH 2/3] Fix limit without order. --- .../test/resources/sql-tests/results/limit.sql.out | 14 +++++++------- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 1e35f8b66ea32..074e7a6d28c47 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -7,8 +7,8 @@ SELECT * FROM testdata LIMIT 2 -- !query schema struct -- !query output -51 51 -52 52 +1 1 +2 2 -- !query @@ -34,9 +34,9 @@ SELECT * FROM testdata LIMIT 2 + 1 -- !query schema struct -- !query output -51 51 -52 52 -53 53 +1 1 +2 2 +3 3 -- !query @@ -44,7 +44,7 @@ SELECT * FROM testdata LIMIT CAST(1 AS int) -- !query schema struct -- !query output -51 51 +1 1 -- !query @@ -70,7 +70,7 @@ SELECT * FROM testdata LIMIT CAST(1 AS INT) -- !query schema struct -- !query output -51 51 +1 1 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index a83743c4657f8..75b7f2f27f5da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -575,6 +575,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { import session.implicits._ (1 to 100).map(i => (i, i.toString)).toDF("key", "value") + .repartition(1) .write .format("parquet") .saveAsTable("testdata") From af42f50c53aa9be5fa7540591fe2a6277357377c Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 9 Apr 2020 13:44:40 +0800 Subject: [PATCH 3/3] Adjust method name --- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 75b7f2f27f5da..bc9b4d85aacdb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -571,7 +571,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { } /** Load built-in test tables into the SparkSession. */ - private def loadTestData(session: SparkSession): Unit = { + private def createTestTables(session: SparkSession): Unit = { import session.implicits._ (1 to 100).map(i => (i, i.toString)).toDF("key", "value") @@ -663,7 +663,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { .saveAsTable("tenk1") } - private def unloadTestData(session: SparkSession): Unit = { + private def removeTestTables(session: SparkSession): Unit = { session.sql("DROP TABLE IF EXISTS testdata") session.sql("DROP TABLE IF EXISTS arraydata") session.sql("DROP TABLE IF EXISTS mapdata") @@ -677,7 +677,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { override def beforeAll(): Unit = { super.beforeAll() - loadTestData(spark) + createTestTables(spark) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) // Add Locale setting @@ -691,7 +691,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { try { TimeZone.setDefault(originalTimeZone) Locale.setDefault(originalLocale) - unloadTestData(spark) + removeTestTables(spark) // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent())