From 7438f900ebf98e6f769128cc05a34f1ac8395d79 Mon Sep 17 00:00:00 2001 From: Tony Zhang Date: Thu, 4 Jul 2019 17:04:04 -0700 Subject: [PATCH 1/3] [SPARK-28189] Use semanticEquals in Dataset drop method --- .../scala/org/apache/spark/sql/Dataset.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index fe5b15cb511da..58515bee0bd19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2322,7 +2322,7 @@ class Dataset[T] private[sql]( } val attrs = this.logicalPlan.output val colsAfterDrop = attrs.filter { attr => - attr != expression + !attr.semanticEquals(expression) }.map(attr => Column(attr)) select(colsAfterDrop : _*) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d15c1f47b3d23..70a3ff955ab27 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -572,6 +572,22 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { assert(df.schema.map(_.name) === Seq("value")) } + test("drop column using drop with column reference with case-insensitive names") { + val col1 = testData("KEY") + val df1 = testData.drop(col1) + checkAnswer( + df1, + testData.collect().map(x => Row(x.getString(1))).toSeq) + assert(df1.schema.map(_.name) === Seq("value")) + + val col2 = testData("Key") + val df2 = testData.drop(col2) + checkAnswer( + df2, + testData.collect().map(x => Row(x.getString(1))).toSeq) + assert(df2.schema.map(_.name) === Seq("value")) + } + test("drop unknown column (no-op) with column reference") { val col = Column("random") val df = testData.drop(col) From 
ef4122da0f667874098d71c7595440b43118dfa8 Mon Sep 17 00:00:00 2001 From: Tony Zhang Date: Thu, 4 Jul 2019 22:18:27 -0700 Subject: [PATCH 2/3] [SPARK-28189][SQL] Use semanticEquals in Dataset drop method --- .../org/apache/spark/sql/DataFrameSuite.scala | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 70a3ff955ab27..979894cda5a34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -572,20 +572,18 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { assert(df.schema.map(_.name) === Seq("value")) } - test("drop column using drop with column reference with case-insensitive names") { - val col1 = testData("KEY") - val df1 = testData.drop(col1) - checkAnswer( - df1, - testData.collect().map(x => Row(x.getString(1))).toSeq) - assert(df1.schema.map(_.name) === Seq("value")) - - val col2 = testData("Key") - val df2 = testData.drop(col2) - checkAnswer( - df2, - testData.collect().map(x => Row(x.getString(1))).toSeq) - assert(df2.schema.map(_.name) === Seq("value")) + test("SPARK-28189 drop column using drop with column reference with case-insensitive names") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val col1 = testData("KEY") + val df1 = testData.drop(col1) + checkAnswer(df1, testData.selectExpr("value")) + assert(df1.schema.map(_.name) === Seq("value")) + + val col2 = testData("Key") + val df2 = testData.drop(col2) + checkAnswer(df2, testData.selectExpr("value")) + assert(df2.schema.map(_.name) === Seq("value")) + } } test("drop unknown column (no-op) with column reference") { From 1dc9aa73bf4077bf7de6ab46c7227f09d7d4ffdb Mon Sep 17 00:00:00 2001 From: Tony Zhang Date: Fri, 5 Jul 2019 11:43:33 -0700 Subject: [PATCH 3/3] [SPARK-28189][SQL] Use semanticEquals in Dataset 
drop method --- .../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 979894cda5a34..98936702a013d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -573,6 +573,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { } test("SPARK-28189 drop column using drop with column reference with case-insensitive names") { + // With SQL config caseSensitive OFF, case-insensitive column names should work withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { val col1 = testData("KEY") val df1 = testData.drop(col1) @@ -584,6 +585,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { checkAnswer(df2, testData.selectExpr("value")) assert(df2.schema.map(_.name) === Seq("value")) } + + // With SQL config caseSensitive ON, AnalysisException should be thrown + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val e = intercept[AnalysisException] { + testData("KEY") + }.getMessage + assert(e.contains("Cannot resolve column name")) + } } test("drop unknown column (no-op) with column reference") {