From 3c654a0532508283c13e04b24f5ac1dd9c15fd73 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 13 Dec 2019 02:39:31 +0800 Subject: [PATCH] hive partition pruning can only support string and integral types --- .../spark/sql/hive/client/HiveShim.scala | 14 ++++++++------ .../sql/hive/execution/SQLQuerySuite.scala | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 01ddaf2ee8ac1..d8078a534a874 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -703,7 +703,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } } - object NonVarcharAttribute { + object SupportedAttribute { // hive varchar is treated as catalyst string, but hive varchar can't be pushed down. private val varcharKeys = table.getPartitionKeys.asScala .filter(col => col.getType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) || @@ -713,8 +713,10 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { def unapply(attr: Attribute): Option[String] = { if (varcharKeys.contains(attr.name)) { None - } else { + } else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType) { Some(attr.name) + } else { + None } } } @@ -737,20 +739,20 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } def convert(expr: Expression): Option[String] = expr match { - case In(ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiterals(values)) + case In(ExtractAttribute(SupportedAttribute(name)), ExtractableLiterals(values)) if useAdvanced => Some(convertInToOr(name, values)) - case InSet(ExtractAttribute(NonVarcharAttribute(name)), ExtractableValues(values)) + case InSet(ExtractAttribute(SupportedAttribute(name)), ExtractableValues(values)) if useAdvanced => Some(convertInToOr(name, values)) case op @ SpecialBinaryComparison( - ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiteral(value)) => + ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value)) => Some(s"$name ${op.symbol} $value") case op @ SpecialBinaryComparison( - ExtractableLiteral(value), ExtractAttribute(NonVarcharAttribute(name))) => + ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name))) => Some(s"$value ${op.symbol} $name") case And(expr1, expr2) if useAdvanced => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 7dcd9dfac2861..b0adb04d44614 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2473,4 +2473,22 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } } + + test("partition pruning should handle date correctly") { + withSQLConf(SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "2") { + withTable("t") { + sql("CREATE TABLE t (i INT) PARTITIONED BY (j DATE)") + sql("INSERT INTO t PARTITION(j='1990-11-11') SELECT 1") + checkAnswer(sql("SELECT i, CAST(j AS STRING) FROM t"), Row(1, "1990-11-11")) + checkAnswer( + sql( + """ + |SELECT i, CAST(j AS STRING) + |FROM t + |WHERE j IN (DATE'1990-11-10', DATE'1990-11-11', DATE'1990-11-12') + |""".stripMargin), + Row(1, "1990-11-11")) + } + } + } }