From 4af96a1b90ceb54c64c798215d2f5eefe87ad896 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 24 Jun 2019 14:06:18 +0800 Subject: [PATCH 1/5] [SPARK-28143][SQL]IN expression missing attribute should throw analysis exception --- .../spark/sql/catalyst/analysis/FunctionRegistry.scala | 1 - .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 0041ccf775573..8d60b9c3d5813 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -581,7 +581,6 @@ object FunctionRegistry { val params = Seq.fill(expressions.size)(classOf[Expression]) val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse { val validParametersCount = constructors - .filter(_.getParameterTypes.forall(_ == classOf[Expression])) .map(_.getParameterCount).distinct.sorted val expectedNumberOfParameters = if (validParametersCount.length == 1) { validParametersCount.head.toString diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ba1ac654c1a05..cd97afa70e776 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -106,6 +106,12 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { checkKeywordsExist(sql("describe functioN abcadf"), "Function: abcadf not found.") } + test("SPARK-28143: IN expression missing attribute should throw analysis exception") { + val query = "select 1 from where in ()" + val e = intercept[AnalysisException](sql(query)) + assert(e.getMessage.startsWith("Invalid number of arguments for function in.")) + } + test("SPARK-14415: All functions should have own descriptions") { for (f <- spark.sessionState.functionRegistry.listFunction()) { if (!Seq("cube", "grouping", "grouping_id", "rollup", "window").contains(f.unquotedString)) { From 9ca1990fbecc5076541d565e99bdd602de7eb1db Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 24 Jun 2019 21:47:01 +0800 Subject: [PATCH 2/5] bring back filter logic and and an or condition --- .../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 8d60b9c3d5813..41ed98113da87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -581,6 +581,9 @@ object FunctionRegistry { val params = Seq.fill(expressions.size)(classOf[Expression]) val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse { val validParametersCount = constructors + .filter(_.getParameterTypes.forall { t => + t == classOf[Expression] || t == classOf[Seq[Expression]] + }) .map(_.getParameterCount).distinct.sorted val expectedNumberOfParameters = if (validParametersCount.length == 1) { validParametersCount.head.toString From 15c67d695fd63cc38ef038bd2c731846b5b74d42 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 25 Jun 2019 11:05:39 +0800 Subject: [PATCH 3/5] if valid paras count == 0 --- .../spark/sql/catalyst/analysis/FunctionRegistry.scala | 8 ++++---- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 41ed98113da87..0ab46e4c00dbb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -581,11 +581,11 @@ object FunctionRegistry { val params = Seq.fill(expressions.size)(classOf[Expression]) val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse { val validParametersCount = constructors - .filter(_.getParameterTypes.forall { t => - t == classOf[Expression] || t == classOf[Seq[Expression]] - }) + .filter(_.getParameterTypes.forall(_ == classOf[Expression])) .map(_.getParameterCount).distinct.sorted - val expectedNumberOfParameters = if (validParametersCount.length == 1) { + val expectedNumberOfParameters = if (validParametersCount.length == 0) { + throw new AnalysisException(s"$name is an invalid function.") + } else if (validParametersCount.length == 1) { validParametersCount.head.toString } else { validParametersCount.init.mkString("one of ", ", ", " and ") + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index cd97afa70e776..efd5546d5ac79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -107,9 +107,10 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } test("SPARK-28143: IN expression missing attribute should throw analysis exception") { - val query = "select 1 from where in ()" + // missing attribute, which actually might be `select 1 where a in ()` + val query = "select 1 where in ()" val e = intercept[AnalysisException](sql(query)) - assert(e.getMessage.startsWith("Invalid number of arguments for function in.")) + assert(e.getMessage.startsWith("in")) } test("SPARK-14415: All functions should have own descriptions") { From 30ef47ed9b1e3375fb58393b377adb421f70ab84 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 25 Jun 2019 13:27:17 +0800 Subject: [PATCH 4/5] unified err msg --- .../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 4 ++-- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 0ab46e4c00dbb..6b9949ef68438 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -583,8 +583,8 @@ object FunctionRegistry { val validParametersCount = constructors .filter(_.getParameterTypes.forall(_ == classOf[Expression])) .map(_.getParameterCount).distinct.sorted - val expectedNumberOfParameters = if (validParametersCount.length == 0) { - throw new AnalysisException(s"$name is an invalid function.") + val expectedNumberOfParameters = if (validParametersCount.isEmpty) { + constructors.headOption.map(_.getParameterCount).getOrElse(0).toString } else if (validParametersCount.length == 1) { validParametersCount.head.toString } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index efd5546d5ac79..679aa5b5455ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -110,7 +110,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { // missing attribute, which actually might be `select 1 where a in ()` val query = "select 1 where in ()" val e = intercept[AnalysisException](sql(query)) - assert(e.getMessage.startsWith("in")) + assert(e.getMessage.startsWith("Invalid number of arguments for function in")) } test("SPARK-14415: All functions should have own descriptions") { From a0363de8932ad6b886d9deaa043354543da0ed56 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 26 Jul 2019 14:47:50 +0800 Subject: [PATCH 5/5] add some hints when the registered function is defined wrong or misused --- .../catalyst/analysis/FunctionRegistry.scala | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6b9949ef68438..918410807df8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -580,16 +580,23 @@ object FunctionRegistry { // Otherwise, find a constructor method that matches the number of arguments, and use that. val params = Seq.fill(expressions.size)(classOf[Expression]) val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse { - val validParametersCount = constructors - .filter(_.getParameterTypes.forall(_ == classOf[Expression])) - .map(_.getParameterCount).distinct.sorted + val (validParameters, invalidParameters) = constructors + .partition(_.getParameterTypes.forall(_ == classOf[Expression])) + val validParametersCount = validParameters.map(_.getParameterCount).distinct.sorted + + def getExpectedNumberOfParameters(parametersCount: Array[Int]): String = { + if (parametersCount.length == 1) { + parametersCount.head.toString + } else { + parametersCount.init.mkString("one of ", ", ", " and ") + parametersCount.last + } + } val expectedNumberOfParameters = if (validParametersCount.isEmpty) { - constructors.headOption.map(_.getParameterCount).getOrElse(0).toString - } else if (validParametersCount.length == 1) { - validParametersCount.head.toString + val invalidParametersCount = invalidParameters.map(_.getParameterCount).distinct.sorted + getExpectedNumberOfParameters(invalidParametersCount) + + "(But none is valid, please check whether it is properly defined or misused)" } else { - validParametersCount.init.mkString("one of ", ", ", " and ") + - validParametersCount.last + getExpectedNumberOfParameters(validParametersCount) } throw new AnalysisException(s"Invalid number of arguments for function $name. " + s"Expected: $expectedNumberOfParameters; Found: ${params.length}")