From c3ee986181be6ab23b06b515f0f53cb936d12d76 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 9 Dec 2019 16:50:14 +0800 Subject: [PATCH 01/10] [SPARK-30183][SQL] Disallow to specify reserved properties in CREATE NAMESPACE syntax --- .../spark/sql/catalyst/parser/AstBuilder.scala | 15 +++++++++++---- .../sql/connector/DataSourceV2SQLSuite.scala | 10 ++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 858870a161417..95b3855004d99 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2520,6 +2520,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * }}} */ override def visitCreateNamespace(ctx: CreateNamespaceContext): LogicalPlan = withOrigin(ctx) { + import SupportsNamespaces._ checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx) checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) checkDuplicateClauses(ctx.PROPERTIES, "WITH PROPERTIES", ctx) @@ -2532,11 +2533,17 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging var properties = ctx.tablePropertyList.asScala.headOption .map(visitPropertyKeyValues) .getOrElse(Map.empty) - Option(ctx.comment).map(string).map { - properties += SupportsNamespaces.PROP_COMMENT -> _ + + if (properties.keySet.intersect(RESERVED_PROPERTIES.asScala.toSet).nonEmpty) { + throw new ParseException(s"Disallow to specify reserved properties, including" + + s" ${RESERVED_PROPERTIES.asScala.mkString("(", ",", ")")}.", ctx) + } + + Option(ctx.comment).map(string).foreach { + properties += PROP_COMMENT -> _ } - ctx.locationSpec.asScala.headOption.map(visitLocationSpec).map { - properties += SupportsNamespaces.PROP_LOCATION -> _ + ctx.locationSpec.asScala.headOption.map(visitLocationSpec).foreach { + properties += PROP_LOCATION -> _ } CreateNamespaceStatement( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index ebaf753696fd5..b8357f3d3ed82 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -22,6 +22,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} @@ -856,6 +857,15 @@ class DataSourceV2SQLSuite } } + test("CreateNameSpace: reserved properties") { + withNamespace("testcat.ns1") { + val exception = intercept[ParseException] { + sql("CREATE NAMESPACE testcat.ns1 WITH DBPROPERTIES('location'='/dummy/path')") + } + assert(exception.getMessage.contains("Disallow to specify reserved properties")) + } + } + test("DropNamespace: basic tests") { // Session catalog is used. sql("CREATE NAMESPACE ns") From c7c2ebf78185ff78e2e2523cc1daea6c589c8c25 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 3 Jan 2020 17:52:55 +0800 Subject: [PATCH 02/10] apply new framework --- .../sql/catalyst/analysis/ResolveCatalogs.scala | 3 --- .../spark/sql/catalyst/parser/AstBuilder.scala | 8 +++----- .../sql/catalyst/plans/logical/statements.scala | 8 -------- .../sql/catalyst/plans/logical/v2Commands.scala | 7 ++++--- .../sql/catalyst/parser/DDLParserSuite.scala | 16 +++++++++------- .../analysis/ResolveSessionCatalog.scala | 3 ++- .../datasources/v2/DataSourceV2Strategy.scala | 9 ++++----- .../sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- 8 files changed, 24 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 4487a9f1b6b8e..2300f377c4578 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -190,9 +190,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) s"Can not specify catalog `${catalog.name}` for view ${viewName.quoted} " + s"because view support in catalog has not been implemented yet") - case c @ CreateNamespaceStatement(NonSessionCatalogAndNamespace(catalog, ns), _, _) => - CreateNamespace(catalog.asNamespaceCatalog, ns, c.ifNotExists, c.properties) - case DropNamespaceStatement(NonSessionCatalogAndNamespace(catalog, ns), ifExists, cascade) => DropNamespace(catalog, ns, ifExists, cascade) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 285710a7b3db4..8938dc0cb143d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2499,7 +2499,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[CreateNamespaceStatement]] command. + * Create a [[CreateNamespace]] command. * * For example: * {{{ @@ -2539,10 +2539,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging properties += PROP_LOCATION -> _ } - CreateNamespaceStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), - ctx.EXISTS != null, - properties) + val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier) + CreateNamespace(UnresolvedNamespace(nameParts), ctx.EXISTS != null, properties) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index e205dd4e28993..1c638f5a5b7eb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -358,14 +358,6 @@ case class ShowTableStatement( partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * A CREATE NAMESPACE statement, as parsed from SQL. - */ -case class CreateNamespaceStatement( - namespace: Seq[String], - ifNotExists: Boolean, - properties: Map[String, String]) extends ParsedStatement - /** * A DROP NAMESPACE statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 10e9dc85a0516..9192c9dc83888 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -241,10 +241,11 @@ case class ReplaceTableAsSelect( * The logical plan of the CREATE NAMESPACE command that works for v2 catalogs. */ case class CreateNamespace( - catalog: SupportsNamespaces, - namespace: Seq[String], + child: LogicalPlan, ifNotExists: Boolean, - properties: Map[String, String]) extends Command + properties: Map[String, String]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} /** * The logical plan of the DROP NAMESPACE command that works for v2 catalogs. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index e6b2c64401470..2dcc159d27a54 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -32,6 +32,8 @@ import org.apache.spark.unsafe.types.UTF8String class DDLParserSuite extends AnalysisTest { import CatalystSqlParser._ + private val unresolvedNamespace = UnresolvedNamespace(Seq("a", "b", "c")) + private def assertUnsupported(sql: String, containsThesePhrases: Seq[String] = Seq()): Unit = { val e = intercept[ParseException] { parsePlan(sql) @@ -1144,8 +1146,8 @@ class DDLParserSuite extends AnalysisTest { } test("create namespace -- backward compatibility with DATABASE/DBPROPERTIES") { - val expected = CreateNamespaceStatement( - Seq("a", "b", "c"), + val expected = CreateNamespace( + unresolvedNamespace, ifNotExists = true, Map( "a" -> "a", @@ -1217,8 +1219,8 @@ class DDLParserSuite extends AnalysisTest { """.stripMargin comparePlans( parsePlan(sql), - CreateNamespaceStatement( - Seq("a", "b", "c"), + CreateNamespace( + unresolvedNamespace, ifNotExists = false, Map( "a" -> "1", @@ -1986,15 +1988,15 @@ class DDLParserSuite extends AnalysisTest { test("comment on") { comparePlans( parsePlan("COMMENT ON DATABASE a.b.c IS NULL"), - CommentOnNamespace(UnresolvedNamespace(Seq("a", "b", "c")), "")) + CommentOnNamespace(unresolvedNamespace, "")) comparePlans( parsePlan("COMMENT ON DATABASE a.b.c IS 'NULL'"), - CommentOnNamespace(UnresolvedNamespace(Seq("a", "b", "c")), "NULL")) + CommentOnNamespace(unresolvedNamespace, "NULL")) comparePlans( parsePlan("COMMENT ON NAMESPACE a.b.c IS ''"), - CommentOnNamespace(UnresolvedNamespace(Seq("a", "b", "c")), "")) + CommentOnNamespace(unresolvedNamespace, "")) comparePlans( parsePlan("COMMENT ON TABLE a.b.c IS 'xYz'"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 5abde31466d0e..beefcad81d499 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogT import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, SupportsNamespaces, Table, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.CatalogV2Util.isSessionCatalog import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, RefreshTable} @@ -316,7 +317,7 @@ class ResolveSessionCatalog( case DropViewStatement(SessionCatalogAndTable(catalog, viewName), ifExists) => DropTableCommand(viewName.asTableIdentifier, ifExists, isView = true, purge = false) - case c @ CreateNamespaceStatement(SessionCatalogAndNamespace(_, ns), _, _) => + case c @ CreateNamespace(ResolvedNamespace(catalog, ns), _, _) if isSessionCatalog(catalog) => if (ns.length != 1) { throw new AnalysisException( s"The database name is not valid: ${ns.quoted}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 938af1d87ddc0..815ee41439c4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -23,8 +23,8 @@ import org.apache.spark.sql.{AnalysisException, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical.{AlterNamespaceSetProperties, AlterTable, AppendData, CommentOnNamespace, CommentOnTable, CreateNamespace, CreateTableAsSelect, CreateV2Table, DeleteFromTable, DescribeNamespace, DescribeTable, DropNamespace, DropTable, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, RefreshTable, RenameTable, Repartition, ReplaceTable, ReplaceTableAsSelect, SetCatalogAndNamespace, ShowCurrentNamespace, ShowNamespaces, ShowTableProperties, ShowTables} -import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsNamespaces, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy @@ -33,8 +33,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap object DataSourceV2Strategy extends Strategy with PredicateHelper { - import DataSourceV2Implicits._ - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation: DataSourceV2ScanRelation) => @@ -222,7 +221,7 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper { val changes = TableChange.setProperty(TableCatalog.PROP_COMMENT, comment) AlterTableExec(catalog, identifier, Seq(changes)) :: Nil - case CreateNamespace(catalog, namespace, ifNotExists, properties) => + case CreateNamespace(ResolvedNamespace(catalog, namespace), ifNotExists, properties) => CreateNamespaceExec(catalog, namespace, ifNotExists, properties) :: Nil case DropNamespace(catalog, namespace, ifExists, cascade) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index a1a2d6de3c1fc..90656a7f25c1f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -866,9 +866,9 @@ class DataSourceV2SQLSuite } test("CreateNameSpace: reserved properties") { - withNamespace("testcat.ns1") { + SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => val exception = intercept[ParseException] { - sql("CREATE NAMESPACE testcat.ns1 WITH DBPROPERTIES('location'='/dummy/path')") + sql(s"CREATE NAMESPACE testcat.ns1 WITH DBPROPERTIES('$key'='dummyVal')") } assert(exception.getMessage.contains("Disallow to specify reserved properties")) } From 5abbfc212eea46c0a209bdc2fb0aced6be8b033b Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 6 Jan 2020 10:28:49 +0800 Subject: [PATCH 03/10] revert catalog related change --- .../sql/catalyst/analysis/ResolveCatalogs.scala | 3 +++ .../spark/sql/catalyst/parser/AstBuilder.scala | 8 +++++--- .../sql/catalyst/plans/logical/statements.scala | 8 ++++++++ .../sql/catalyst/plans/logical/v2Commands.scala | 7 +++---- .../sql/catalyst/parser/DDLParserSuite.scala | 16 +++++++--------- .../analysis/ResolveSessionCatalog.scala | 2 +- .../datasources/v2/DataSourceV2Strategy.scala | 9 +++++---- 7 files changed, 32 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 2300f377c4578..4487a9f1b6b8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -190,6 +190,9 @@ class ResolveCatalogs(val catalogManager: CatalogManager) s"Can not specify catalog `${catalog.name}` for view ${viewName.quoted} " + s"because view support in catalog has not been implemented yet") + case c @ CreateNamespaceStatement(NonSessionCatalogAndNamespace(catalog, ns), _, _) => + CreateNamespace(catalog.asNamespaceCatalog, ns, c.ifNotExists, c.properties) + case DropNamespaceStatement(NonSessionCatalogAndNamespace(catalog, ns), ifExists, cascade) => DropNamespace(catalog, ns, ifExists, cascade) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 8938dc0cb143d..285710a7b3db4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2499,7 +2499,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[CreateNamespace]] command. + * Create a [[CreateNamespaceStatement]] command. * * For example: * {{{ @@ -2539,8 +2539,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging properties += PROP_LOCATION -> _ } - val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier) - CreateNamespace(UnresolvedNamespace(nameParts), ctx.EXISTS != null, properties) + CreateNamespaceStatement( + visitMultipartIdentifier(ctx.multipartIdentifier), + ctx.EXISTS != null, + properties) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 1c638f5a5b7eb..e205dd4e28993 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -358,6 +358,14 @@ case class ShowTableStatement( partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement +/** + * A CREATE NAMESPACE statement, as parsed from SQL. + */ +case class CreateNamespaceStatement( + namespace: Seq[String], + ifNotExists: Boolean, + properties: Map[String, String]) extends ParsedStatement + /** * A DROP NAMESPACE statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 9192c9dc83888..10e9dc85a0516 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -241,11 +241,10 @@ case class ReplaceTableAsSelect( * The logical plan of the CREATE NAMESPACE command that works for v2 catalogs. */ case class CreateNamespace( - child: LogicalPlan, + catalog: SupportsNamespaces, + namespace: Seq[String], ifNotExists: Boolean, - properties: Map[String, String]) extends Command { - override def children: Seq[LogicalPlan] = child :: Nil -} + properties: Map[String, String]) extends Command /** * The logical plan of the DROP NAMESPACE command that works for v2 catalogs. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 2dcc159d27a54..e6b2c64401470 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -32,8 +32,6 @@ import org.apache.spark.unsafe.types.UTF8String class DDLParserSuite extends AnalysisTest { import CatalystSqlParser._ - private val unresolvedNamespace = UnresolvedNamespace(Seq("a", "b", "c")) - private def assertUnsupported(sql: String, containsThesePhrases: Seq[String] = Seq()): Unit = { val e = intercept[ParseException] { parsePlan(sql) @@ -1146,8 +1144,8 @@ class DDLParserSuite extends AnalysisTest { } test("create namespace -- backward compatibility with DATABASE/DBPROPERTIES") { - val expected = CreateNamespace( - unresolvedNamespace, + val expected = CreateNamespaceStatement( + Seq("a", "b", "c"), ifNotExists = true, Map( "a" -> "a", @@ -1219,8 +1217,8 @@ class DDLParserSuite extends AnalysisTest { """.stripMargin comparePlans( parsePlan(sql), - CreateNamespace( - unresolvedNamespace, + CreateNamespaceStatement( + Seq("a", "b", "c"), ifNotExists = false, Map( "a" -> "1", @@ -1988,15 +1986,15 @@ class DDLParserSuite extends AnalysisTest { test("comment on") { comparePlans( parsePlan("COMMENT ON DATABASE a.b.c IS NULL"), - CommentOnNamespace(unresolvedNamespace, "")) + CommentOnNamespace(UnresolvedNamespace(Seq("a", "b", "c")), "")) comparePlans( parsePlan("COMMENT ON DATABASE a.b.c IS 'NULL'"), - CommentOnNamespace(unresolvedNamespace, "NULL")) + CommentOnNamespace(UnresolvedNamespace(Seq("a", "b", "c")), "NULL")) comparePlans( parsePlan("COMMENT ON NAMESPACE a.b.c IS ''"), - CommentOnNamespace(unresolvedNamespace, "")) + CommentOnNamespace(UnresolvedNamespace(Seq("a", "b", "c")), "")) comparePlans( parsePlan("COMMENT ON TABLE a.b.c IS 'xYz'"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index beefcad81d499..5131f5a66fe17 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -317,7 +317,7 @@ class ResolveSessionCatalog( case DropViewStatement(SessionCatalogAndTable(catalog, viewName), ifExists) => DropTableCommand(viewName.asTableIdentifier, ifExists, isView = true, purge = false) - case c @ CreateNamespace(ResolvedNamespace(catalog, ns), _, _) if isSessionCatalog(catalog) => + case c @ CreateNamespaceStatement(SessionCatalogAndNamespace(_, ns), _, _) => if (ns.length != 1) { throw new AnalysisException( s"The database name is not valid: ${ns.quoted}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 815ee41439c4b..938af1d87ddc0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -23,8 +23,8 @@ import org.apache.spark.sql.{AnalysisException, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.connector.catalog._ +import org.apache.spark.sql.catalyst.plans.logical.{AlterNamespaceSetProperties, AlterTable, AppendData, CommentOnNamespace, CommentOnTable, CreateNamespace, CreateTableAsSelect, CreateV2Table, DeleteFromTable, DescribeNamespace, DescribeTable, DropNamespace, DropTable, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, RefreshTable, RenameTable, Repartition, ReplaceTable, ReplaceTableAsSelect, SetCatalogAndNamespace, ShowCurrentNamespace, ShowNamespaces, ShowTableProperties, ShowTables} +import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsNamespaces, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy @@ -33,7 +33,8 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap object DataSourceV2Strategy extends Strategy with PredicateHelper { - import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ + import DataSourceV2Implicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation: DataSourceV2ScanRelation) => @@ -221,7 +222,7 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper { val changes = TableChange.setProperty(TableCatalog.PROP_COMMENT, comment) AlterTableExec(catalog, identifier, Seq(changes)) :: Nil - case CreateNamespace(ResolvedNamespace(catalog, namespace), ifNotExists, properties) => + case CreateNamespace(catalog, namespace, ifNotExists, properties) => CreateNamespaceExec(catalog, namespace, ifNotExists, properties) :: Nil case DropNamespace(catalog, namespace, ifExists, cascade) => From ae9a0eb84be1f8c4b2edb7e3d338f27d02546d81 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 6 Jan 2020 13:28:20 +0800 Subject: [PATCH 04/10] nit --- .../scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../spark/sql/catalyst/analysis/ResolveSessionCatalog.scala | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 285710a7b3db4..0834065db9e8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2527,7 +2527,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging .map(visitPropertyKeyValues) .getOrElse(Map.empty) - if (properties.keySet.intersect(RESERVED_PROPERTIES.asScala.toSet).nonEmpty) { + if (properties.keySet.exists(RESERVED_PROPERTIES.contains)) { throw new ParseException(s"Disallow to specify reserved properties, including" + s" ${RESERVED_PROPERTIES.asScala.mkString("(", ",", ")")}.", ctx) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 5131f5a66fe17..5abde31466d0e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogT import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, SupportsNamespaces, Table, TableCatalog, TableChange, V1Table} -import org.apache.spark.sql.connector.catalog.CatalogV2Util.isSessionCatalog import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, RefreshTable} From 499579ca56b94ca501f165dcfb7fe6bf47f32644 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 7 Jan 2020 15:50:02 +0800 Subject: [PATCH 05/10] prohibit the reserved from alter namespace set properite --- docs/sql-migration-guide.md | 2 + .../sql/catalyst/parser/AstBuilder.scala | 22 +++++-- .../apache/spark/sql/internal/SQLConf.scala | 10 ++++ .../sql/connector/DataSourceV2SQLSuite.scala | 57 +++++++++++++++++-- 4 files changed, 82 insertions(+), 9 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 76e075d18ab1f..3d456c538cb2b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -264,6 +264,8 @@ license: | - Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`. + - Since Spark 3.0, the namespace properties `location` and `comment` become reserved, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE NAMESPACE` and `ALTER NAMESPACE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE NAMESPACE a.b.c COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will produce side effects, e.g `SET DBPROTERTIES('location'='/tmp')` might change the location of the database. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but create a headless property just like `'a'='b'`. + ## Upgrading from Spark SQL 2.4 to 2.4.1 - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 0834065db9e8e..37fe849bb8898 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2498,6 +2498,19 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } + private def checkNamespaceProperties( + properties: Map[String, String], ctx: ParserRuleContext): Unit = withOrigin(ctx) { + import SupportsNamespaces._ + if (!conf.getConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED)) { + properties.foreach { + case (PROP_LOCATION, _) => throw new ParseException(s"$PROP_LOCATION is a reserved" + + s" namespace property, please use the LOCATION clause to specify it.", ctx) + case (PROP_COMMENT, _) => throw new ParseException(s"$PROP_COMMENT is a reserved" + + s" namespace property, please use the COMMENT clause to specify it.", ctx) + case _ => + } + } + } /** * Create a [[CreateNamespaceStatement]] command. * @@ -2527,10 +2540,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging .map(visitPropertyKeyValues) .getOrElse(Map.empty) - if (properties.keySet.exists(RESERVED_PROPERTIES.contains)) { - throw new ParseException(s"Disallow to specify reserved properties, including" + - s" ${RESERVED_PROPERTIES.asScala.mkString("(", ",", ")")}.", ctx) - } + checkNamespaceProperties(properties, ctx) Option(ctx.comment).map(string).foreach { properties += PROP_COMMENT -> _ @@ -2571,9 +2581,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitSetNamespaceProperties(ctx: SetNamespacePropertiesContext): LogicalPlan = { withOrigin(ctx) { + val properties = visitPropertyKeyValues(ctx.tablePropertyList) + checkNamespaceProperties(properties, ctx) AlterNamespaceSetPropertiesStatement( visitMultipartIdentifier(ctx.multipartIdentifier), - visitPropertyKeyValues(ctx.tablePropertyList)) + properties) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 912a834c0bba3..badf9c012ade7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2135,6 +2135,16 @@ object SQLConf { "defined by `from` and `to`.") .booleanConf .createWithDefault(false) + + val LEGACY_PROPERTY_NON_RESERVED = + buildConf("spark.sql.legacy.property.nonReserved") + .internal() + .doc("When true, all namespace and table properties are not reserved and available for " + + "create/alter syntaxes. But please be aware that the reserved properties will still be " + + "used by Spark internally. It is highly discouraged to turn on this to avoid potential " + + "side-effects.") + .booleanConf + .createWithDefault(false) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 90656a7f25c1f..7fe6b1029c581 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -866,11 +866,29 @@ class DataSourceV2SQLSuite } test("CreateNameSpace: reserved properties") { - SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => - val exception = intercept[ParseException] { - sql(s"CREATE NAMESPACE testcat.ns1 WITH DBPROPERTIES('$key'='dummyVal')") + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { + SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => + val exception = intercept[ParseException] { + sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='dummyVal')") + } + assert(exception.getMessage.contains(s"$key is a reserved namespace property")) + } + } + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { + SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => + withNamespace("testcat.reservedTest") { + withTempDir { tmpDir => + val sideEffectVal = tmpDir.getCanonicalPath + sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='$sideEffectVal')") + assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") + .toDF("k", "v") + .where("k='Properties'") + .isEmpty, s"$key is a reserved namespace property") + assert(catalog("testcat").asNamespaceCatalog + .loadNamespaceMetadata(Array("reservedTest")).get(key) === sideEffectVal) + } + } } - assert(exception.getMessage.contains("Disallow to specify reserved properties")) } } @@ -971,6 +989,37 @@ class DataSourceV2SQLSuite } } + test("AlterNamespaceSetProperties: reserved properties") { + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { + SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => + withNamespace("testcat.reservedTest") { + sql("CREATE NAMESPACE testcat.reservedTest") + val exception = intercept[ParseException] { + sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='dummyVal')") + } + assert(exception.getMessage.contains(s"$key is a reserved namespace property")) + } + } + } + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { + SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => + withNamespace("testcat.reservedTest") { + withTempDir { tmpDir => + val sideEffectVal = tmpDir.getCanonicalPath + sql(s"CREATE NAMESPACE testcat.reservedTest") + sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='$sideEffectVal')") + assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") + .toDF("k", "v") + .where("k='Properties'") + .isEmpty, s"$key is a reserved namespace property") + assert(catalog("testcat").asNamespaceCatalog + .loadNamespaceMetadata(Array("reservedTest")).get(key) === sideEffectVal) + } + } + } + } + } + test("AlterNamespaceSetLocation using v2 catalog") { withNamespace("testcat.ns1.ns2") { sql("CREATE NAMESPACE IF NOT EXISTS testcat.ns1.ns2 COMMENT " + From 375089077103e30d5e03626f46dcdc25bd10f9d3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 7 Jan 2020 16:14:28 +0800 Subject: [PATCH 06/10] avoid side effect --- docs/sql-migration-guide.md | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 22 ++++++++++++------- .../apache/spark/sql/internal/SQLConf.scala | 5 ++--- .../sql/connector/DataSourceV2SQLSuite.scala | 16 +++++++------- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 3d456c538cb2b..99b35ae6bc172 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -264,7 +264,7 @@ license: | - Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`. - - Since Spark 3.0, the namespace properties `location` and `comment` become reserved, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE NAMESPACE` and `ALTER NAMESPACE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE NAMESPACE a.b.c COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will produce side effects, e.g `SET DBPROTERTIES('location'='/tmp')` might change the location of the database. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but create a headless property just like `'a'='b'`. + - Since Spark 3.0, the namespace properties `location` and `comment` become reserved, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will be ignored too, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but create a headless property just like `'a'='b'`. ## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 37fe849bb8898..146b51ac19953 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2499,18 +2499,25 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } private def checkNamespaceProperties( - properties: Map[String, String], ctx: ParserRuleContext): Unit = withOrigin(ctx) { + properties: Map[String, String], + ctx: ParserRuleContext): Map[String, String] = withOrigin(ctx) { import SupportsNamespaces._ if (!conf.getConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED)) { properties.foreach { - case (PROP_LOCATION, _) => throw new ParseException(s"$PROP_LOCATION is a reserved" + - s" namespace property, please use the LOCATION clause to specify it.", ctx) - case (PROP_COMMENT, _) => throw new ParseException(s"$PROP_COMMENT is a reserved" + - s" namespace property, please use the COMMENT clause to specify it.", ctx) + case (PROP_LOCATION, _) => + throw new ParseException(s"$PROP_LOCATION is a reserved" + s" namespace property," + + s" please use the LOCATION clause to specify it.", ctx) + case (PROP_COMMENT, _) => + throw new ParseException(s"$PROP_COMMENT is a reserved" + s" namespace property," + + s" please use the COMMENT clause to specify it.", ctx) case _ => } + properties + } else { + properties -- RESERVED_PROPERTIES.asScala } } + /** * Create a [[CreateNamespaceStatement]] command. * @@ -2540,7 +2547,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging .map(visitPropertyKeyValues) .getOrElse(Map.empty) - checkNamespaceProperties(properties, ctx) + properties = checkNamespaceProperties(properties, ctx) Option(ctx.comment).map(string).foreach { properties += PROP_COMMENT -> _ @@ -2581,8 +2588,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitSetNamespaceProperties(ctx: SetNamespacePropertiesContext): LogicalPlan = { withOrigin(ctx) { - val properties = visitPropertyKeyValues(ctx.tablePropertyList) - checkNamespaceProperties(properties, ctx) + val properties = checkNamespaceProperties(visitPropertyKeyValues(ctx.tablePropertyList), ctx) AlterNamespaceSetPropertiesStatement( visitMultipartIdentifier(ctx.multipartIdentifier), properties) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index badf9c012ade7..4ade102aa3ce6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2139,10 +2139,9 @@ object SQLConf { val LEGACY_PROPERTY_NON_RESERVED = buildConf("spark.sql.legacy.property.nonReserved") .internal() - .doc("When true, all namespace and table properties are not reserved and available for " + + .doc("When true, all database and table properties are not reserved and available for " + "create/alter syntaxes. But please be aware that the reserved properties will still be " + - "used by Spark internally. It is highly discouraged to turn on this to avoid potential " + - "side-effects.") + "used by Spark internally and will ignore their user specified values.") .booleanConf .createWithDefault(false) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 7fe6b1029c581..a559aa8d41ced 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -878,14 +878,14 @@ class DataSourceV2SQLSuite SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => withNamespace("testcat.reservedTest") { withTempDir { tmpDir => - val sideEffectVal = tmpDir.getCanonicalPath - sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='$sideEffectVal')") + val noEffectVal = tmpDir.getCanonicalPath + sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='$noEffectVal')") assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") .toDF("k", "v") .where("k='Properties'") - .isEmpty, s"$key is a reserved namespace property") + .isEmpty, s"$key is a reserved namespace property and ignored") assert(catalog("testcat").asNamespaceCatalog - .loadNamespaceMetadata(Array("reservedTest")).get(key) === sideEffectVal) + .loadNamespaceMetadata(Array("reservedTest")).get(key) !== noEffectVal) } } } @@ -1005,15 +1005,15 @@ class DataSourceV2SQLSuite SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => withNamespace("testcat.reservedTest") { withTempDir { tmpDir => - val sideEffectVal = tmpDir.getCanonicalPath + val noEffectVal = tmpDir.getCanonicalPath sql(s"CREATE NAMESPACE testcat.reservedTest") - sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='$sideEffectVal')") + sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='$noEffectVal')") assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") .toDF("k", "v") .where("k='Properties'") - .isEmpty, s"$key is a reserved namespace property") + .isEmpty, s"$key is a reserved namespace property and ignored") assert(catalog("testcat").asNamespaceCatalog - .loadNamespaceMetadata(Array("reservedTest")).get(key) === sideEffectVal) + .loadNamespaceMetadata(Array("reservedTest")).get(key) !== noEffectVal) } } } From 735d93e9d3d82385264458e63953b161e627ea9b Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 7 Jan 2020 16:16:06 +0800 Subject: [PATCH 07/10] nit --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 99b35ae6bc172..38cf42f37ac02 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -264,7 +264,7 @@ license: | - Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`. - - Since Spark 3.0, the namespace properties `location` and `comment` become reserved, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will be ignored too, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but create a headless property just like `'a'='b'`. + - Since Spark 3.0, the database properties `location` and `comment` become reserved, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will be ignored too, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but create a headless property just like `'a'='b'`. ## Upgrading from Spark SQL 2.4 to 2.4.1 From ff0054fe625b5190dd420595ad7caad0e1b5b508 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 7 Jan 2020 16:19:52 +0800 Subject: [PATCH 08/10] nit --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 38cf42f37ac02..9572a480cea04 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -264,7 +264,7 @@ license: | - Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`. - - Since Spark 3.0, the database properties `location` and `comment` become reserved, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will be ignored too, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but create a headless property just like `'a'='b'`. + - Since Spark 3.0, the words `location` and `comment` become reserved database properties, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will be ignored too, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but only create a headless property just like `'a'='b'`. ## Upgrading from Spark SQL 2.4 to 2.4.1 From a16a15e12a418a1f59d3c5506c79c53e886055d2 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 7 Jan 2020 17:01:11 +0800 Subject: [PATCH 09/10] nit --- docs/sql-migration-guide.md | 2 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 14 +++++++------- .../org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 9572a480cea04..7a7e260177d4a 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -264,7 +264,7 @@ license: | - Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`. - - Since Spark 3.0, the words `location` and `comment` become reserved database properties, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, but notice that in this case, these properties will be ignored too, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but only create a headless property just like `'a'='b'`. + - Since Spark 3.0, the words `location` and `comment` become reserved database properties, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, in this case, these properties will be silently removed, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but only create a headless property just like `'a'='b'`. ## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 146b51ac19953..b17a11fa38551 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2498,18 +2498,18 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } } - private def checkNamespaceProperties( + private def cleanNamespaceProperties( properties: Map[String, String], ctx: ParserRuleContext): Map[String, String] = withOrigin(ctx) { import SupportsNamespaces._ if (!conf.getConf(SQLConf.LEGACY_PROPERTY_NON_RESERVED)) { properties.foreach { case (PROP_LOCATION, _) => - throw new ParseException(s"$PROP_LOCATION is a reserved" + s" namespace property," + - s" please use the LOCATION clause to specify it.", ctx) + throw new ParseException(s"$PROP_LOCATION is a reserved namespace property, please use" + + s" the LOCATION clause to specify it.", ctx) case (PROP_COMMENT, _) => - throw new ParseException(s"$PROP_COMMENT is a reserved" + s" namespace property," + - s" please use the COMMENT clause to specify it.", ctx) + throw new ParseException(s"$PROP_COMMENT is a reserved namespace property, please use" + + s" the COMMENT clause to specify it.", ctx) case _ => } properties @@ -2547,7 +2547,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging .map(visitPropertyKeyValues) .getOrElse(Map.empty) - properties = checkNamespaceProperties(properties, ctx) + properties = cleanNamespaceProperties(properties, ctx) Option(ctx.comment).map(string).foreach { properties += PROP_COMMENT -> _ @@ -2588,7 +2588,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitSetNamespaceProperties(ctx: SetNamespacePropertiesContext): LogicalPlan = { withOrigin(ctx) { - val properties = checkNamespaceProperties(visitPropertyKeyValues(ctx.tablePropertyList), ctx) + val properties = cleanNamespaceProperties(visitPropertyKeyValues(ctx.tablePropertyList), ctx) AlterNamespaceSetPropertiesStatement( visitMultipartIdentifier(ctx.multipartIdentifier), properties) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 4ade102aa3ce6..a819a51093679 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2140,8 +2140,8 @@ object SQLConf { buildConf("spark.sql.legacy.property.nonReserved") .internal() .doc("When true, all database and table properties are not reserved and available for " + - "create/alter syntaxes. But please be aware that the reserved properties will still be " + - "used by Spark internally and will ignore their user specified values.") + "create/alter syntaxes. But please be aware that the reserved properties will be " + + "silently removed.") .booleanConf .createWithDefault(false) } From 1e05f05c490cc6a8851706cc71313c64800b0792 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 8 Jan 2020 10:34:28 +0800 Subject: [PATCH 10/10] nit --- docs/sql-migration-guide.md | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 38 +++++++++---------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 7a7e260177d4a..ccc7a13102cd5 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -264,7 +264,7 @@ license: | - Since Spark 3.0, the function `percentile_approx` and its alias `approx_percentile` only accept integral value with range in `[1, 2147483647]` as its 3rd argument `accuracy`, fractional and string types are disallowed, e.g. `percentile_approx(10.0, 0.2, 1.8D)` will cause `AnalysisException`. In Spark version 2.4 and earlier, if `accuracy` is fractional or string value, it will be coerced to an int value, `percentile_approx(10.0, 0.2, 1.8D)` is operated as `percentile_approx(10.0, 0.2, 1)` which results in `10.0`. - - Since Spark 3.0, the words `location` and `comment` become reserved database properties, it will fail with `ParseException` if we use them as members of `DBPROTERTIES` in `CREATE DATABASE` and `ALTER DATABASE ... SET PROPERTIES(...)`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, in this case, these properties will be silently removed, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but only create a headless property just like `'a'='b'`. + - Since Spark 3.0, `location` and `comment` become reserved database properties, Commands will fail if we specify reserved properties in `CREATE DATABASE ... WITH DBPROPERTIES` and `ALTER DATABASE ... SET DBPROPERTIES`. We need their specific clauses to specify them, e.g. `CREATE DATABASE test COMMENT 'any comment' LOCATION 'some path'`. We can set `spark.sql.legacy.property.nonReserved` to `true` to ignore the `ParseException`, in this case, these properties will be silently removed, e.g `SET DBPROTERTIES('location'='/tmp')` will affect nothing. In Spark version 2.4 and earlier, these properties are neither reserved nor have side effects, e.g. `SET DBPROTERTIES('location'='/tmp')` will not change the location of the database but only create a headless property just like `'a'='b'`. ## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index a559aa8d41ced..4c54a176e37be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -877,16 +877,14 @@ class DataSourceV2SQLSuite withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => withNamespace("testcat.reservedTest") { - withTempDir { tmpDir => - val noEffectVal = tmpDir.getCanonicalPath - sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='$noEffectVal')") - assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") - .toDF("k", "v") - .where("k='Properties'") - .isEmpty, s"$key is a reserved namespace property and ignored") - assert(catalog("testcat").asNamespaceCatalog - .loadNamespaceMetadata(Array("reservedTest")).get(key) !== noEffectVal) - } + sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='foo')") + assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") + .toDF("k", "v") + .where("k='Properties'") + .isEmpty, s"$key is a reserved namespace property and ignored") + val meta = + catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest")) + assert(meta.get(key) === null, "reserved properties should not have side effects") } } } @@ -1004,17 +1002,15 @@ class DataSourceV2SQLSuite withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { SupportsNamespaces.RESERVED_PROPERTIES.asScala.foreach { key => withNamespace("testcat.reservedTest") { - withTempDir { tmpDir => - val noEffectVal = tmpDir.getCanonicalPath - sql(s"CREATE NAMESPACE testcat.reservedTest") - sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='$noEffectVal')") - assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") - .toDF("k", "v") - .where("k='Properties'") - .isEmpty, s"$key is a reserved namespace property and ignored") - assert(catalog("testcat").asNamespaceCatalog - .loadNamespaceMetadata(Array("reservedTest")).get(key) !== noEffectVal) - } + sql(s"CREATE NAMESPACE testcat.reservedTest") + sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='foo')") + assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") + .toDF("k", "v") + .where("k='Properties'") + .isEmpty, s"$key is a reserved namespace property and ignored") + val meta = + catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest")) + assert(meta.get(key) === null, "reserved properties should not have side effects") } } }