From ad79c56ca038fac6797814410d665110ef43e826 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 20 Sep 2018 20:15:41 +0800 Subject: [PATCH 1/3] add a config to turn off precise precision for integral literals --- .../sql/catalyst/analysis/DecimalPrecision.scala | 10 ++++++---- .../org/apache/spark/sql/internal/SQLConf.scala | 12 ++++++++++++ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 7 +++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index e511f8064e28a..325336a78cce5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -290,11 +290,13 @@ object DecimalPrecision extends TypeCoercionRule { // potentially loosing 11 digits of the fractional part. Using only the precision needed // by the Literal, instead, the result would be DECIMAL(38 + 1 + 1, 18), which would // become DECIMAL(38, 16), safely having a much lower precision loss. - case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] - && l.dataType.isInstanceOf[IntegralType] => + case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] && + l.dataType.isInstanceOf[IntegralType] && + SQLConf.get.literalPrecisePrecision => b.makeCopy(Array(Cast(l, DecimalType.fromLiteral(l)), r)) - case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType] - && r.dataType.isInstanceOf[IntegralType] => + case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType] && + r.dataType.isInstanceOf[IntegralType] && + SQLConf.get.literalPrecisePrecision => b.makeCopy(Array(l, Cast(r, DecimalType.fromLiteral(r)))) // Promote integers inside a binary expression with fixed-precision decimals to decimals, // and fixed-precision decimals in an expression with floats / doubles to doubles diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a01e87c8d1dd3..7bbce9eb02fd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1345,6 +1345,16 @@ object SQLConf { .booleanConf .createWithDefault(true) + val LITERAL_PRECISE_PRECISION = + buildConf("spark.sql.literal.precisePrecision") + .internal() + .doc("When integral literals are used with decimals in binary operators, Spark will " + + "pick a precise precision for the literals to calculate the precision and scale " + + "of the result decimal, when this config is true. By picking a precise precision, we " + + "can avoid wasting precision, to reduce the possibility of overflow.") + .booleanConf + .createWithDefault(true) + val SQL_OPTIONS_REDACTION_PATTERN = buildConf("spark.sql.redaction.options.regex") .doc("Regex to decide which keys in a Spark SQL command's options map contain sensitive " + @@ -1939,6 +1949,8 @@ class SQLConf extends Serializable with Logging { def decimalOperationsAllowPrecisionLoss: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_PREC_LOSS) + def literalPrecisePrecision: Boolean = getConf(LITERAL_PRECISE_PRECISION) + def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE) def continuousStreamingExecutorPollIntervalMs: Long = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 01dc28d70184e..2d78343c9b7ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2858,6 +2858,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val result = ds.flatMap(_.bar).distinct result.rdd.isEmpty } + + test("SPARK-25454: decimal division with negative scale") { + // TODO: completely fix this issue even LITERAL_PRECISE_PRECISION is true. + withSQLConf(SQLConf.LITERAL_PRECISE_PRECISION.key -> "false") { + checkAnswer(sql("select 26393499451 / (1e6 * 1000)"), Row(BigDecimal("26.3934994510000"))) + } + } } case class Foo(bar: Option[String]) From b4fdd1307059c7df7c386a96aad6bc17b593d9c5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 21 Sep 2018 21:46:20 +0800 Subject: [PATCH 2/3] address comments --- .../sql/catalyst/analysis/DecimalPrecision.scala | 4 ++-- .../org/apache/spark/sql/internal/SQLConf.scala | 13 ++++++------- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 4 ++-- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 325336a78cce5..82692334544e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -292,11 +292,11 @@ object DecimalPrecision extends TypeCoercionRule { // become DECIMAL(38, 16), safely having a much lower precision loss. case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] && l.dataType.isInstanceOf[IntegralType] && - SQLConf.get.literalPrecisePrecision => + SQLConf.get.literalPickMinimumPrecision => b.makeCopy(Array(Cast(l, DecimalType.fromLiteral(l)), r)) case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType] && r.dataType.isInstanceOf[IntegralType] && - SQLConf.get.literalPrecisePrecision => + SQLConf.get.literalPickMinimumPrecision => b.makeCopy(Array(l, Cast(r, DecimalType.fromLiteral(r)))) // Promote integers inside a binary expression with fixed-precision decimals to decimals, // and fixed-precision decimals in an expression with floats / doubles to doubles diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 7bbce9eb02fd7..ab0560fc382bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1345,13 +1345,12 @@ object SQLConf { .booleanConf .createWithDefault(true) - val LITERAL_PRECISE_PRECISION = - buildConf("spark.sql.literal.precisePrecision") + val LITERAL_PICK_MINIMUM_PRECISION = + buildConf("spark.sql.literal.pickMinimumPrecision") .internal() - .doc("When integral literals are used with decimals in binary operators, Spark will " + - "pick a precise precision for the literals to calculate the precision and scale " + - "of the result decimal, when this config is true. By picking a precise precision, we " + - "can avoid wasting precision, to reduce the possibility of overflow.") + .doc("When integral literal is used in decimal operations, pick a minimum precision " + + "required by the literal if this config is true, to make the resulting precision and/or " + + "scale smaller. This can reduce the possibility of precision lose and/or overflow.") .booleanConf .createWithDefault(true) @@ -1949,7 +1948,7 @@ class SQLConf extends Serializable with Logging { def decimalOperationsAllowPrecisionLoss: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_PREC_LOSS) - def literalPrecisePrecision: Boolean = getConf(LITERAL_PRECISE_PRECISION) + def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION) def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 2d78343c9b7ac..6bee5bcee2a30 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2860,8 +2860,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } test("SPARK-25454: decimal division with negative scale") { - // TODO: completely fix this issue even LITERAL_PRECISE_PRECISION is true. - withSQLConf(SQLConf.LITERAL_PRECISE_PRECISION.key -> "false") { + // TODO: completely fix this issue even when LITERAL_PRECISE_PRECISION is true. + withSQLConf(SQLConf.LITERAL_PICK_MINIMUM_PRECISION.key -> "false") { checkAnswer(sql("select 26393499451 / (1e6 * 1000)"), Row(BigDecimal("26.3934994510000"))) } } From faa0bd065114ea7f0f9836e36ac40cd311e1983f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 24 Sep 2018 21:41:27 +0800 Subject: [PATCH 3/3] add legacy --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ab0560fc382bb..02e9938cd3080 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1346,7 +1346,7 @@ object SQLConf { .createWithDefault(true) val LITERAL_PICK_MINIMUM_PRECISION = - buildConf("spark.sql.literal.pickMinimumPrecision") + buildConf("spark.sql.legacy.literal.pickMinimumPrecision") .internal() .doc("When integral literal is used in decimal operations, pick a minimum precision " + "required by the literal if this config is true, to make the resulting precision and/or " +