From 0b7de13d2b6477df8682189fb66dc8f1fa3955be Mon Sep 17 00:00:00 2001 From: beliefer Date: Sat, 17 Apr 2021 22:15:11 +0800 Subject: [PATCH 1/3] The generated data fits the precision of DayTimeIntervalType in spark --- .../org/apache/spark/sql/RandomDataGenerator.scala | 9 +++++++-- .../spark/sql/RandomDataGeneratorSuite.scala | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index fad2ccff6021b..021f9b95e3d32 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -25,8 +25,8 @@ import scala.collection.mutable import scala.util.{Random, Try} import org.apache.spark.sql.catalyst.CatalystTypeConverters +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeConstants.{MICROS_PER_MILLIS, MILLIS_PER_DAY} -import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -275,8 +275,13 @@ object RandomDataGenerator { case DayTimeIntervalType => Some(() => { val maxSeconds = Duration.ofDays(106751991).getSeconds val seconds = rand.nextLong() % maxSeconds + // The precision of java.time.Duration is nanosecond, but when it is used as + // DayTimeIntervalType in Spark, it is microsecond. Here by following the behavior + // of DurationConverter to achieve consistency val nanoAdjustment = rand.nextLong() % 999999000 - Duration.ofSeconds(seconds, nanoAdjustment) + val duration = Duration.ofSeconds(seconds, nanoAdjustment) + val micros = IntervalUtils.durationToMicros(duration) + IntervalUtils.microsToDuration(micros) }) case YearMonthIntervalType => Some(() => { val years = rand.nextInt() % 178956970 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala index cb335e5f435a3..9a045c9aafe3a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import java.nio.ByteBuffer +import java.time.Duration import java.util.Arrays import scala.util.Random @@ -143,4 +144,17 @@ class RandomDataGeneratorSuite extends SparkFunSuite with SQLHelper { assert(!Arrays.equals(array1, arrayExpected)) assert(Arrays.equals(array2, arrayExpected)) } + + test("SPARK-35116: The generated data fits the precision of DayTimeIntervalType in spark") { + for (seed <- 1 to 1000) { + val generator = RandomDataGenerator.forType( + DayTimeIntervalType, nullable = false, rand = new Random(seed)).get + val toCatalyst = CatalystTypeConverters.createToCatalystConverter(DayTimeIntervalType) + val toScala = CatalystTypeConverters.createToScalaConverter(DayTimeIntervalType) + val duration = generator.apply().asInstanceOf[Duration] + val micros = toCatalyst(duration) + val convertedBack = toScala(micros) + assert(duration == convertedBack) + } + } } From 8201a77091868c0ec7f5c25ee75ec171e1ce6cec Mon Sep 17 00:00:00 2001 From: beliefer Date: Sun, 18 Apr 2021 09:30:37 +0800 Subject: [PATCH 2/3] Update code --- .../spark/sql/RandomDataGenerator.scala | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 021f9b95e3d32..a07712dd336db 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -20,13 +20,14 @@ package org.apache.spark.sql import java.math.MathContext import java.sql.{Date, Timestamp} import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period, ZoneId} +import java.time.temporal.ChronoUnit import scala.collection.mutable import scala.util.{Random, Try} import org.apache.spark.sql.catalyst.CatalystTypeConverters -import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeConstants.{MICROS_PER_MILLIS, MILLIS_PER_DAY} +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -272,22 +273,8 @@ object RandomDataGenerator { val ns = rand.nextLong() new CalendarInterval(months, days, ns) }) - case DayTimeIntervalType => Some(() => { - val maxSeconds = Duration.ofDays(106751991).getSeconds - val seconds = rand.nextLong() % maxSeconds - // The precision of java.time.Duration is nanosecond, but when it is used as - // DayTimeIntervalType in Spark, it is microsecond. Here by following the behavior - // of DurationConverter to achieve consistency - val nanoAdjustment = rand.nextLong() % 999999000 - val duration = Duration.ofSeconds(seconds, nanoAdjustment) - val micros = IntervalUtils.durationToMicros(duration) - IntervalUtils.microsToDuration(micros) - }) - case YearMonthIntervalType => Some(() => { - val years = rand.nextInt() % 178956970 - val months = rand.nextInt() % 12 - Period.of(years, months, 0) - }) + case DayTimeIntervalType => Some(() => Duration.of(rand.nextLong(), ChronoUnit.MICROS)) + case YearMonthIntervalType => Some(() => Period.ofMonths(rand.nextInt())) case DecimalType.Fixed(precision, scale) => Some( () => BigDecimal.apply( rand.nextLong() % math.pow(10, precision).toLong, From e28601f4ac03af101619f471026f16d25eace4da Mon Sep 17 00:00:00 2001 From: beliefer Date: Sun, 18 Apr 2021 09:46:41 +0800 Subject: [PATCH 3/3] Add test cases --- .../spark/sql/RandomDataGenerator.scala | 2 +- .../spark/sql/RandomDataGeneratorSuite.scala | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index a07712dd336db..6d2ea464ec7bb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -274,7 +274,7 @@ object RandomDataGenerator { new CalendarInterval(months, days, ns) }) case DayTimeIntervalType => Some(() => Duration.of(rand.nextLong(), ChronoUnit.MICROS)) - case YearMonthIntervalType => Some(() => Period.ofMonths(rand.nextInt())) + case YearMonthIntervalType => Some(() => Period.ofMonths(rand.nextInt()).normalized()) case DecimalType.Fixed(precision, scale) => Some( () => BigDecimal.apply( rand.nextLong() % math.pow(10, precision).toLong, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala index 9a045c9aafe3a..69dca2cb7384b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGeneratorSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql import java.nio.ByteBuffer -import java.time.Duration import java.util.Arrays import scala.util.Random @@ -146,15 +145,16 @@ class RandomDataGeneratorSuite extends SparkFunSuite with SQLHelper { } test("SPARK-35116: The generated data fits the precision of DayTimeIntervalType in spark") { - for (seed <- 1 to 1000) { - val generator = RandomDataGenerator.forType( - DayTimeIntervalType, nullable = false, rand = new Random(seed)).get - val toCatalyst = CatalystTypeConverters.createToCatalystConverter(DayTimeIntervalType) - val toScala = CatalystTypeConverters.createToScalaConverter(DayTimeIntervalType) - val duration = generator.apply().asInstanceOf[Duration] - val micros = toCatalyst(duration) - val convertedBack = toScala(micros) - assert(duration == convertedBack) + Seq(DayTimeIntervalType, YearMonthIntervalType).foreach { dt => + for (seed <- 1 to 1000) { + val generator = RandomDataGenerator.forType(dt, false, new Random(seed)).get + val toCatalyst = CatalystTypeConverters.createToCatalystConverter(dt) + val toScala = CatalystTypeConverters.createToScalaConverter(dt) + val data = generator.apply() + val catalyst = toCatalyst(data) + val convertedBack = toScala(catalyst) + assert(data == convertedBack) + } } } }