From ba506a232bf43b9fed570f08ef4c64749030d4ac Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 17 Feb 2020 23:49:25 +0300 Subject: [PATCH 01/38] Move TimeZoneGMT/UTC to DateTimeTestUtils --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 3 --- .../sql/catalyst/expressions/CsvExpressionsSuite.scala | 7 ++++--- .../sql/catalyst/expressions/DateExpressionsSuite.scala | 1 - .../sql/catalyst/expressions/JsonExpressionsSuite.scala | 7 ++++--- .../spark/sql/catalyst/json/JacksonGeneratorSuite.scala | 5 +++-- .../apache/spark/sql/catalyst/util/DateTimeTestUtils.scala | 3 +++ .../org/apache/spark/sql/StatisticsCollectionSuite.scala | 7 ++++--- 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 593bd18f3de9c..917967fd22aeb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -47,9 +47,6 @@ object DateTimeUtils { // it's 2440587.5, rounding up to compatible with Hive final val JULIAN_DAY_OF_EPOCH = 2440588 - final val TimeZoneGMT = TimeZone.getTimeZone("GMT") - final val TimeZoneUTC = TimeZone.getTimeZone("UTC") - val TIMEZONE_OPTION = "timeZone" def defaultTimeZone(): TimeZone = TimeZone.getDefault() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index e623910e2efe1..3e2aa2184e1d7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -27,13 +27,14 @@ import org.apache.spark.sql.AnalysisException import 
org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneGMT import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with PlanTestBase { val badCsv = "\u0000\u0000\u0000A\u0001AAA" - val gmtId = Option(DateTimeUtils.TimeZoneGMT.getID) + val gmtId = Option(TimeZoneGMT.getID) test("from_csv") { val csvData = "1" @@ -74,7 +75,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P val schema = StructType(StructField("t", TimestampType) :: Nil) val csvData1 = "2016-01-01T00:00:00.123Z" - var c = Calendar.getInstance(DateTimeUtils.TimeZoneGMT) + var c = Calendar.getInstance(TimeZoneGMT) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) checkEvaluation( @@ -185,7 +186,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P test("to_csv with timestamp") { val schema = StructType(StructField("t", TimestampType) :: Nil) - val c = Calendar.getInstance(DateTimeUtils.TimeZoneGMT) + val c = Calendar.getInstance(TimeZoneGMT) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) val struct = Literal.create(create_row(c.getTimeInMillis * 1000L), schema) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index a514c902f1d1d..f1a36a5d62323 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjectio import 
org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneGMT import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 3693531f47610..e0ca0a8619df3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneGMT import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -395,7 +396,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("1"))) } - val gmtId = Option(DateTimeUtils.TimeZoneGMT.getID) + val gmtId = Option(TimeZoneGMT.getID) test("from_json") { val jsonData = """{"a": 1}""" @@ -502,7 +503,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val schema = StructType(StructField("t", TimestampType) :: Nil) val jsonData1 = """{"t": "2016-01-01T00:00:00.123Z"}""" - var c = Calendar.getInstance(DateTimeUtils.TimeZoneGMT) + var c = Calendar.getInstance(TimeZoneGMT) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) 
checkEvaluation( @@ -597,7 +598,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("to_json with timestamp") { val schema = StructType(StructField("t", TimestampType) :: Nil) - val c = Calendar.getInstance(DateTimeUtils.TimeZoneGMT) + val c = Calendar.getInstance(TimeZoneGMT) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) val struct = Literal.create(create_row(c.getTimeInMillis * 1000L), schema) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala index 2bb948ec24fb3..85479099fc14e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala @@ -21,12 +21,13 @@ import java.io.CharArrayWriter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneGMT import org.apache.spark.sql.types._ class JacksonGeneratorSuite extends SparkFunSuite { - val gmtId = DateTimeUtils.TimeZoneGMT.getID + val gmtId = TimeZoneGMT.getID val option = new JSONOptions(Map.empty, gmtId) test("initial with StructType and write out a row") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala index 8d16007a18f7f..18699452c45cc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -26,6 +26,9 @@ import 
java.util.concurrent.TimeUnit */ object DateTimeTestUtils { + final val TimeZoneGMT = TimeZone.getTimeZone("GMT") + final val TimeZoneUTC = TimeZone.getTimeZone("UTC") + val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone) val outstandingTimezonesIds: Seq[String] = Seq( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index e9ceab6724659..b73a5746e7783 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -28,7 +28,8 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.test.SQLTestData.ArrayData @@ -482,11 +483,11 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } DateTimeTestUtils.outstandingTimezones.foreach { timeZone => - checkTimestampStats(DateType, DateTimeUtils.TimeZoneUTC, timeZone) { stats => + checkTimestampStats(DateType, TimeZoneUTC, timeZone) { stats => assert(stats.min.get.asInstanceOf[Int] == TimeUnit.SECONDS.toDays(start)) assert(stats.max.get.asInstanceOf[Int] == TimeUnit.SECONDS.toDays(end - 1)) } - checkTimestampStats(TimestampType, DateTimeUtils.TimeZoneUTC, timeZone) { stats => + checkTimestampStats(TimestampType, TimeZoneUTC, timeZone) { stats => assert(stats.min.get.asInstanceOf[Long] == 
TimeUnit.SECONDS.toMicros(start)) assert(stats.max.get.asInstanceOf[Long] == TimeUnit.SECONDS.toMicros(end - 1)) } From 6933070c834f221846ccec4c27dc9a2e6fa4a8f5 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 18 Feb 2020 00:00:42 +0300 Subject: [PATCH 02/38] Remove TimeZoneGMT --- .../sql/catalyst/expressions/CastSuite.scala | 7 ++--- .../expressions/CsvExpressionsSuite.scala | 8 ++--- .../expressions/DateExpressionsSuite.scala | 30 +++++++++---------- .../expressions/JsonExpressionsSuite.scala | 8 ++--- .../catalyst/json/JacksonGeneratorSuite.scala | 4 +-- .../sql/catalyst/util/DateTimeTestUtils.scala | 1 - 6 files changed, 28 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index ad66873c02518..cd089dc08cd69 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -263,17 +262,17 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { val zts = sd + " 00:00:00" val sts = sd + " 00:00:02" val nts = sts + ".1" - val ts = withDefaultTimeZone(TimeZoneGMT)(Timestamp.valueOf(nts)) + val ts = withDefaultTimeZone(TimeZoneUTC)(Timestamp.valueOf(nts)) for (tz <- ALL_TIMEZONES) { val timeZoneId = Option(tz.getID) - var c = Calendar.getInstance(TimeZoneGMT) + var c = 
Calendar.getInstance(TimeZoneUTC) c.set(2015, 2, 8, 2, 30, 0) checkEvaluation( cast(cast(new Timestamp(c.getTimeInMillis), StringType, timeZoneId), TimestampType, timeZoneId), MILLISECONDS.toMicros(c.getTimeInMillis)) - c = Calendar.getInstance(TimeZoneGMT) + c = Calendar.getInstance(TimeZoneUTC) c.set(2015, 10, 1, 2, 30, 0) checkEvaluation( cast(cast(new Timestamp(c.getTimeInMillis), StringType, timeZoneId), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index 3e2aa2184e1d7..5ef88eae81a6b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -27,14 +27,14 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneGMT +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with PlanTestBase { val badCsv = "\u0000\u0000\u0000A\u0001AAA" - val gmtId = Option(TimeZoneGMT.getID) + val gmtId = Option(TimeZoneUTC.getID) test("from_csv") { val csvData = "1" @@ -75,7 +75,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P val schema = StructType(StructField("t", TimestampType) :: Nil) val csvData1 = "2016-01-01T00:00:00.123Z" - var c = Calendar.getInstance(TimeZoneGMT) + var c = Calendar.getInstance(TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) checkEvaluation( @@ -186,7 +186,7 @@ class 
CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P test("to_csv with timestamp") { val schema = StructType(StructField("t", TimestampType) :: Nil) - val c = Calendar.getInstance(TimeZoneGMT) + val c = Calendar.getInstance(TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) val struct = Literal.create(create_row(c.getTimeInMillis * 1000L), schema) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index f1a36a5d62323..9dea680561285 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -41,7 +41,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val TimeZonePST = TimeZone.getTimeZone("PST") val TimeZoneJST = TimeZone.getTimeZone("JST") - val gmtId = Option(TimeZoneGMT.getID) + val gmtId = Option(TimeZoneUTC.getID) val pstId = Option(TimeZonePST.getID) val jstId = Option(TimeZoneJST.getID) @@ -190,7 +190,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Second(Literal(ts), gmtId), 15) val c = Calendar.getInstance() - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) c.setTimeZone(tz) (0 to 60 by 5).foreach { s => @@ -290,7 +290,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Hour(Literal(ts), gmtId), 13) val c = Calendar.getInstance() - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) c.setTimeZone(tz) (0 to 24 by 6).foreach { h => @@ -317,7 +317,7 @@ class DateExpressionsSuite extends 
SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Minute(Literal(ts), gmtId), 10) val c = Calendar.getInstance() - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) c.setTimeZone(tz) (0 to 60 by 5).foreach { m => @@ -389,7 +389,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("time_add") { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) sdf.setTimeZone(tz) @@ -427,7 +427,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("time_sub") { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS", Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) sdf.setTimeZone(tz) @@ -513,7 +513,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("months_between") { val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) sdf.setTimeZone(tz) @@ -646,7 +646,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { expected) } - withDefaultTimeZone(TimeZoneGMT) { + withDefaultTimeZone(TimeZoneUTC) { val inputDate = Timestamp.valueOf("2015-07-22 05:30:06") Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt => @@ -716,7 +716,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val sdf1 = new SimpleDateFormat(fmt1, Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" val sdf2 = new SimpleDateFormat(fmt2, Locale.US) - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + for (tz 
<- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) sdf1.setTimeZone(tz) sdf2.setTimeZone(tz) @@ -765,10 +765,10 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val sdf2 = new SimpleDateFormat(fmt2, Locale.US) val fmt3 = "yy-MM-dd" val sdf3 = new SimpleDateFormat(fmt3, Locale.US) - sdf3.setTimeZone(TimeZoneGMT) + sdf3.setTimeZone(TimeZoneUTC) - withDefaultTimeZone(TimeZoneGMT) { - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + withDefaultTimeZone(TimeZoneUTC) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) sdf1.setTimeZone(tz) sdf2.setTimeZone(tz) @@ -832,10 +832,10 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val sdf2 = new SimpleDateFormat(fmt2, Locale.US) val fmt3 = "yy-MM-dd" val sdf3 = new SimpleDateFormat(fmt3, Locale.US) - sdf3.setTimeZone(TimeZoneGMT) + sdf3.setTimeZone(TimeZoneUTC) - withDefaultTimeZone(TimeZoneGMT) { - for (tz <- Seq(TimeZoneGMT, TimeZonePST, TimeZoneJST)) { + withDefaultTimeZone(TimeZoneUTC) { + for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { val timeZoneId = Option(tz.getID) sdf1.setTimeZone(tz) sdf2.setTimeZone(tz) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index e0ca0a8619df3..01efaa6714a3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneGMT +import 
org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -396,7 +396,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("1"))) } - val gmtId = Option(TimeZoneGMT.getID) + val gmtId = Option(TimeZoneUTC.getID) test("from_json") { val jsonData = """{"a": 1}""" @@ -503,7 +503,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val schema = StructType(StructField("t", TimestampType) :: Nil) val jsonData1 = """{"t": "2016-01-01T00:00:00.123Z"}""" - var c = Calendar.getInstance(TimeZoneGMT) + var c = Calendar.getInstance(TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) checkEvaluation( @@ -598,7 +598,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("to_json with timestamp") { val schema = StructType(StructField("t", TimestampType) :: Nil) - val c = Calendar.getInstance(TimeZoneGMT) + val c = Calendar.getInstance(TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) val struct = Literal.create(create_row(c.getTimeInMillis * 1000L), schema) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala index 85479099fc14e..23a0caacc34e2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala @@ -22,12 +22,12 @@ import java.io.CharArrayWriter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} -import 
org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneGMT +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.types._ class JacksonGeneratorSuite extends SparkFunSuite { - val gmtId = TimeZoneGMT.getID + val gmtId = TimeZoneUTC.getID val option = new JSONOptions(Map.empty, gmtId) test("initial with StructType and write out a row") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala index 18699452c45cc..4801d162e1fa0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -26,7 +26,6 @@ import java.util.concurrent.TimeUnit */ object DateTimeTestUtils { - final val TimeZoneGMT = TimeZone.getTimeZone("GMT") final val TimeZoneUTC = TimeZone.getTimeZone("UTC") val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone) From e195d2504615cb81ede6669b167b4b6da442250f Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 18 Feb 2020 00:04:02 +0300 Subject: [PATCH 03/38] gmtId -> utcId --- .../sql/catalyst/expressions/CastSuite.scala | 18 ++-- .../expressions/CsvExpressionsSuite.scala | 40 ++++----- .../expressions/DateExpressionsSuite.scala | 90 +++++++++---------- .../expressions/JsonExpressionsSuite.scala | 64 ++++++------- .../catalyst/json/JacksonGeneratorSuite.scala | 8 +- 5 files changed, 110 insertions(+), 110 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index cd089dc08cd69..4eb5f0c3d78f5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -280,23 +280,23 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { MILLISECONDS.toMicros(c.getTimeInMillis)) } - val gmtId = Option("GMT") + val utcId = Option("UTC") checkEvaluation(cast("abdef", StringType), "abdef") - checkEvaluation(cast("abdef", TimestampType, gmtId), null) + checkEvaluation(cast("abdef", TimestampType, utcId), null) checkEvaluation(cast("12.65", DecimalType.SYSTEM_DEFAULT), Decimal(12.65)) checkEvaluation(cast(cast(sd, DateType), StringType), sd) checkEvaluation(cast(cast(d, StringType), DateType), 0) - checkEvaluation(cast(cast(nts, TimestampType, gmtId), StringType, gmtId), nts) + checkEvaluation(cast(cast(nts, TimestampType, utcId), StringType, utcId), nts) checkEvaluation( - cast(cast(ts, StringType, gmtId), TimestampType, gmtId), + cast(cast(ts, StringType, utcId), TimestampType, utcId), DateTimeUtils.fromJavaTimestamp(ts)) // all convert to string type to check - checkEvaluation(cast(cast(cast(nts, TimestampType, gmtId), DateType, gmtId), StringType), sd) + checkEvaluation(cast(cast(cast(nts, TimestampType, utcId), DateType, utcId), StringType), sd) checkEvaluation( - cast(cast(cast(ts, DateType, gmtId), TimestampType, gmtId), StringType, gmtId), + cast(cast(cast(ts, DateType, utcId), TimestampType, utcId), StringType, utcId), zts) checkEvaluation(cast(cast("abdef", BinaryType), StringType), "abdef") @@ -309,7 +309,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType), 5.toShort) checkEvaluation( - cast(cast(cast(cast(cast(cast("5", TimestampType, gmtId), ByteType), + cast(cast(cast(cast(cast(cast("5", TimestampType, utcId), ByteType), DecimalType.SYSTEM_DEFAULT), LongType), StringType), ShortType), null) checkEvaluation(cast(cast(cast(cast(cast(cast("5", DecimalType.SYSTEM_DEFAULT), @@ -360,8 +360,8 @@ abstract class 
CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(d, DecimalType(10, 2)), null) checkEvaluation(cast(d, StringType), "1970-01-01") - val gmtId = Option("GMT") - checkEvaluation(cast(cast(d, TimestampType, gmtId), StringType, gmtId), "1970-01-01 00:00:00") + val utcId = Option("UTC") + checkEvaluation(cast(cast(d, TimestampType, utcId), StringType, utcId), "1970-01-01 00:00:00") } test("cast from timestamp") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index 5ef88eae81a6b..6e26fec4fa08c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -34,13 +34,13 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with PlanTestBase { val badCsv = "\u0000\u0000\u0000A\u0001AAA" - val gmtId = Option(TimeZoneUTC.getID) + val utcId = Option(TimeZoneUTC.getID) test("from_csv") { val csvData = "1" val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - CsvToStructs(schema, Map.empty, Literal(csvData), gmtId), + CsvToStructs(schema, Map.empty, Literal(csvData), utcId), InternalRow(1) ) } @@ -49,17 +49,17 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P val csvData = "---" val schema = StructType(StructField("a", DoubleType) :: Nil) checkEvaluation( - CsvToStructs(schema, Map("mode" -> PermissiveMode.name), Literal(csvData), gmtId), + CsvToStructs(schema, Map("mode" -> PermissiveMode.name), Literal(csvData), utcId), InternalRow(null)) // Default mode is Permissive - checkEvaluation(CsvToStructs(schema, Map.empty, Literal(csvData), gmtId), InternalRow(null)) + 
checkEvaluation(CsvToStructs(schema, Map.empty, Literal(csvData), utcId), InternalRow(null)) } test("from_csv null input column") { val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - CsvToStructs(schema, Map.empty, Literal.create(null, StringType), gmtId), + CsvToStructs(schema, Map.empty, Literal.create(null, StringType), utcId), null ) } @@ -67,7 +67,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P test("from_csv bad UTF-8") { val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - CsvToStructs(schema, Map.empty, Literal(badCsv), gmtId), + CsvToStructs(schema, Map.empty, Literal(badCsv), utcId), InternalRow(null)) } @@ -79,7 +79,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) checkEvaluation( - CsvToStructs(schema, Map.empty, Literal(csvData1), gmtId), + CsvToStructs(schema, Map.empty, Literal(csvData1), utcId), InternalRow(c.getTimeInMillis * 1000L) ) // The result doesn't change because the CSV string includes timezone string ("Z" here), @@ -109,7 +109,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", DateTimeUtils.TIMEZONE_OPTION -> tz.getID), Literal(csvData2), - gmtId), + utcId), InternalRow(c.getTimeInMillis * 1000L) ) } @@ -118,7 +118,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P test("from_csv empty input column") { val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - CsvToStructs(schema, Map.empty, Literal.create(" ", StringType), gmtId), + CsvToStructs(schema, Map.empty, Literal.create(" ", StringType), utcId), InternalRow(null) ) } @@ -130,7 +130,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P .add("b", StringType, nullable = false) .add("c", StringType, nullable = false) val 
output = InternalRow(1L, null, UTF8String.fromString("foo")) - val expr = CsvToStructs(csvSchema, Map.empty, Literal.create(input, StringType), gmtId) + val expr = CsvToStructs(csvSchema, Map.empty, Literal.create(input, StringType), utcId) checkEvaluation(expr, output) val schema = expr.dataType val schemaToCompare = csvSchema.asNullable @@ -143,7 +143,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P .add("a", IntegerType) .add("b", IntegerType) checkEvaluation( - CsvToStructs(schema, Map.empty, Literal.create("1"), gmtId), + CsvToStructs(schema, Map.empty, Literal.create("1"), utcId), InternalRow(1, null) ) } @@ -153,7 +153,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P val schema = StructType(StructField("a", DoubleType) :: Nil) val exception = intercept[TestFailedException] { checkEvaluation( - CsvToStructs(schema, Map("mode" -> DropMalformedMode.name), Literal(csvData), gmtId), + CsvToStructs(schema, Map("mode" -> DropMalformedMode.name), Literal(csvData), utcId), InternalRow(null)) }.getCause assert(exception.getMessage.contains("from_csv() doesn't support the DROPMALFORMED mode")) @@ -172,14 +172,14 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P test("to_csv - struct") { val schema = StructType(StructField("a", IntegerType) :: Nil) val struct = Literal.create(create_row(1), schema) - checkEvaluation(StructsToCsv(Map.empty, struct, gmtId), "1") + checkEvaluation(StructsToCsv(Map.empty, struct, utcId), "1") } test("to_csv null input column") { val schema = StructType(StructField("a", IntegerType) :: Nil) val struct = Literal.create(null, schema) checkEvaluation( - StructsToCsv(Map.empty, struct, gmtId), + StructsToCsv(Map.empty, struct, utcId), null ) } @@ -191,16 +191,16 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P c.set(Calendar.MILLISECOND, 0) val struct = Literal.create(create_row(c.getTimeInMillis * 
1000L), schema) - checkEvaluation(StructsToCsv(Map.empty, struct, gmtId), "2016-01-01T00:00:00.000Z") + checkEvaluation(StructsToCsv(Map.empty, struct, utcId), "2016-01-01T00:00:00.000Z") checkEvaluation( StructsToCsv(Map.empty, struct, Option("PST")), "2015-12-31T16:00:00.000-08:00") checkEvaluation( StructsToCsv( Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", - DateTimeUtils.TIMEZONE_OPTION -> gmtId.get), + DateTimeUtils.TIMEZONE_OPTION -> utcId.get), struct, - gmtId), + utcId), "2016-01-01T00:00:00" ) checkEvaluation( @@ -208,7 +208,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", DateTimeUtils.TIMEZONE_OPTION -> "PST"), struct, - gmtId), + utcId), "2015-12-31T16:00:00" ) } @@ -224,7 +224,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P val options = Map("dateFormat" -> dateFormat, "locale" -> langTag) checkEvaluation( - CsvToStructs(schema, options, Literal.create(dateStr), gmtId), + CsvToStructs(schema, options, Literal.create(dateStr), utcId), InternalRow(17836)) // number of days from 1970-01-01 } } @@ -235,7 +235,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P schema = StructType.fromDDL("i int, _unparsed boolean"), options = Map("columnNameOfCorruptRecord" -> "_unparsed"), child = Literal.create("a"), - timeZoneId = gmtId), + timeZoneId = utcId), expectedErrMsg = "The field for corrupt records must be string type and nullable") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 9dea680561285..ef6f544d36e32 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -41,7 +41,7 
@@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val TimeZonePST = TimeZone.getTimeZone("PST") val TimeZoneJST = TimeZone.getTimeZone("JST") - val gmtId = Option(TimeZoneUTC.getID) + val utcId = Option(TimeZoneUTC.getID) val pstId = Option(TimeZonePST.getID) val jstId = Option(TimeZoneJST.getID) @@ -56,7 +56,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("datetime function current_date") { val d0 = DateTimeUtils.millisToDays(System.currentTimeMillis(), ZoneOffset.UTC) - val cd = CurrentDate(gmtId).eval(EmptyRow).asInstanceOf[Int] + val cd = CurrentDate(utcId).eval(EmptyRow).asInstanceOf[Int] val d1 = DateTimeUtils.millisToDays(System.currentTimeMillis(), ZoneOffset.UTC) assert(d0 <= cd && cd <= d1 && d1 - d0 <= 1) @@ -93,8 +93,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("Year") { checkEvaluation(Year(Literal.create(null, DateType)), null) checkEvaluation(Year(Literal(d)), 2015) - checkEvaluation(Year(Cast(Literal(date), DateType, gmtId)), 2015) - checkEvaluation(Year(Cast(Literal(ts), DateType, gmtId)), 2013) + checkEvaluation(Year(Cast(Literal(date), DateType, utcId)), 2015) + checkEvaluation(Year(Cast(Literal(ts), DateType, utcId)), 2013) val c = Calendar.getInstance() (2000 to 2002).foreach { y => @@ -115,8 +115,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("Quarter") { checkEvaluation(Quarter(Literal.create(null, DateType)), null) checkEvaluation(Quarter(Literal(d)), 2) - checkEvaluation(Quarter(Cast(Literal(date), DateType, gmtId)), 2) - checkEvaluation(Quarter(Cast(Literal(ts), DateType, gmtId)), 4) + checkEvaluation(Quarter(Cast(Literal(date), DateType, utcId)), 2) + checkEvaluation(Quarter(Cast(Literal(ts), DateType, utcId)), 4) val c = Calendar.getInstance() (2003 to 2004).foreach { y => @@ -138,8 +138,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("Month") { 
checkEvaluation(Month(Literal.create(null, DateType)), null) checkEvaluation(Month(Literal(d)), 4) - checkEvaluation(Month(Cast(Literal(date), DateType, gmtId)), 4) - checkEvaluation(Month(Cast(Literal(ts), DateType, gmtId)), 11) + checkEvaluation(Month(Cast(Literal(date), DateType, utcId)), 4) + checkEvaluation(Month(Cast(Literal(ts), DateType, utcId)), 11) checkEvaluation(Month(Cast(Literal("1582-04-28 13:10:15"), DateType)), 4) checkEvaluation(Month(Cast(Literal("1582-10-04 13:10:15"), DateType)), 10) @@ -163,8 +163,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(DayOfMonth(Cast(Literal("2000-02-29"), DateType)), 29) checkEvaluation(DayOfMonth(Literal.create(null, DateType)), null) checkEvaluation(DayOfMonth(Literal(d)), 8) - checkEvaluation(DayOfMonth(Cast(Literal(date), DateType, gmtId)), 8) - checkEvaluation(DayOfMonth(Cast(Literal(ts), DateType, gmtId)), 8) + checkEvaluation(DayOfMonth(Cast(Literal(date), DateType, utcId)), 8) + checkEvaluation(DayOfMonth(Cast(Literal(ts), DateType, utcId)), 8) checkEvaluation(DayOfMonth(Cast(Literal("1582-04-28 13:10:15"), DateType)), 28) checkEvaluation(DayOfMonth(Cast(Literal("1582-10-15 13:10:15"), DateType)), 15) @@ -183,11 +183,11 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("Seconds") { - assert(Second(Literal.create(null, DateType), gmtId).resolved === false) - assert(Second(Cast(Literal(d), TimestampType, gmtId), gmtId).resolved ) - checkEvaluation(Second(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) - checkEvaluation(Second(Cast(Literal(date), TimestampType, gmtId), gmtId), 15) - checkEvaluation(Second(Literal(ts), gmtId), 15) + assert(Second(Literal.create(null, DateType), utcId).resolved === false) + assert(Second(Cast(Literal(d), TimestampType, utcId), utcId).resolved ) + checkEvaluation(Second(Cast(Literal(d), TimestampType, utcId), utcId), 0) + checkEvaluation(Second(Cast(Literal(date), TimestampType, utcId), 
utcId), 15) + checkEvaluation(Second(Literal(ts), utcId), 15) val c = Calendar.getInstance() for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { @@ -207,10 +207,10 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("DayOfWeek") { checkEvaluation(DayOfWeek(Literal.create(null, DateType)), null) checkEvaluation(DayOfWeek(Literal(d)), Calendar.WEDNESDAY) - checkEvaluation(DayOfWeek(Cast(Literal(date), DateType, gmtId)), + checkEvaluation(DayOfWeek(Cast(Literal(date), DateType, utcId)), Calendar.WEDNESDAY) - checkEvaluation(DayOfWeek(Cast(Literal(ts), DateType, gmtId)), Calendar.FRIDAY) - checkEvaluation(DayOfWeek(Cast(Literal("2011-05-06"), DateType, gmtId)), Calendar.FRIDAY) + checkEvaluation(DayOfWeek(Cast(Literal(ts), DateType, utcId)), Calendar.FRIDAY) + checkEvaluation(DayOfWeek(Cast(Literal("2011-05-06"), DateType, utcId)), Calendar.FRIDAY) checkEvaluation(DayOfWeek(Literal(new Date(toMillis("2017-05-27 13:10:15")))), Calendar.SATURDAY) checkEvaluation(DayOfWeek(Literal(new Date(toMillis("1582-10-15 13:10:15")))), @@ -221,9 +221,9 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("WeekDay") { checkEvaluation(WeekDay(Literal.create(null, DateType)), null) checkEvaluation(WeekDay(Literal(d)), 2) - checkEvaluation(WeekDay(Cast(Literal(date), DateType, gmtId)), 2) - checkEvaluation(WeekDay(Cast(Literal(ts), DateType, gmtId)), 4) - checkEvaluation(WeekDay(Cast(Literal("2011-05-06"), DateType, gmtId)), 4) + checkEvaluation(WeekDay(Cast(Literal(date), DateType, utcId)), 2) + checkEvaluation(WeekDay(Cast(Literal(ts), DateType, utcId)), 4) + checkEvaluation(WeekDay(Cast(Literal("2011-05-06"), DateType, utcId)), 4) checkEvaluation(WeekDay(Literal(new Date(toMillis("2017-05-27 13:10:15")))), 5) checkEvaluation(WeekDay(Literal(new Date(toMillis("1582-10-15 13:10:15")))), 4) checkConsistencyBetweenInterpretedAndCodegen(WeekDay, DateType) @@ -232,11 +232,11 @@ class DateExpressionsSuite extends 
SparkFunSuite with ExpressionEvalHelper { test("WeekOfYear") { checkEvaluation(WeekOfYear(Literal.create(null, DateType)), null) checkEvaluation(WeekOfYear(Literal(d)), 15) - checkEvaluation(WeekOfYear(Cast(Literal(date), DateType, gmtId)), 15) - checkEvaluation(WeekOfYear(Cast(Literal(ts), DateType, gmtId)), 45) - checkEvaluation(WeekOfYear(Cast(Literal("2011-05-06"), DateType, gmtId)), 18) - checkEvaluation(WeekOfYear(Cast(Literal("1582-10-15 13:10:15"), DateType, gmtId)), 41) - checkEvaluation(WeekOfYear(Cast(Literal("1582-10-04 13:10:15"), DateType, gmtId)), 40) + checkEvaluation(WeekOfYear(Cast(Literal(date), DateType, utcId)), 15) + checkEvaluation(WeekOfYear(Cast(Literal(ts), DateType, utcId)), 45) + checkEvaluation(WeekOfYear(Cast(Literal("2011-05-06"), DateType, utcId)), 18) + checkEvaluation(WeekOfYear(Cast(Literal("1582-10-15 13:10:15"), DateType, utcId)), 41) + checkEvaluation(WeekOfYear(Cast(Literal("1582-10-04 13:10:15"), DateType, utcId)), 40) checkConsistencyBetweenInterpretedAndCodegen(WeekOfYear, DateType) } @@ -244,17 +244,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Seq(false, true).foreach { legacyParser => withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { checkEvaluation( - DateFormatClass(Literal.create(null, TimestampType), Literal("y"), gmtId), + DateFormatClass(Literal.create(null, TimestampType), Literal("y"), utcId), null) - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), - Literal.create(null, StringType), gmtId), null) + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, utcId), + Literal.create(null, StringType), utcId), null) - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), - Literal("y"), gmtId), "2015") - checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), gmtId), "2013") - checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, gmtId), - Literal("H"), gmtId), "0") - 
checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), gmtId), "13") + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, utcId), + Literal("y"), utcId), "2015") + checkEvaluation(DateFormatClass(Literal(ts), Literal("y"), utcId), "2013") + checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, utcId), + Literal("H"), utcId), "0") + checkEvaluation(DateFormatClass(Literal(ts), Literal("H"), utcId), "13") checkEvaluation(DateFormatClass(Cast(Literal(d), TimestampType, pstId), Literal("y"), pstId), "2015") @@ -283,11 +283,11 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("Hour") { - assert(Hour(Literal.create(null, DateType), gmtId).resolved === false) - assert(Hour(Literal(ts), gmtId).resolved) - checkEvaluation(Hour(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) - checkEvaluation(Hour(Cast(Literal(date), TimestampType, gmtId), gmtId), 13) - checkEvaluation(Hour(Literal(ts), gmtId), 13) + assert(Hour(Literal.create(null, DateType), utcId).resolved === false) + assert(Hour(Literal(ts), utcId).resolved) + checkEvaluation(Hour(Cast(Literal(d), TimestampType, utcId), utcId), 0) + checkEvaluation(Hour(Cast(Literal(date), TimestampType, utcId), utcId), 13) + checkEvaluation(Hour(Literal(ts), utcId), 13) val c = Calendar.getInstance() for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { @@ -309,12 +309,12 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("Minute") { - assert(Minute(Literal.create(null, DateType), gmtId).resolved === false) - assert(Minute(Literal(ts), gmtId).resolved) - checkEvaluation(Minute(Cast(Literal(d), TimestampType, gmtId), gmtId), 0) + assert(Minute(Literal.create(null, DateType), utcId).resolved === false) + assert(Minute(Literal(ts), utcId).resolved) + checkEvaluation(Minute(Cast(Literal(d), TimestampType, utcId), utcId), 0) checkEvaluation( - Minute(Cast(Literal(date), TimestampType, gmtId), gmtId), 10) - 
checkEvaluation(Minute(Literal(ts), gmtId), 10) + Minute(Cast(Literal(date), TimestampType, utcId), utcId), 10) + checkEvaluation(Minute(Literal(ts), utcId), 10) val c = Calendar.getInstance() for (tz <- Seq(TimeZoneUTC, TimeZonePST, TimeZoneJST)) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 01efaa6714a3e..f8ba4e780e2af 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -396,13 +396,13 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("1"))) } - val gmtId = Option(TimeZoneUTC.getID) + val utcId = Option(TimeZoneUTC.getID) test("from_json") { val jsonData = """{"a": 1}""" val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - JsonToStructs(schema, Map.empty, Literal(jsonData), gmtId), + JsonToStructs(schema, Map.empty, Literal(jsonData), utcId), InternalRow(1) ) } @@ -411,13 +411,13 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val jsonData = """{"a" 1}""" val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - JsonToStructs(schema, Map.empty, Literal(jsonData), gmtId), + JsonToStructs(schema, Map.empty, Literal(jsonData), utcId), InternalRow(null) ) val exception = intercept[TestFailedException] { checkEvaluation( - JsonToStructs(schema, Map("mode" -> FailFastMode.name), Literal(jsonData), gmtId), + JsonToStructs(schema, Map("mode" -> FailFastMode.name), Literal(jsonData), utcId), InternalRow(null) ) }.getCause @@ -430,35 +430,35 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val input = """[{"a": 1}, {"a": 2}]""" 
val schema = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) val output = InternalRow(1) :: InternalRow(2) :: Nil - checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), utcId), output) } test("from_json - input=object, schema=array, output=array of single row") { val input = """{"a": 1}""" val schema = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) val output = InternalRow(1) :: Nil - checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), utcId), output) } test("from_json - input=empty array, schema=array, output=empty array") { val input = "[ ]" val schema = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) val output = Nil - checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), utcId), output) } test("from_json - input=empty object, schema=array, output=array of single row with null") { val input = "{ }" val schema = ArrayType(StructType(StructField("a", IntegerType) :: Nil)) val output = InternalRow(null) :: Nil - checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), utcId), output) } test("from_json - input=array of single object, schema=struct, output=single row") { val input = """[{"a": 1}]""" val schema = StructType(StructField("a", IntegerType) :: Nil) val output = InternalRow(null) - checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), utcId), output) } test("from_json - input=array, schema=struct, output=single row") { @@ -467,27 +467,27 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val schema = new StructType().add("a", 
IntegerType).add(corrupted, StringType) val output = InternalRow(null, UTF8String.fromString(input)) val options = Map("columnNameOfCorruptRecord" -> corrupted) - checkEvaluation(JsonToStructs(schema, options, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, options, Literal(input), utcId), output) } test("from_json - input=empty array, schema=struct, output=single row with null") { val input = """[]""" val schema = StructType(StructField("a", IntegerType) :: Nil) val output = InternalRow(null) - checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), utcId), output) } test("from_json - input=empty object, schema=struct, output=single row with null") { val input = """{ }""" val schema = StructType(StructField("a", IntegerType) :: Nil) val output = InternalRow(null) - checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), gmtId), output) + checkEvaluation(JsonToStructs(schema, Map.empty, Literal(input), utcId), output) } test("from_json null input column") { val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - JsonToStructs(schema, Map.empty, Literal.create(null, StringType), gmtId), + JsonToStructs(schema, Map.empty, Literal.create(null, StringType), utcId), null ) } @@ -495,7 +495,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("SPARK-20549: from_json bad UTF-8") { val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - JsonToStructs(schema, Map.empty, Literal(badJson), gmtId), + JsonToStructs(schema, Map.empty, Literal(badJson), utcId), InternalRow(null)) } @@ -507,7 +507,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) checkEvaluation( - JsonToStructs(schema, Map.empty, Literal(jsonData1), gmtId), + JsonToStructs(schema, Map.empty, Literal(jsonData1), 
utcId), InternalRow(c.getTimeInMillis * 1000L) ) // The result doesn't change because the json string includes timezone string ("Z" here), @@ -537,7 +537,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", DateTimeUtils.TIMEZONE_OPTION -> tz.getID), Literal(jsonData2), - gmtId), + utcId), InternalRow(c.getTimeInMillis * 1000L) ) } @@ -546,7 +546,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("SPARK-19543: from_json empty input column") { val schema = StructType(StructField("a", IntegerType) :: Nil) checkEvaluation( - JsonToStructs(schema, Map.empty, Literal.create(" ", StringType), gmtId), + JsonToStructs(schema, Map.empty, Literal.create(" ", StringType), utcId), null ) } @@ -555,7 +555,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val schema = StructType(StructField("a", IntegerType) :: Nil) val struct = Literal.create(create_row(1), schema) checkEvaluation( - StructsToJson(Map.empty, struct, gmtId), + StructsToJson(Map.empty, struct, utcId), """{"a":1}""" ) } @@ -565,7 +565,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val input = new GenericArrayData(InternalRow(1) :: InternalRow(2) :: Nil) val output = """[{"a":1},{"a":2}]""" checkEvaluation( - StructsToJson(Map.empty, Literal.create(input, inputSchema), gmtId), + StructsToJson(Map.empty, Literal.create(input, inputSchema), utcId), output) } @@ -574,7 +574,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val input = new GenericArrayData(InternalRow(null) :: Nil) val output = """[{}]""" checkEvaluation( - StructsToJson(Map.empty, Literal.create(input, inputSchema), gmtId), + StructsToJson(Map.empty, Literal.create(input, inputSchema), utcId), output) } @@ -583,7 +583,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val input = new 
GenericArrayData(Nil) val output = """[]""" checkEvaluation( - StructsToJson(Map.empty, Literal.create(input, inputSchema), gmtId), + StructsToJson(Map.empty, Literal.create(input, inputSchema), utcId), output) } @@ -591,7 +591,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val schema = StructType(StructField("a", IntegerType) :: Nil) val struct = Literal.create(null, schema) checkEvaluation( - StructsToJson(Map.empty, struct, gmtId), + StructsToJson(Map.empty, struct, utcId), null ) } @@ -604,7 +604,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val struct = Literal.create(create_row(c.getTimeInMillis * 1000L), schema) checkEvaluation( - StructsToJson(Map.empty, struct, gmtId), + StructsToJson(Map.empty, struct, utcId), """{"t":"2016-01-01T00:00:00.000Z"}""" ) checkEvaluation( @@ -615,9 +615,9 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with checkEvaluation( StructsToJson( Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", - DateTimeUtils.TIMEZONE_OPTION -> gmtId.get), + DateTimeUtils.TIMEZONE_OPTION -> utcId.get), struct, - gmtId), + utcId), """{"t":"2016-01-01T00:00:00"}""" ) checkEvaluation( @@ -625,7 +625,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", DateTimeUtils.TIMEZONE_OPTION -> "PST"), struct, - gmtId), + utcId), """{"t":"2015-12-31T16:00:00"}""" ) } @@ -666,7 +666,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with ArrayBasedMapData(Map(UTF8String.fromString("b") -> 2)) :: Nil) val output = """[{"a":1},{"b":2}]""" checkEvaluation( - StructsToJson(Map.empty, Literal(input, inputSchema), gmtId), + StructsToJson(Map.empty, Literal(input, inputSchema), utcId), output) } @@ -675,7 +675,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val input = new 
GenericArrayData(ArrayBasedMapData(Map(UTF8String.fromString("a") -> 1)) :: Nil) val output = """[{"a":1}]""" checkEvaluation( - StructsToJson(Map.empty, Literal.create(input, inputSchema), gmtId), + StructsToJson(Map.empty, Literal.create(input, inputSchema), utcId), output) } @@ -705,7 +705,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with .add("b", StringType, nullable = false) .add("c", StringType, nullable = false) val output = InternalRow(1L, null, UTF8String.fromString("foo")) - val expr = JsonToStructs(jsonSchema, Map.empty, Literal.create(input, StringType), gmtId) + val expr = JsonToStructs(jsonSchema, Map.empty, Literal.create(input, StringType), utcId) checkEvaluation(expr, output) val schema = expr.dataType val schemaToCompare = jsonSchema.asNullable @@ -738,7 +738,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val options = Map("dateFormat" -> dateFormat, "locale" -> langTag) checkEvaluation( - JsonToStructs(schema, options, Literal.create(dateStr), gmtId), + JsonToStructs(schema, options, Literal.create(dateStr), utcId), InternalRow(17836)) // number of days from 1970-01-01 } } @@ -749,7 +749,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with schema = StructType.fromDDL("i int, _unparsed boolean"), options = Map("columnNameOfCorruptRecord" -> "_unparsed"), child = Literal.create("""{"i":"a"}"""), - timeZoneId = gmtId), + timeZoneId = utcId), expectedErrMsg = "The field for corrupt records must be string type and nullable") } @@ -771,7 +771,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val (expected, input) = decimalInput(langTag) checkEvaluation( - JsonToStructs(schema, options, Literal.create(input), gmtId), + JsonToStructs(schema, options, Literal.create(input), utcId), InternalRow(expected)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala index 23a0caacc34e2..60cd186208100 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala @@ -27,8 +27,8 @@ import org.apache.spark.sql.types._ class JacksonGeneratorSuite extends SparkFunSuite { - val gmtId = TimeZoneUTC.getID - val option = new JSONOptions(Map.empty, gmtId) + val utcId = TimeZoneUTC.getID + val option = new JSONOptions(Map.empty, utcId) test("initial with StructType and write out a row") { val dataType = StructType(StructField("a", IntegerType) :: Nil) @@ -46,7 +46,7 @@ class JacksonGeneratorSuite extends SparkFunSuite { val input = InternalRow(null) val writer = new CharArrayWriter() val allowNullOption = - new JSONOptions(Map("ignoreNullFields" -> "false"), gmtId) + new JSONOptions(Map("ignoreNullFields" -> "false"), utcId) val gen = new JacksonGenerator(dataType, writer, allowNullOption) gen.write(input) gen.flush() @@ -60,7 +60,7 @@ class JacksonGeneratorSuite extends SparkFunSuite { val input = InternalRow(InternalRow(null)) val writer = new CharArrayWriter() val allowNullOption = - new JSONOptions(Map("ignoreNullFields" -> "false"), gmtId) + new JSONOptions(Map("ignoreNullFields" -> "false"), utcId) val gen = new JacksonGenerator(dataType, writer, allowNullOption) gen.write(input) gen.flush() From cf620cb6c658709ed5b4207d056888edb34e4fa1 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 18 Feb 2020 00:09:53 +0300 Subject: [PATCH 04/38] Remove defaultTimeZone --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 6 ++---- .../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 4 ++-- .../scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala | 5 +++-- .../apache/spark/sql/sources/ResolvedDataSourceSuite.scala | 4 +++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 917967fd22aeb..9302f506d60bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -49,8 +49,6 @@ object DateTimeUtils { val TIMEZONE_OPTION = "timeZone" - def defaultTimeZone(): TimeZone = TimeZone.getDefault() - def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) def getTimeZone(timeZoneId: String): TimeZone = { TimeZone.getTimeZone(getZoneId(timeZoneId)) @@ -58,7 +56,7 @@ object DateTimeUtils { // we should use the exact day as Int, for example, (year, month, day) -> day def millisToDays(millisUtc: Long): SQLDate = { - millisToDays(millisUtc, defaultTimeZone().toZoneId) + millisToDays(millisUtc, ZoneId.systemDefault()) } def millisToDays(millisUtc: Long, zoneId: ZoneId): SQLDate = { @@ -68,7 +66,7 @@ object DateTimeUtils { // reverse of millisToDays def daysToMillis(days: SQLDate): Long = { - daysToMillis(days, defaultTimeZone().toZoneId) + daysToMillis(days, ZoneId.systemDefault()) } def daysToMillis(days: SQLDate, zoneId: ZoneId): Long = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 6ed18ca1bedab..05d0d154f607b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -40,7 +40,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { private def defaultZoneId = ZoneId.systemDefault() test("nanoseconds truncation") { - val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone.toZoneId) + val 
tf = TimestampFormatter.getFractionFormatter(ZoneId.systemDefault()) def checkStringToTimestamp(originalTime: String, expectedParsedTime: String): Unit = { val parsedTimestampOp = DateTimeUtils.stringToTimestamp( UTF8String.fromString(originalTime), defaultZoneId) @@ -89,7 +89,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { test("SPARK-6785: java date conversion before and after epoch") { def format(d: Date): String = { - TimestampFormatter("uuuu-MM-dd", defaultTimeZone().toZoneId) + TimestampFormatter("uuuu-MM-dd", ZoneId.systemDefault()) .format(d.getTime * MICROS_PER_MILLIS) } def checkFromToJavaDate(d1: Date): Unit = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala index 4439a7bb3ae87..deac53e395d1f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.util +import java.time.ZoneId + import org.apache.arrow.vector.types.pojo.ArrowType import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ class ArrowUtilsSuite extends SparkFunSuite { @@ -62,7 +63,7 @@ class ArrowUtilsSuite extends SparkFunSuite { assert(ArrowUtils.fromArrowSchema(arrowSchema) === schema) } - roundtripWithTz(DateTimeUtils.defaultTimeZone().getID) + roundtripWithTz(ZoneId.systemDefault().getId) roundtripWithTz("Asia/Tokyo") roundtripWithTz("UTC") roundtripWithTz("America/Los_Angeles") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala index 36fb418b09cb6..818a66eb436cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/ResolvedDataSourceSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.sources +import java.time.ZoneId + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.DataSource @@ -27,7 +29,7 @@ class ResolvedDataSourceSuite extends SharedSparkSession { DataSource( sparkSession = spark, className = name, - options = Map(DateTimeUtils.TIMEZONE_OPTION -> DateTimeUtils.defaultTimeZone().getID) + options = Map(DateTimeUtils.TIMEZONE_OPTION -> ZoneId.systemDefault().getId) ).providingClass test("jdbc") { From b4879335611cd9e9c5b9088c6909367414ea868b Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 18 Feb 2020 00:18:18 +0300 Subject: [PATCH 05/38] Use Date/TimestampType.InternalType --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 9302f506d60bc..efa9d760735f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -27,7 +27,7 @@ import java.util.concurrent.TimeUnit._ import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.util.DateTimeConstants._ -import org.apache.spark.sql.types.Decimal +import org.apache.spark.sql.types.{DateType, Decimal, TimestampType} import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} /** @@ -39,9 +39,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} */ object DateTimeUtils { - // we use Int and Long internally to represent [[DateType]] and [[TimestampType]] - type SQLDate = Int - type SQLTimestamp = Long + type SQLDate = DateType.InternalType + type 
SQLTimestamp = TimestampType.InternalType // see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian // it's 2440587.5, rounding up to compatible with Hive @@ -316,13 +315,13 @@ object DateTimeUtils { } } - def instantToMicros(instant: Instant): Long = { + def instantToMicros(instant: Instant): SQLTimestamp = { val us = Math.multiplyExact(instant.getEpochSecond, MICROS_PER_SECOND) val result = Math.addExact(us, NANOSECONDS.toMicros(instant.getNano)) result } - def microsToInstant(us: Long): Instant = { + def microsToInstant(us: SQLTimestamp): Instant = { val secs = Math.floorDiv(us, MICROS_PER_SECOND) // Unfolded Math.floorMod(us, MICROS_PER_SECOND) to reuse the result of // the above calculation of `secs` via `floorDiv`. From b26730c776a7c8109bd36bfe328677cbe57aab77 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 18 Feb 2020 00:27:41 +0300 Subject: [PATCH 06/38] Use TimeZoneUTC --- .../test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 3 ++- .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 4 ++-- .../catalyst/expressions/CollectionExpressionsSuite.scala | 6 +++--- .../org/apache/spark/sql/DataFrameFunctionsSuite.scala | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 360160c9c9398..deebc96682275 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.TestingUDT.{IntervalData, NullData, NullUDT} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.execution.SparkPlan import 
org.apache.spark.sql.execution.datasources.{DataSource, FilePartition} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec @@ -402,7 +403,7 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession { StructField("float", FloatType, true), StructField("date", DateType, true) )) - TimeZone.setDefault(TimeZone.getTimeZone("UTC")) + TimeZone.setDefault(TimeZoneUTC) val rdd = spark.sparkContext.parallelize(Seq( Row(1f, null), Row(2f, new Date(1451948400000L)), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 4eb5f0c3d78f5..75be7f94c8ca8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -141,7 +141,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { // If the string value includes timezone string, it represents the timestamp string // in the timezone regardless of the timeZoneId parameter. - c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + c = Calendar.getInstance(TimeZoneUTC) c.set(2015, 2, 18, 12, 3, 17) c.set(Calendar.MILLISECOND, 0) checkCastStringToTimestamp("2015-03-18T12:03:17Z", new Timestamp(c.getTimeInMillis)) @@ -172,7 +172,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { // If the string value includes timezone string, it represents the timestamp string // in the timezone regardless of the timeZoneId parameter. 
- c = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + c = Calendar.getInstance(TimeZoneUTC) c.set(2015, 2, 18, 12, 3, 17) c.set(Calendar.MILLISECOND, 456) checkCastStringToTimestamp("2015-03-18T12:03:17.456Z", new Timestamp(c.getTimeInMillis)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 01df6675016d2..4c0c97d1aa937 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_DAY -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -835,7 +835,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper def noDST(t: Timestamp): Timestamp = new Timestamp(t.getTime - dstOffset) - DateTimeTestUtils.withDefaultTimeZone(timeZone) { + withDefaultTimeZone(timeZone) { // Spring time change checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-03-25 01:30:00")), @@ -863,7 +863,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } test("Sequence of dates") { - DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("UTC")) { + withDefaultTimeZone(TimeZoneUTC) { checkEvaluation(new Sequence( Literal(Date.valueOf("2018-01-01")), Literal(Date.valueOf("2018-01-05")), diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 341b3255b12bf..8f77e45a9db0a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -1013,7 +1013,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Timestamp.valueOf("2018-01-01 12:00:00"), Timestamp.valueOf("2018-01-02 00:00:00"))))) - DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("UTC")) { + withDefaultTimeZone(TimeZoneUTC) { checkAnswer( spark.sql("select sequence(" + " cast('2018-01-01' as date)" + From 0b5711e0e332817f6cff28eeeef79ccffeaca304 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 18 Feb 2020 00:30:06 +0300 Subject: [PATCH 07/38] Use TimeZoneUTC instead of GMT --- .../datasources/parquet/ParquetPartitionDiscoverySuite.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index e63929470ce5f..e55cf99b2d5e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -21,7 +21,7 @@ import java.io.File import java.math.BigInteger import java.sql.{Date, Timestamp} import java.time.{ZoneId, ZoneOffset} -import java.util.{Calendar, Locale, TimeZone} +import java.util.{Calendar, Locale} import scala.collection.mutable.ArrayBuffer @@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.{PartitionPath => Partition} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} @@ -87,7 +88,7 @@ abstract class ParquetPartitionDiscoverySuite check("1990-02-24 12:00:30", Literal.create(Timestamp.valueOf("1990-02-24 12:00:30"), TimestampType)) - val c = Calendar.getInstance(TimeZone.getTimeZone("GMT")) + val c = Calendar.getInstance(TimeZoneUTC) c.set(1990, 1, 24, 12, 0, 30) c.set(Calendar.MILLISECOND, 0) check("1990-02-24 12:00:30", From 9ba2d3d4ceb8f7b863ef0887f4e2c879a4ef05d4 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 21 Feb 2020 09:00:53 +0300 Subject: [PATCH 08/38] Remove SQLDate and SQLTimestamp --- .../sql/catalyst/util/DateTimeUtils.scala | 125 +++++++++--------- .../catalyst/util/TimestampFormatter.scala | 8 +- .../expressions/ObjectExpressionsSuite.scala | 4 +- .../catalyst/util/DateTimeUtilsSuite.scala | 6 +- .../datasources/parquet/ParquetFilters.scala | 3 +- .../parquet/ParquetRowConverter.scala | 3 +- 6 files changed, 72 insertions(+), 77 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index efa9d760735f3..59a514fd7c862 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -27,7 +27,7 @@ import java.util.concurrent.TimeUnit._ import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.util.DateTimeConstants._ -import org.apache.spark.sql.types.{DateType, Decimal, TimestampType} +import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} /** @@ -39,9 +39,6 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} */ object DateTimeUtils { - type SQLDate = DateType.InternalType - type SQLTimestamp = TimestampType.InternalType - // see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian // it's 2440587.5, rounding up to compatible with Hive final val JULIAN_DAY_OF_EPOCH = 2440588 @@ -54,55 +51,55 @@ object DateTimeUtils { } // we should use the exact day as Int, for example, (year, month, day) -> day - def millisToDays(millisUtc: Long): SQLDate = { + def millisToDays(millisUtc: Long): Int = { millisToDays(millisUtc, ZoneId.systemDefault()) } - def millisToDays(millisUtc: Long, zoneId: ZoneId): SQLDate = { + def millisToDays(millisUtc: Long, zoneId: ZoneId): Int = { val instant = microsToInstant(Math.multiplyExact(millisUtc, MICROS_PER_MILLIS)) localDateToDays(LocalDateTime.ofInstant(instant, zoneId).toLocalDate) } // reverse of millisToDays - def daysToMillis(days: SQLDate): Long = { + def daysToMillis(days: Int): Long = { daysToMillis(days, ZoneId.systemDefault()) } - def daysToMillis(days: SQLDate, zoneId: ZoneId): Long = { + def daysToMillis(days: Int, zoneId: ZoneId): Long = { val instant = daysToLocalDate(days).atStartOfDay(zoneId).toInstant instantToMicros(instant) / MICROS_PER_MILLIS } // Converts Timestamp to string according to 
Hive TimestampWritable convention. - def timestampToString(tf: TimestampFormatter, us: SQLTimestamp): String = { + def timestampToString(tf: TimestampFormatter, us: Long): String = { tf.format(us) } /** * Returns the number of days since epoch from java.sql.Date. */ - def fromJavaDate(date: Date): SQLDate = { + def fromJavaDate(date: Date): Int = { millisToDays(date.getTime) } /** * Returns a java.sql.Date from number of days since epoch. */ - def toJavaDate(daysSinceEpoch: SQLDate): Date = { + def toJavaDate(daysSinceEpoch: Int): Date = { new Date(daysToMillis(daysSinceEpoch)) } /** * Returns a java.sql.Timestamp from number of micros since epoch. */ - def toJavaTimestamp(us: SQLTimestamp): Timestamp = { + def toJavaTimestamp(us: Long): Timestamp = { Timestamp.from(microsToInstant(us)) } /** * Returns the number of micros since epoch from java.sql.Timestamp. */ - def fromJavaTimestamp(t: Timestamp): SQLTimestamp = { + def fromJavaTimestamp(t: Timestamp): Long = { instantToMicros(t.toInstant) } @@ -110,7 +107,7 @@ object DateTimeUtils { * Returns the number of microseconds since epoch from Julian day * and nanoseconds in a day */ - def fromJulianDay(day: Int, nanoseconds: Long): SQLTimestamp = { + def fromJulianDay(day: Int, nanoseconds: Long): Long = { // use Long to avoid rounding errors val seconds = (day - JULIAN_DAY_OF_EPOCH).toLong * SECONDS_PER_DAY SECONDS.toMicros(seconds) + NANOSECONDS.toMicros(nanoseconds) @@ -121,7 +118,7 @@ object DateTimeUtils { * * Note: support timestamp since 4717 BC (without negative nanoseconds, compatible with Hive). */ - def toJulianDay(us: SQLTimestamp): (Int, Long) = { + def toJulianDay(us: Long): (Int, Long) = { val julian_us = us + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY val day = julian_us / MICROS_PER_DAY val micros = julian_us % MICROS_PER_DAY @@ -132,7 +129,7 @@ object DateTimeUtils { * Converts the timestamp to milliseconds since epoch. In spark timestamp values have microseconds * precision, so this conversion is lossy. 
*/ - def toMillis(us: SQLTimestamp): Long = { + def toMillis(us: Long): Long = { // When the timestamp is negative i.e before 1970, we need to adjust the millseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. // In millis precision the above needs to be represented as (-157700927877). @@ -140,17 +137,17 @@ object DateTimeUtils { } /* - * Converts milliseconds since epoch to SQLTimestamp. + * Converts milliseconds since epoch to Long. */ - def fromMillis(millis: Long): SQLTimestamp = { + def fromMillis(millis: Long): Long = { MILLISECONDS.toMicros(millis) } - def microsToEpochDays(epochMicros: SQLTimestamp, zoneId: ZoneId): SQLDate = { + def microsToEpochDays(epochMicros: Long, zoneId: ZoneId): Int = { localDateToDays(microsToInstant(epochMicros).atZone(zoneId).toLocalDate) } - def epochDaysToMicros(epochDays: SQLDate, zoneId: ZoneId): SQLTimestamp = { + def epochDaysToMicros(epochDays: Int, zoneId: ZoneId): Long = { val localDate = LocalDate.ofEpochDay(epochDays) val zeroLocalTime = LocalTime.MIDNIGHT val localDateTime = LocalDateTime.of(localDate, zeroLocalTime) @@ -183,7 +180,7 @@ object DateTimeUtils { * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` */ - def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[SQLTimestamp] = { + def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[Long] = { if (s == null) { return None } @@ -315,13 +312,13 @@ object DateTimeUtils { } } - def instantToMicros(instant: Instant): SQLTimestamp = { + def instantToMicros(instant: Instant): Long = { val us = Math.multiplyExact(instant.getEpochSecond, MICROS_PER_SECOND) val result = Math.addExact(us, NANOSECONDS.toMicros(instant.getNano)) result } - def microsToInstant(us: SQLTimestamp): Instant = { + def microsToInstant(us: Long): Instant = { val secs = Math.floorDiv(us, MICROS_PER_SECOND) // Unfolded Math.floorMod(us, MICROS_PER_SECOND) to 
reuse the result of // the above calculation of `secs` via `floorDiv`. @@ -353,7 +350,7 @@ object DateTimeUtils { * `yyyy-[m]m-[d]d *` * `yyyy-[m]m-[d]dT*` */ - def stringToDate(s: UTF8String, zoneId: ZoneId): Option[SQLDate] = { + def stringToDate(s: UTF8String, zoneId: ZoneId): Option[Int] = { if (s == null) { return None } @@ -401,14 +398,14 @@ object DateTimeUtils { } } - private def localTimestamp(microsec: SQLTimestamp, zoneId: ZoneId): LocalDateTime = { + private def localTimestamp(microsec: Long, zoneId: ZoneId): LocalDateTime = { microsToInstant(microsec).atZone(zoneId).toLocalDateTime } /** * Returns the hour value of a given timestamp value. The timestamp is expressed in microseconds. */ - def getHours(microsec: SQLTimestamp, zoneId: ZoneId): Int = { + def getHours(microsec: Long, zoneId: ZoneId): Int = { localTimestamp(microsec, zoneId).getHour } @@ -416,7 +413,7 @@ object DateTimeUtils { * Returns the minute value of a given timestamp value. The timestamp is expressed in * microseconds. */ - def getMinutes(microsec: SQLTimestamp, zoneId: ZoneId): Int = { + def getMinutes(microsec: Long, zoneId: ZoneId): Int = { localTimestamp(microsec, zoneId).getMinute } @@ -424,14 +421,14 @@ object DateTimeUtils { * Returns the second value of a given timestamp value. The timestamp is expressed in * microseconds. */ - def getSeconds(microsec: SQLTimestamp, zoneId: ZoneId): Int = { + def getSeconds(microsec: Long, zoneId: ZoneId): Int = { localTimestamp(microsec, zoneId).getSecond } /** * Returns the seconds part and its fractional part with microseconds. */ - def getSecondsWithFraction(microsec: SQLTimestamp, zoneId: ZoneId): Decimal = { + def getSecondsWithFraction(microsec: Long, zoneId: ZoneId): Decimal = { Decimal(getMicroseconds(microsec, zoneId), 8, 6) } @@ -439,7 +436,7 @@ object DateTimeUtils { * Returns seconds, including fractional parts, multiplied by 1000. The timestamp * is expressed in microseconds since the epoch. 
*/ - def getMilliseconds(timestamp: SQLTimestamp, zoneId: ZoneId): Decimal = { + def getMilliseconds(timestamp: Long, zoneId: ZoneId): Decimal = { Decimal(getMicroseconds(timestamp, zoneId), 8, 3) } @@ -447,7 +444,7 @@ object DateTimeUtils { * Returns seconds, including fractional parts, multiplied by 1000000. The timestamp * is expressed in microseconds since the epoch. */ - def getMicroseconds(timestamp: SQLTimestamp, zoneId: ZoneId): Int = { + def getMicroseconds(timestamp: Long, zoneId: ZoneId): Int = { val lt = localTimestamp(timestamp, zoneId) (lt.getLong(ChronoField.MICRO_OF_SECOND) + lt.getSecond * MICROS_PER_SECOND).toInt } @@ -456,11 +453,11 @@ object DateTimeUtils { * Returns the 'day in year' value for the given date. The date is expressed in days * since 1.1.1970. */ - def getDayInYear(date: SQLDate): Int = { + def getDayInYear(date: Int): Int = { LocalDate.ofEpochDay(date).getDayOfYear } - private def extractFromYear(date: SQLDate, divider: Int): Int = { + private def extractFromYear(date: Int, divider: Int): Int = { val localDate = daysToLocalDate(date) val yearOfEra = localDate.get(ChronoField.YEAR_OF_ERA) var result = yearOfEra / divider @@ -470,19 +467,19 @@ object DateTimeUtils { } /** Returns the millennium for the given date. The date is expressed in days since 1.1.1970. */ - def getMillennium(date: SQLDate): Int = extractFromYear(date, 1000) + def getMillennium(date: Int): Int = extractFromYear(date, 1000) /** Returns the century for the given date. The date is expressed in days since 1.1.1970. */ - def getCentury(date: SQLDate): Int = extractFromYear(date, 100) + def getCentury(date: Int): Int = extractFromYear(date, 100) /** Returns the decade for the given date. The date is expressed in days since 1.1.1970. */ - def getDecade(date: SQLDate): Int = Math.floorDiv(getYear(date), 10) + def getDecade(date: Int): Int = Math.floorDiv(getYear(date), 10) /** * Returns the year value for the given date. The date is expressed in days * since 1.1.1970. 
*/ - def getYear(date: SQLDate): Int = { + def getYear(date: Int): Int = { LocalDate.ofEpochDay(date).getYear } @@ -490,7 +487,7 @@ object DateTimeUtils { * Returns the year which conforms to ISO 8601. Each ISO 8601 week-numbering * year begins with the Monday of the week containing the 4th of January. */ - def getIsoYear(date: SQLDate): Int = { + def getIsoYear(date: Int): Int = { daysToLocalDate(date).get(IsoFields.WEEK_BASED_YEAR) } @@ -498,7 +495,7 @@ object DateTimeUtils { * Returns the quarter for the given date. The date is expressed in days * since 1.1.1970. */ - def getQuarter(date: SQLDate): Int = { + def getQuarter(date: Int): Int = { LocalDate.ofEpochDay(date).get(IsoFields.QUARTER_OF_YEAR) } @@ -506,7 +503,7 @@ object DateTimeUtils { * Split date (expressed in days since 1.1.1970) into four fields: * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). */ - def splitDate(date: SQLDate): (Int, Int, Int, Int) = { + def splitDate(date: Int): (Int, Int, Int, Int) = { val ld = LocalDate.ofEpochDay(date) (ld.getYear, ld.getMonthValue, ld.getDayOfMonth, ld.lengthOfMonth() - ld.getDayOfMonth) } @@ -515,7 +512,7 @@ object DateTimeUtils { * Returns the month value for the given date. The date is expressed in days * since 1.1.1970. January is month 1. */ - def getMonth(date: SQLDate): Int = { + def getMonth(date: Int): Int = { LocalDate.ofEpochDay(date).getMonthValue } @@ -523,7 +520,7 @@ object DateTimeUtils { * Returns the 'day of month' value for the given date. The date is expressed in days * since 1.1.1970. */ - def getDayOfMonth(date: SQLDate): Int = { + def getDayOfMonth(date: Int): Int = { LocalDate.ofEpochDay(date).getDayOfMonth } @@ -531,7 +528,7 @@ object DateTimeUtils { * Add date and year-month interval. * Returns a date value, expressed in days since 1.1.1970. 
*/ - def dateAddMonths(days: SQLDate, months: Int): SQLDate = { + def dateAddMonths(days: Int, months: Int): Int = { LocalDate.ofEpochDay(days).plusMonths(months).toEpochDay.toInt } @@ -540,11 +537,11 @@ object DateTimeUtils { * Returns a timestamp value, expressed in microseconds since 1.1.1970 00:00:00. */ def timestampAddInterval( - start: SQLTimestamp, + start: Long, months: Int, days: Int, microseconds: Long, - zoneId: ZoneId): SQLTimestamp = { + zoneId: ZoneId): Long = { val resultTimestamp = microsToInstant(start) .atZone(zoneId) .plusMonths(months) @@ -564,8 +561,8 @@ object DateTimeUtils { * The result is rounded to 8 decimal places if `roundOff` is set to true. */ def monthsBetween( - time1: SQLTimestamp, - time2: SQLTimestamp, + time1: Long, + time2: Long, roundOff: Boolean, zoneId: ZoneId): Double = { val millis1 = MICROSECONDS.toMillis(time1) @@ -629,7 +626,7 @@ object DateTimeUtils { * Returns the first date which is later than startDate and is of the given dayOfWeek. * dayOfWeek is an integer ranges in [0, 6], and 0 is Thu, 1 is Fri, etc,. */ - def getNextDateForDayOfWeek(startDate: SQLDate, dayOfWeek: Int): SQLDate = { + def getNextDateForDayOfWeek(startDate: Int, dayOfWeek: Int): Int = { startDate + 1 + ((dayOfWeek - 1 - startDate) % 7 + 7) % 7 } @@ -637,7 +634,7 @@ object DateTimeUtils { * Returns last day of the month for the given date. The date is expressed in days * since 1.1.1970. */ - def getLastDayOfMonth(date: SQLDate): SQLDate = { + def getLastDayOfMonth(date: Int): Int = { val localDate = LocalDate.ofEpochDay(date) (date - localDate.getDayOfMonth) + localDate.lengthOfMonth() } @@ -668,8 +665,8 @@ object DateTimeUtils { * Returns the trunc date from original date and trunc level. * Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 6. 
*/ - def truncDate(d: SQLDate, level: Int): SQLDate = { - def truncToYearLevel(divider: Int, adjust: Int): SQLDate = { + def truncDate(d: Int, level: Int): Int = { + def truncToYearLevel(divider: Int, adjust: Int): Int = { val oldYear = getYear(d) var newYear = Math.floorDiv(oldYear, divider) * divider if (adjust > 0 && newYear == oldYear) { @@ -693,7 +690,7 @@ object DateTimeUtils { } } - private def truncToUnit(t: SQLTimestamp, zoneId: ZoneId, unit: ChronoUnit): SQLTimestamp = { + private def truncToUnit(t: Long, zoneId: ZoneId, unit: ChronoUnit): Long = { val truncated = microsToInstant(t).atZone(zoneId).truncatedTo(unit) instantToMicros(truncated.toInstant) } @@ -702,7 +699,7 @@ object DateTimeUtils { * Returns the trunc date time from original date time and trunc level. * Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 12. */ - def truncTimestamp(t: SQLTimestamp, level: Int, zoneId: ZoneId): SQLTimestamp = { + def truncTimestamp(t: Long, level: Int, zoneId: ZoneId): Long = { level match { case TRUNC_TO_MICROSECOND => t case TRUNC_TO_HOUR => truncToUnit(t, zoneId, ChronoUnit.HOURS) @@ -757,7 +754,7 @@ object DateTimeUtils { * mapping, the conversion here may return wrong result, we should make the timestamp * timezone-aware. */ - def convertTz(ts: SQLTimestamp, fromZone: ZoneId, toZone: ZoneId): SQLTimestamp = { + def convertTz(ts: Long, fromZone: ZoneId, toZone: ZoneId): Long = { val rebasedDateTime = microsToInstant(ts).atZone(toZone).toLocalDateTime.atZone(fromZone) instantToMicros(rebasedDateTime.toInstant) } @@ -766,7 +763,7 @@ object DateTimeUtils { * Returns a timestamp of given timezone from utc timestamp, with the same string * representation in their timezone. 
*/ - def fromUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { + def fromUTCTime(time: Long, timeZone: String): Long = { convertTz(time, ZoneOffset.UTC, getZoneId(timeZone)) } @@ -774,7 +771,7 @@ object DateTimeUtils { * Returns a utc timestamp from a given timestamp from a given timezone, with the same * string representation in their timezone. */ - def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { + def toUTCTime(time: Long, timeZone: String): Long = { convertTz(time, getZoneId(timeZone), ZoneOffset.UTC) } @@ -782,16 +779,16 @@ object DateTimeUtils { * Returns the number of seconds with fractional part in microsecond precision * since 1970-01-01 00:00:00 local time. */ - def getEpoch(timestamp: SQLTimestamp, zoneId: ZoneId): Decimal = { + def getEpoch(timestamp: Long, zoneId: ZoneId): Decimal = { val offset = SECONDS.toMicros( zoneId.getRules.getOffset(microsToInstant(timestamp)).getTotalSeconds) val sinceEpoch = timestamp + offset Decimal(sinceEpoch, 20, 6) } - def currentTimestamp(): SQLTimestamp = instantToMicros(Instant.now()) + def currentTimestamp(): Long = instantToMicros(Instant.now()) - def currentDate(zoneId: ZoneId): SQLDate = localDateToDays(LocalDate.now(zoneId)) + def currentDate(zoneId: ZoneId): Int = localDateToDays(LocalDate.now(zoneId)) private def today(zoneId: ZoneId): ZonedDateTime = { Instant.now().atZone(zoneId).`with`(LocalTime.MIDNIGHT) @@ -835,7 +832,7 @@ object DateTimeUtils { * @return some of microseconds since the epoch if the conversion completed * successfully otherwise None. 
*/ - def convertSpecialTimestamp(input: String, zoneId: ZoneId): Option[SQLTimestamp] = { + def convertSpecialTimestamp(input: String, zoneId: ZoneId): Option[Long] = { extractSpecialValue(input, zoneId).flatMap { case "epoch" => Some(0) case "now" => Some(currentTimestamp()) @@ -846,7 +843,7 @@ object DateTimeUtils { } } - private def convertSpecialTimestamp(bytes: Array[Byte], zoneId: ZoneId): Option[SQLTimestamp] = { + private def convertSpecialTimestamp(bytes: Array[Byte], zoneId: ZoneId): Option[Long] = { if (bytes.length > 0 && Character.isAlphabetic(bytes(0))) { convertSpecialTimestamp(new String(bytes, StandardCharsets.UTF_8), zoneId) } else { @@ -860,7 +857,7 @@ object DateTimeUtils { * @param zoneId - zone identifier used to get the current date. * @return some of days since the epoch if the conversion completed successfully otherwise None. */ - def convertSpecialDate(input: String, zoneId: ZoneId): Option[SQLDate] = { + def convertSpecialDate(input: String, zoneId: ZoneId): Option[Int] = { extractSpecialValue(input, zoneId).flatMap { case "epoch" => Some(0) case "now" | "today" => Some(currentDate(zoneId)) @@ -870,7 +867,7 @@ object DateTimeUtils { } } - private def convertSpecialDate(bytes: Array[Byte], zoneId: ZoneId): Option[SQLDate] = { + private def convertSpecialDate(bytes: Array[Byte], zoneId: ZoneId): Option[Int] = { if (bytes.length > 0 && Character.isAlphabetic(bytes(0))) { convertSpecialDate(new String(bytes, StandardCharsets.UTF_8), zoneId) } else { @@ -885,7 +882,7 @@ object DateTimeUtils { * @return an interval between two dates. The interval can be negative * if the end date is before the start date. 
*/ - def subtractDates(endDate: SQLDate, startDate: SQLDate): CalendarInterval = { + def subtractDates(endDate: Int, startDate: Int): CalendarInterval = { val period = Period.between( LocalDate.ofEpochDay(startDate), LocalDate.ofEpochDay(endDate)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 4893a7ec91cbb..d79a892fdff50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -28,7 +28,7 @@ import java.util.concurrent.TimeUnit.SECONDS import org.apache.commons.lang3.time.FastDateFormat import org.apache.spark.sql.catalyst.util.DateTimeConstants._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{ convertSpecialTimestamp, SQLTimestamp} +import org.apache.spark.sql.catalyst.util.DateTimeUtils.convertSpecialTimestamp import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.Decimal @@ -105,7 +105,7 @@ class MicrosCalendar(tz: TimeZone, digitsInFraction: Int) // Converts parsed `MILLISECOND` field to seconds fraction in microsecond precision. // For example if the fraction pattern is `SSSS` then `digitsInFraction` = 4, and // if the `MILLISECOND` field was parsed to `1234`. 
- def getMicros(): SQLTimestamp = { + def getMicros(): Long = { // Append 6 zeros to the field: 1234 -> 1234000000 val d = fields(Calendar.MILLISECOND) * MICROS_PER_SECOND // Take the first 6 digits from `d`: 1234000000 -> 123400 @@ -134,7 +134,7 @@ class LegacyFastTimestampFormatter( fastDateFormat.getTimeZone, fastDateFormat.getPattern.count(_ == 'S')) - def parse(s: String): SQLTimestamp = { + def parse(s: String): Long = { cal.clear() // Clear the calendar because it can be re-used many times if (!fastDateFormat.parse(s, new ParsePosition(0), cal)) { throw new IllegalArgumentException(s"'$s' is an invalid timestamp") @@ -144,7 +144,7 @@ class LegacyFastTimestampFormatter( cal.getTimeInMillis * MICROS_PER_MILLIS + micros } - def format(timestamp: SQLTimestamp): String = { + def format(timestamp: Long): String = { cal.setTimeInMillis(Math.floorDiv(timestamp, MICROS_PER_SECOND) * MILLIS_PER_SECOND) cal.setMicros(Math.floorMod(timestamp, MICROS_PER_SECOND)) fastDateFormat.format(cal) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index ef7764dba1e9e..a4ba7de012680 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -156,10 +156,10 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { "fromPrimitiveArray", ObjectType(classOf[Array[Int]]), Array[Int](1, 2, 3), UnsafeArrayData.fromPrimitiveArray(Array[Int](1, 2, 3))), (DateTimeUtils.getClass, ObjectType(classOf[Date]), - "toJavaDate", ObjectType(classOf[DateTimeUtils.SQLDate]), 77777, + "toJavaDate", ObjectType(classOf[Int]), 77777, DateTimeUtils.toJavaDate(77777)), (DateTimeUtils.getClass, ObjectType(classOf[Timestamp]), - "toJavaTimestamp", 
ObjectType(classOf[DateTimeUtils.SQLTimestamp]), + "toJavaTimestamp", ObjectType(classOf[Long]), 88888888.toLong, DateTimeUtils.toJavaTimestamp(88888888)) ).foreach { case (cls, dataType, methodName, argType, arg, expected) => checkObjectExprEvaluation(StaticInvoke(cls, dataType, methodName, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 05d0d154f607b..999fd9f1d6a33 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -128,7 +128,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { checkFromToJavaDate(new Date(df2.parse("1776-07-04 18:30:00 UTC").getTime)) } - private def toDate(s: String, zoneId: ZoneId = ZoneOffset.UTC): Option[SQLDate] = { + private def toDate(s: String, zoneId: ZoneId = ZoneOffset.UTC): Option[Int] = { stringToDate(UTF8String.fromString(s), zoneId) } @@ -156,7 +156,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(toDate("1999 08").isEmpty) } - private def toTimestamp(str: String, zoneId: ZoneId): Option[SQLTimestamp] = { + private def toTimestamp(str: String, zoneId: ZoneId): Option[Long] = { stringToTimestamp(UTF8String.fromString(str), zoneId) } @@ -507,7 +507,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { def testTrunc( level: Int, expected: String, - inputTS: SQLTimestamp, + inputTS: Long, zoneId: ZoneId = defaultZoneId): Unit = { val truncated = DateTimeUtils.truncTimestamp(inputTS, level, zoneId) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala index 948a120e0d6e0..d79b6c7f18bd4 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala @@ -33,7 +33,6 @@ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} -import org.apache.spark.sql.catalyst.util.DateTimeUtils.SQLDate import org.apache.spark.sql.sources import org.apache.spark.unsafe.types.UTF8String @@ -103,7 +102,7 @@ class ParquetFilters( private val ParquetTimestampMicrosType = ParquetSchemaType(TIMESTAMP_MICROS, INT64, 0, null) private val ParquetTimestampMillisType = ParquetSchemaType(TIMESTAMP_MILLIS, INT64, 0, null) - private def dateToDays(date: Date): SQLDate = { + private def dateToDays(date: Date): Int = { DateTimeUtils.fromJavaDate(date) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 850adae8a6b95..5b841ede07cd1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -34,7 +34,6 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} -import org.apache.spark.sql.catalyst.util.DateTimeUtils.SQLTimestamp import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -719,7 +718,7 @@ private[parquet] object ParquetRowConverter { unscaled } - def binaryToSQLTimestamp(binary: Binary): SQLTimestamp = { + def binaryToSQLTimestamp(binary: Binary): 
Long = { assert(binary.length() == 12, s"Timestamps (with nanoseconds) are expected to be stored in" + s" 12-byte long binaries. Found a ${binary.length()}-byte binary instead.") val buffer = binary.toByteBuffer.order(ByteOrder.LITTLE_ENDIAN) From c475f271b24f8d9ef7bbbc0f93a238600d70a717 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 21 Feb 2020 09:13:14 +0300 Subject: [PATCH 09/38] Improve a comment --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 59a514fd7c862..18f1a46b89a0c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -137,7 +137,7 @@ object DateTimeUtils { } /* - * Converts milliseconds since epoch to Long. + * Converts milliseconds since the epoch to microseconds. 
*/ def fromMillis(millis: Long): Long = { MILLISECONDS.toMicros(millis) From cbf69fb295aa6a5df4d8cc791dc3acf2f4badb4b Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 21 Feb 2020 23:17:54 +0300 Subject: [PATCH 10/38] Renaming --- .../sql/catalyst/util/DateTimeUtils.scala | 246 +++++++++--------- 1 file changed, 123 insertions(+), 123 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 18f1a46b89a0c..c6c6c771cc666 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -50,13 +50,12 @@ object DateTimeUtils { TimeZone.getTimeZone(getZoneId(timeZoneId)) } - // we should use the exact day as Int, for example, (year, month, day) -> day - def millisToDays(millisUtc: Long): Int = { - millisToDays(millisUtc, ZoneId.systemDefault()) + def millisToDays(millis: Long): Int = { + millisToDays(millis, ZoneId.systemDefault()) } - def millisToDays(millisUtc: Long, zoneId: ZoneId): Int = { - val instant = microsToInstant(Math.multiplyExact(millisUtc, MICROS_PER_MILLIS)) + def millisToDays(millis: Long, zoneId: ZoneId): Int = { + val instant = microsToInstant(Math.multiplyExact(millis, MICROS_PER_MILLIS)) localDateToDays(LocalDateTime.ofInstant(instant, zoneId).toLocalDate) } @@ -70,9 +69,9 @@ object DateTimeUtils { instantToMicros(instant) / MICROS_PER_MILLIS } - // Converts Timestamp to string according to Hive TimestampWritable convention. - def timestampToString(tf: TimestampFormatter, us: Long): String = { - tf.format(us) + // Converts the `micros` timestamp to string according to Hive TimestampWritable convention. 
+ def timestampToString(tf: TimestampFormatter, micros: Long): String = { + tf.format(micros) } /** @@ -85,31 +84,31 @@ object DateTimeUtils { /** * Returns a java.sql.Date from number of days since epoch. */ - def toJavaDate(daysSinceEpoch: Int): Date = { - new Date(daysToMillis(daysSinceEpoch)) + def toJavaDate(days: Int): Date = { + new Date(daysToMillis(days)) } /** * Returns a java.sql.Timestamp from number of micros since epoch. */ - def toJavaTimestamp(us: Long): Timestamp = { - Timestamp.from(microsToInstant(us)) + def toJavaTimestamp(micros: Long): Timestamp = { + Timestamp.from(microsToInstant(micros)) } /** * Returns the number of micros since epoch from java.sql.Timestamp. */ - def fromJavaTimestamp(t: Timestamp): Long = { - instantToMicros(t.toInstant) + def fromJavaTimestamp(timestamp: Timestamp): Long = { + instantToMicros(timestamp.toInstant) } /** * Returns the number of microseconds since epoch from Julian day * and nanoseconds in a day */ - def fromJulianDay(day: Int, nanoseconds: Long): Long = { + def fromJulianDay(days: Int, nanoseconds: Long): Long = { // use Long to avoid rounding errors - val seconds = (day - JULIAN_DAY_OF_EPOCH).toLong * SECONDS_PER_DAY + val seconds = (days - JULIAN_DAY_OF_EPOCH).toLong * SECONDS_PER_DAY SECONDS.toMicros(seconds) + NANOSECONDS.toMicros(nanoseconds) } @@ -118,37 +117,37 @@ object DateTimeUtils { * * Note: support timestamp since 4717 BC (without negative nanoseconds, compatible with Hive). 
*/ - def toJulianDay(us: Long): (Int, Long) = { - val julian_us = us + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY - val day = julian_us / MICROS_PER_DAY - val micros = julian_us % MICROS_PER_DAY - (day.toInt, MICROSECONDS.toNanos(micros)) + def toJulianDay(micros: Long): (Int, Long) = { + val julianUs = micros + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY + val days = julianUs / MICROS_PER_DAY + val us = julianUs % MICROS_PER_DAY + (days.toInt, MICROSECONDS.toNanos(us)) } - /* + /** * Converts the timestamp to milliseconds since epoch. In spark timestamp values have microseconds * precision, so this conversion is lossy. */ - def toMillis(us: Long): Long = { + def toMillis(micros: Long): Long = { // When the timestamp is negative i.e before 1970, we need to adjust the millseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. // In millis precision the above needs to be represented as (-157700927877). - Math.floorDiv(us, MICROS_PER_MILLIS) + Math.floorDiv(micros, MICROS_PER_MILLIS) } - /* + /** * Converts milliseconds since the epoch to microseconds. 
*/ def fromMillis(millis: Long): Long = { MILLISECONDS.toMicros(millis) } - def microsToEpochDays(epochMicros: Long, zoneId: ZoneId): Int = { - localDateToDays(microsToInstant(epochMicros).atZone(zoneId).toLocalDate) + def microsToEpochDays(micros: Long, zoneId: ZoneId): Int = { + localDateToDays(microsToInstant(micros).atZone(zoneId).toLocalDate) } - def epochDaysToMicros(epochDays: Int, zoneId: ZoneId): Long = { - val localDate = LocalDate.ofEpochDay(epochDays) + def epochDaysToMicros(days: Int, zoneId: ZoneId): Long = { + val localDate = LocalDate.ofEpochDay(days) val zeroLocalTime = LocalTime.MIDNIGHT val localDateTime = LocalDateTime.of(localDate, zeroLocalTime) instantToMicros(localDateTime.atZone(zoneId).toInstant) @@ -318,11 +317,11 @@ object DateTimeUtils { result } - def microsToInstant(us: Long): Instant = { - val secs = Math.floorDiv(us, MICROS_PER_SECOND) + def microsToInstant(micros: Long): Instant = { + val secs = Math.floorDiv(micros, MICROS_PER_SECOND) // Unfolded Math.floorMod(us, MICROS_PER_SECOND) to reuse the result of // the above calculation of `secs` via `floorDiv`. - val mos = us - secs * MICROS_PER_SECOND + val mos = micros - secs * MICROS_PER_SECOND Instant.ofEpochSecond(secs, mos * NANOS_PER_MICROS) } @@ -398,67 +397,70 @@ object DateTimeUtils { } } - private def localTimestamp(microsec: Long, zoneId: ZoneId): LocalDateTime = { - microsToInstant(microsec).atZone(zoneId).toLocalDateTime + private def getLocalDateTime(micros: Long, zoneId: ZoneId): LocalDateTime = { + microsToInstant(micros).atZone(zoneId).toLocalDateTime } /** * Returns the hour value of a given timestamp value. The timestamp is expressed in microseconds. */ - def getHours(microsec: Long, zoneId: ZoneId): Int = { - localTimestamp(microsec, zoneId).getHour + def getHours(micros: Long, zoneId: ZoneId): Int = { + getLocalDateTime(micros, zoneId).getHour } /** * Returns the minute value of a given timestamp value. The timestamp is expressed in - * microseconds. 
+ * microseconds since the epoch. */ - def getMinutes(microsec: Long, zoneId: ZoneId): Int = { - localTimestamp(microsec, zoneId).getMinute + def getMinutes(micros: Long, zoneId: ZoneId): Int = { + getLocalDateTime(micros, zoneId).getMinute } /** * Returns the second value of a given timestamp value. The timestamp is expressed in - * microseconds. + * microseconds since the epoch. */ - def getSeconds(microsec: Long, zoneId: ZoneId): Int = { - localTimestamp(microsec, zoneId).getSecond + def getSeconds(micros: Long, zoneId: ZoneId): Int = { + getLocalDateTime(micros, zoneId).getSecond } /** * Returns the seconds part and its fractional part with microseconds. */ - def getSecondsWithFraction(microsec: Long, zoneId: ZoneId): Decimal = { - Decimal(getMicroseconds(microsec, zoneId), 8, 6) + def getSecondsWithFraction(micros: Long, zoneId: ZoneId): Decimal = { + Decimal(getMicroseconds(micros, zoneId), 8, 6) } /** - * Returns seconds, including fractional parts, multiplied by 1000. The timestamp - * is expressed in microseconds since the epoch. + * Returns local seconds, including fractional parts, multiplied by 1000. + * + * @param micros The number of microseconds since the epoch. + * @param zoneId The time zone id which milliseconds should be obtained in. */ - def getMilliseconds(timestamp: Long, zoneId: ZoneId): Decimal = { - Decimal(getMicroseconds(timestamp, zoneId), 8, 3) + def getMilliseconds(micros: Long, zoneId: ZoneId): Decimal = { + Decimal(getMicroseconds(micros, zoneId), 8, 3) } /** - * Returns seconds, including fractional parts, multiplied by 1000000. The timestamp - * is expressed in microseconds since the epoch. + * Returns local seconds, including fractional parts, multiplied by 1000000. + * + * @param micros The number of microseconds since the epoch. + * @param zoneId The time zone id which milliseconds should be obtained in. 
*/ - def getMicroseconds(timestamp: Long, zoneId: ZoneId): Int = { - val lt = localTimestamp(timestamp, zoneId) + def getMicroseconds(micros: Long, zoneId: ZoneId): Int = { + val lt = getLocalDateTime(micros, zoneId) (lt.getLong(ChronoField.MICRO_OF_SECOND) + lt.getSecond * MICROS_PER_SECOND).toInt } /** - * Returns the 'day in year' value for the given date. The date is expressed in days - * since 1.1.1970. + * Returns the 'day in year' value for the given number of days since 1970-01-01. */ - def getDayInYear(date: Int): Int = { - LocalDate.ofEpochDay(date).getDayOfYear + def getDayInYear(days: Int): Int = { + LocalDate.ofEpochDay(days).getDayOfYear } - private def extractFromYear(date: Int, divider: Int): Int = { - val localDate = daysToLocalDate(date) + private def extractFromYear(days: Int, divider: Int): Int = { + val localDate = daysToLocalDate(days) val yearOfEra = localDate.get(ChronoField.YEAR_OF_ERA) var result = yearOfEra / divider if ((yearOfEra % divider) != 0 || yearOfEra <= 1) result += 1 @@ -466,62 +468,63 @@ object DateTimeUtils { result } - /** Returns the millennium for the given date. The date is expressed in days since 1.1.1970. */ - def getMillennium(date: Int): Int = extractFromYear(date, 1000) + /** + * Returns the millennium for the given number of days since 1970-01-01. + */ + def getMillennium(days: Int): Int = extractFromYear(days, 1000) - /** Returns the century for the given date. The date is expressed in days since 1.1.1970. */ - def getCentury(date: Int): Int = extractFromYear(date, 100) + /** + * Returns the century for the given number of days since 1970-01-01. + */ + def getCentury(days: Int): Int = extractFromYear(days, 100) - /** Returns the decade for the given date. The date is expressed in days since 1.1.1970. */ - def getDecade(date: Int): Int = Math.floorDiv(getYear(date), 10) + /** + * Returns the decade for the given number of days since 1970-01-01. 
+ */ + def getDecade(days: Int): Int = Math.floorDiv(getYear(days), 10) /** - * Returns the year value for the given date. The date is expressed in days - * since 1.1.1970. + * Returns the year value for the given number of days since 1970-01-01. */ - def getYear(date: Int): Int = { - LocalDate.ofEpochDay(date).getYear + def getYear(days: Int): Int = { + LocalDate.ofEpochDay(days).getYear } /** * Returns the year which conforms to ISO 8601. Each ISO 8601 week-numbering * year begins with the Monday of the week containing the 4th of January. */ - def getIsoYear(date: Int): Int = { - daysToLocalDate(date).get(IsoFields.WEEK_BASED_YEAR) + def getIsoYear(days: Int): Int = { + daysToLocalDate(days).get(IsoFields.WEEK_BASED_YEAR) } - /** - * Returns the quarter for the given date. The date is expressed in days - * since 1.1.1970. - */ - def getQuarter(date: Int): Int = { - LocalDate.ofEpochDay(date).get(IsoFields.QUARTER_OF_YEAR) + /** Returns the quarter for the given number of days since 1970-01-01. */ + def getQuarter(days: Int): Int = { + LocalDate.ofEpochDay(days).get(IsoFields.QUARTER_OF_YEAR) } /** * Split date (expressed in days since 1.1.1970) into four fields: * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). */ - def splitDate(date: Int): (Int, Int, Int, Int) = { - val ld = LocalDate.ofEpochDay(date) + def splitDate(days: Int): (Int, Int, Int, Int) = { + val ld = LocalDate.ofEpochDay(days) (ld.getYear, ld.getMonthValue, ld.getDayOfMonth, ld.lengthOfMonth() - ld.getDayOfMonth) } /** - * Returns the month value for the given date. The date is expressed in days - * since 1.1.1970. January is month 1. + * Returns the month value for the given number of days since 1970-01-01. + * January is month 1. */ - def getMonth(date: Int): Int = { - LocalDate.ofEpochDay(date).getMonthValue + def getMonth(days: Int): Int = { + LocalDate.ofEpochDay(days).getMonthValue } /** - * Returns the 'day of month' value for the given date. 
The date is expressed in days - * since 1.1.1970. + * Returns the 'day of month' value for the given number of days since 1970-01-01. */ - def getDayOfMonth(date: Int): Int = { - LocalDate.ofEpochDay(date).getDayOfMonth + def getDayOfMonth(days: Int): Int = { + LocalDate.ofEpochDay(days).getDayOfMonth } /** @@ -626,17 +629,14 @@ object DateTimeUtils { * Returns the first date which is later than startDate and is of the given dayOfWeek. * dayOfWeek is an integer ranges in [0, 6], and 0 is Thu, 1 is Fri, etc,. */ - def getNextDateForDayOfWeek(startDate: Int, dayOfWeek: Int): Int = { - startDate + 1 + ((dayOfWeek - 1 - startDate) % 7 + 7) % 7 + def getNextDateForDayOfWeek(startDay: Int, dayOfWeek: Int): Int = { + startDay + 1 + ((dayOfWeek - 1 - startDay) % 7 + 7) % 7 } - /** - * Returns last day of the month for the given date. The date is expressed in days - * since 1.1.1970. - */ - def getLastDayOfMonth(date: Int): Int = { - val localDate = LocalDate.ofEpochDay(date) - (date - localDate.getDayOfMonth) + localDate.lengthOfMonth() + /** Returns last day of the month for the given number of days since 1970-01-01. */ + def getLastDayOfMonth(days: Int): Int = { + val localDate = LocalDate.ofEpochDay(days) + (days - localDate.getDayOfMonth) + localDate.lengthOfMonth() } // The constants are visible for testing purpose only. @@ -665,9 +665,9 @@ object DateTimeUtils { * Returns the trunc date from original date and trunc level. * Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 6. 
*/ - def truncDate(d: Int, level: Int): Int = { + def truncDate(days: Int, level: Int): Int = { def truncToYearLevel(divider: Int, adjust: Int): Int = { - val oldYear = getYear(d) + val oldYear = getYear(days) var newYear = Math.floorDiv(oldYear, divider) * divider if (adjust > 0 && newYear == oldYear) { newYear -= divider @@ -676,11 +676,11 @@ object DateTimeUtils { localDateToDays(LocalDate.of(newYear, 1, 1)) } level match { - case TRUNC_TO_WEEK => getNextDateForDayOfWeek(d - 7, MONDAY) - case TRUNC_TO_MONTH => d - DateTimeUtils.getDayOfMonth(d) + 1 + case TRUNC_TO_WEEK => getNextDateForDayOfWeek(days - 7, MONDAY) + case TRUNC_TO_MONTH => days - getDayOfMonth(days) + 1 case TRUNC_TO_QUARTER => - localDateToDays(daysToLocalDate(d).`with`(IsoFields.DAY_OF_QUARTER, 1L)) - case TRUNC_TO_YEAR => d - DateTimeUtils.getDayInYear(d) + 1 + localDateToDays(daysToLocalDate(days).`with`(IsoFields.DAY_OF_QUARTER, 1L)) + case TRUNC_TO_YEAR => days - getDayInYear(days) + 1 case TRUNC_TO_DECADE => truncToYearLevel(10, 0) case TRUNC_TO_CENTURY => truncToYearLevel(100, 1) case TRUNC_TO_MILLENNIUM => truncToYearLevel(1000, 1) @@ -690,8 +690,8 @@ object DateTimeUtils { } } - private def truncToUnit(t: Long, zoneId: ZoneId, unit: ChronoUnit): Long = { - val truncated = microsToInstant(t).atZone(zoneId).truncatedTo(unit) + private def truncToUnit(micros: Long, zoneId: ZoneId, unit: ChronoUnit): Long = { + val truncated = microsToInstant(micros).atZone(zoneId).truncatedTo(unit) instantToMicros(truncated.toInstant) } @@ -699,13 +699,13 @@ object DateTimeUtils { * Returns the trunc date time from original date time and trunc level. * Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 12. 
*/ - def truncTimestamp(t: Long, level: Int, zoneId: ZoneId): Long = { + def truncTimestamp(micros: Long, level: Int, zoneId: ZoneId): Long = { level match { - case TRUNC_TO_MICROSECOND => t - case TRUNC_TO_HOUR => truncToUnit(t, zoneId, ChronoUnit.HOURS) - case TRUNC_TO_DAY => truncToUnit(t, zoneId, ChronoUnit.DAYS) + case TRUNC_TO_MICROSECOND => micros + case TRUNC_TO_HOUR => truncToUnit(micros, zoneId, ChronoUnit.HOURS) + case TRUNC_TO_DAY => truncToUnit(micros, zoneId, ChronoUnit.DAYS) case _ => - val millis = MICROSECONDS.toMillis(t) + val millis = MICROSECONDS.toMillis(micros) val truncated = level match { case TRUNC_TO_MILLISECOND => millis case TRUNC_TO_SECOND => @@ -748,14 +748,14 @@ object DateTimeUtils { } /** - * Convert the timestamp `ts` from one timezone to another. + * Convert the timestamp `micros` from one timezone to another. * * TODO: Because of DST, the conversion between UTC and human time is not exactly one-to-one * mapping, the conversion here may return wrong result, we should make the timestamp * timezone-aware. */ - def convertTz(ts: Long, fromZone: ZoneId, toZone: ZoneId): Long = { - val rebasedDateTime = microsToInstant(ts).atZone(toZone).toLocalDateTime.atZone(fromZone) + def convertTz(micros: Long, fromZone: ZoneId, toZone: ZoneId): Long = { + val rebasedDateTime = microsToInstant(micros).atZone(toZone).toLocalDateTime.atZone(fromZone) instantToMicros(rebasedDateTime.toInstant) } @@ -763,26 +763,26 @@ object DateTimeUtils { * Returns a timestamp of given timezone from utc timestamp, with the same string * representation in their timezone. */ - def fromUTCTime(time: Long, timeZone: String): Long = { - convertTz(time, ZoneOffset.UTC, getZoneId(timeZone)) + def fromUTCTime(micros: Long, timeZone: String): Long = { + convertTz(micros, ZoneOffset.UTC, getZoneId(timeZone)) } /** * Returns a utc timestamp from a given timestamp from a given timezone, with the same * string representation in their timezone. 
*/ - def toUTCTime(time: Long, timeZone: String): Long = { - convertTz(time, getZoneId(timeZone), ZoneOffset.UTC) + def toUTCTime(micros: Long, timeZone: String): Long = { + convertTz(micros, getZoneId(timeZone), ZoneOffset.UTC) } /** * Returns the number of seconds with fractional part in microsecond precision * since 1970-01-01 00:00:00 local time. */ - def getEpoch(timestamp: Long, zoneId: ZoneId): Decimal = { + def getEpoch(micros: Long, zoneId: ZoneId): Decimal = { val offset = SECONDS.toMicros( - zoneId.getRules.getOffset(microsToInstant(timestamp)).getTotalSeconds) - val sinceEpoch = timestamp + offset + zoneId.getRules.getOffset(microsToInstant(micros)).getTotalSeconds) + val sinceEpoch = micros + offset Decimal(sinceEpoch, 20, 6) } @@ -877,15 +877,15 @@ object DateTimeUtils { /** * Subtracts two dates. - * @param endDate - the end date, exclusive - * @param startDate - the start date, inclusive + * @param endDay - the end date, exclusive + * @param startDay - the start date, inclusive * @return an interval between two dates. The interval can be negative * if the end date is before the start date. 
*/ - def subtractDates(endDate: Int, startDate: Int): CalendarInterval = { + def subtractDates(endDay: Int, startDay: Int): CalendarInterval = { val period = Period.between( - LocalDate.ofEpochDay(startDate), - LocalDate.ofEpochDay(endDate)) + LocalDate.ofEpochDay(startDay), + LocalDate.ofEpochDay(endDay)) val months = period.getMonths + 12 * period.getYears val days = period.getDays new CalendarInterval(months, days, 0) From 6840291526b44fb1fe5a76e5debddfc29d7e9eb3 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 24 Feb 2020 09:57:08 +0300 Subject: [PATCH 11/38] Fix import --- .../spark/sql/catalyst/expressions/CastSuite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index ef03903fe5b5e..3f3f92e273a57 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, Collect import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -290,7 +290,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(cast(nts, TimestampType, utcId), StringType, utcId), nts) checkEvaluation( cast(cast(ts, StringType, utcId), TimestampType, utcId), - DateTimeUtils.fromJavaTimestamp(ts)) + fromJavaTimestamp(ts)) // all convert to string 
type to check checkEvaluation(cast(cast(cast(nts, TimestampType, utcId), DateType, utcId), StringType), sd) @@ -374,11 +374,11 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(ts, FloatType), 15.003f) checkEvaluation(cast(ts, DoubleType), 15.003) checkEvaluation(cast(cast(tss, ShortType), TimestampType), - DateTimeUtils.fromJavaTimestamp(ts) * MILLIS_PER_SECOND) + fromJavaTimestamp(ts) * MILLIS_PER_SECOND) checkEvaluation(cast(cast(tss, IntegerType), TimestampType), - DateTimeUtils.fromJavaTimestamp(ts) * MILLIS_PER_SECOND) + fromJavaTimestamp(ts) * MILLIS_PER_SECOND) checkEvaluation(cast(cast(tss, LongType), TimestampType), - DateTimeUtils.fromJavaTimestamp(ts) * MILLIS_PER_SECOND) + fromJavaTimestamp(ts) * MILLIS_PER_SECOND) checkEvaluation( cast(cast(millis.toFloat / MILLIS_PER_SECOND, TimestampType), FloatType), millis.toFloat / MILLIS_PER_SECOND) From 86ccef1da832d25166c3bfd018f1df9b6af5b9ee Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 26 Feb 2020 10:17:48 +0300 Subject: [PATCH 12/38] Fix merge --- .../sql/catalyst/util/DateTimeUtils.scala | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 1a809563891c5..b94ff833e5f3b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -50,12 +50,12 @@ object DateTimeUtils { TimeZone.getTimeZone(getZoneId(timeZoneId)) } - def microsToDays(timestamp: Long): Int = { - microsToDays(timestamp, ZoneId.systemDefault()) + def microsToDays(micros: Long): Int = { + microsToDays(micros, ZoneId.systemDefault()) } - def microsToDays(timestamp: Long, zoneId: ZoneId): Int = { - val instant = microsToInstant(timestamp) + def 
microsToDays(micros: Long, zoneId: ZoneId): Int = { + val instant = microsToInstant(micros) localDateToDays(LocalDateTime.ofInstant(instant, zoneId).toLocalDate) } @@ -83,8 +83,8 @@ object DateTimeUtils { /** * Returns a java.sql.Date from number of days since epoch. */ - def toJavaDate(daysSinceEpoch: Int): Date = { - new Date(microsToMillis(daysToMicros(daysSinceEpoch))) + def toJavaDate(days: Int): Date = { + new Date(microsToMillis(daysToMicros(days))) } /** @@ -553,22 +553,22 @@ object DateTimeUtils { } /** - * Returns number of months between time1 and time2. time1 and time2 are expressed in - * microseconds since 1.1.1970. If time1 is later than time2, the result is positive. + * Returns number of months between micros1 and micros2. micros1 and micros2 are expressed in + * microseconds since 1.1.1970. If micros1 is later than micros2, the result is positive. * - * If time1 and time2 are on the same day of month, or both are the last day of month, + * If micros1 and micros2 are on the same day of month, or both are the last day of month, * returns, time of day will be ignored. * * Otherwise, the difference is calculated based on 31 days per month. * The result is rounded to 8 decimal places if `roundOff` is set to true. 
*/ def monthsBetween( - time1: Long, - time2: Long, + micros1: Long, + micros2: Long, roundOff: Boolean, zoneId: ZoneId): Double = { - val date1 = microsToDays(time1, zoneId) - val date2 = microsToDays(time2, zoneId) + val date1 = microsToDays(micros1, zoneId) + val date2 = microsToDays(micros2, zoneId) val (year1, monthInYear1, dayInMonth1, daysToMonthEnd1) = splitDate(date1) val (year2, monthInYear2, dayInMonth2, daysToMonthEnd2) = splitDate(date2) @@ -582,8 +582,8 @@ object DateTimeUtils { } // using milliseconds can cause precision loss with more than 8 digits // we follow Hive's implementation which uses seconds - val secondsInDay1 = MICROSECONDS.toSeconds(time1 - daysToMicros(date1, zoneId)) - val secondsInDay2 = MICROSECONDS.toSeconds(time2 - daysToMicros(date2, zoneId)) + val secondsInDay1 = MICROSECONDS.toSeconds(micros1 - daysToMicros(date1, zoneId)) + val secondsInDay2 = MICROSECONDS.toSeconds(micros2 - daysToMicros(date2, zoneId)) val secondsDiff = (dayInMonth1 - dayInMonth2) * SECONDS_PER_DAY + secondsInDay1 - secondsInDay2 val secondsInMonth = DAYS.toSeconds(31) val diff = monthDiff + secondsDiff / secondsInMonth.toDouble From 54c0fe9f0c54d65e9eb7b46a17e7d6a1519f5e22 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 12 Mar 2020 09:34:46 +0300 Subject: [PATCH 13/38] Fix merge issues --- .../main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index e217c5279046e..a3210e6037a18 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -189,7 +189,7 @@ private[hive] trait HiveInspectors { val localDate = LocalDate.ofEpochDay(daysSinceEpoch) val utcCal = new Calendar.Builder() .setCalendarType("gregory") - 
.setTimeZone(DateTimeUtils.TimeZoneUTC) + .setTimeZone(DateTimeUtils.getTimeZone("UTC")) .setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth) .build() Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY)) @@ -199,7 +199,7 @@ private[hive] trait HiveInspectors { val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY) val utcCal = new Calendar.Builder() .setCalendarType("gregory") - .setTimeZone(DateTimeUtils.TimeZoneUTC) + .setTimeZone(DateTimeUtils.getTimeZone("UTC")) .setInstant(millis) .build() val localDate = LocalDate.of( From e12ec282f11fa49f1cf5dbe99c526ef79b1b5806 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 17 Mar 2020 22:01:13 +0300 Subject: [PATCH 14/38] Fix merge issues --- .../apache/spark/sql/hive/DaysWritable.scala | 4 +-- .../spark/sql/hive/HiveInspectors.scala | 27 ------------------- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala index 53a0deb270fd1..862aec26aff5c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DaysWritable.scala @@ -85,7 +85,7 @@ private[hive] object DaysWritable { val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY) val utcCal = new Calendar.Builder() .setCalendarType("gregory") - .setTimeZone(DateTimeUtils.TimeZoneUTC) + .setTimeZone(DateTimeUtils.getTimeZone("UTC")) .setInstant(millis) .build() val localDate = LocalDate.of( @@ -103,7 +103,7 @@ private[hive] object DaysWritable { val localDate = LocalDate.ofEpochDay(daysSinceEpoch) val utcCal = new Calendar.Builder() .setCalendarType("gregory") - .setTimeZone(DateTimeUtils.TimeZoneUTC) + .setTimeZone(DateTimeUtils.getTimeZone("UTC")) .setDate(localDate.getYear, localDate.getMonthValue - 1, 
localDate.getDayOfMonth) .build() Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 4694bfc9f2b49..e3e9a3144728d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -180,33 +180,6 @@ import org.apache.spark.unsafe.types.UTF8String */ private[hive] trait HiveInspectors { - private final val JULIAN_CUTOVER_DAY = - rebaseGregorianToJulianDays(DateTimeUtils.GREGORIAN_CUTOVER_DAY.toInt) - - private def rebaseJulianToGregorianDays(daysSinceEpoch: Int): Int = { - val localDate = LocalDate.ofEpochDay(daysSinceEpoch) - val utcCal = new Calendar.Builder() - .setCalendarType("gregory") - .setTimeZone(DateTimeUtils.getTimeZone("UTC")) - .setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth) - .build() - Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY)) - } - - private def rebaseGregorianToJulianDays(daysSinceEpoch: Int): Int = { - val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY) - val utcCal = new Calendar.Builder() - .setCalendarType("gregory") - .setTimeZone(DateTimeUtils.getTimeZone("UTC")) - .setInstant(millis) - .build() - val localDate = LocalDate.of( - utcCal.get(Calendar.YEAR), - utcCal.get(Calendar.MONTH) + 1, - utcCal.get(Calendar.DAY_OF_MONTH)) - Math.toIntExact(localDate.toEpochDay) - } - def javaTypeToDataType(clz: Type): DataType = clz match { // writable case c: Class[_] if c == classOf[hadoopIo.DoubleWritable] => DoubleType From 9d78a7d37a970231654fce94263b1ebbe52cf658 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 19 Mar 2020 22:04:05 +0300 Subject: [PATCH 15/38] Restore TimeZoneUTC in DateTimeUtils --- 
.../test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 2 +- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 4 +++- .../catalyst/expressions/CollectionExpressionsSuite.scala | 6 +++--- .../sql/catalyst/expressions/CsvExpressionsSuite.scala | 7 +++---- .../sql/catalyst/expressions/DateExpressionsSuite.scala | 1 + .../sql/catalyst/expressions/JsonExpressionsSuite.scala | 7 +++---- .../spark/sql/catalyst/json/JacksonGeneratorSuite.scala | 5 ++--- .../apache/spark/sql/catalyst/util/DateTimeTestUtils.scala | 2 -- .../spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 2 +- .../org/apache/spark/sql/DataFrameFunctionsSuite.scala | 4 ++-- .../org/apache/spark/sql/StatisticsCollectionSuite.scala | 2 +- .../parquet/ParquetPartitionDiscoverySuite.scala | 2 +- 12 files changed, 21 insertions(+), 23 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index deebc96682275..2071de3a9a29f 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.TestingUDT.{IntervalData, NullData, NullUDT} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources.{DataSource, FilePartition} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index a7e704ae29e06..8ae9517f33a91 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -52,6 +52,8 @@ object DateTimeUtils { final val julianCommonEraStart = Timestamp.valueOf("0001-01-01 00:00:00") + final val TimeZoneUTC = TimeZone.getTimeZone("UTC") + val TIMEZONE_OPTION = "timeZone" def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) @@ -150,7 +152,7 @@ object DateTimeUtils { * Note: support timestamp since 4717 BC (without negative nanoseconds, compatible with Hive). */ def toJulianDay(micros: Long): (Int, Long) = { - val julianUs = rebaseGregorianToJulianMicros(us) + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY + val julianUs = rebaseGregorianToJulianMicros(micros) + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY val days = julianUs / MICROS_PER_DAY val us = julianUs % MICROS_PER_DAY (days.toInt, MICROSECONDS.toNanos(us)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 5aea921bff427..173f24881531a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_DAY -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -849,7 +849,7 @@ class CollectionExpressionsSuite extends SparkFunSuite 
with ExpressionEvalHelper def noDST(t: Timestamp): Timestamp = new Timestamp(t.getTime - dstOffset) - withDefaultTimeZone(timeZone) { + DateTimeTestUtils.withDefaultTimeZone(timeZone) { // Spring time change checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-03-25 01:30:00")), @@ -877,7 +877,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } test("Sequence of dates") { - withDefaultTimeZone(TimeZoneUTC) { + DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("UTC")) { checkEvaluation(new Sequence( Literal(Date.valueOf("2018-01-01")), Literal(Date.valueOf("2018-01-05")), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index 6e26fec4fa08c..68449d710554d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -27,14 +27,13 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with PlanTestBase { val badCsv = "\u0000\u0000\u0000A\u0001AAA" - val utcId = Option(TimeZoneUTC.getID) + val utcId = Option(DateTimeUtils.TimeZoneUTC.getID) test("from_csv") { val csvData = "1" @@ -75,7 +74,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P val schema = StructType(StructField("t", TimestampType) :: Nil) val csvData1 = "2016-01-01T00:00:00.123Z" - var c = Calendar.getInstance(TimeZoneUTC) + 
var c = Calendar.getInstance(DateTimeUtils.TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) checkEvaluation( @@ -186,7 +185,7 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P test("to_csv with timestamp") { val schema = StructType(StructField("t", TimestampType) :: Nil) - val c = Calendar.getInstance(TimeZoneUTC) + val c = Calendar.getInstance(DateTimeUtils.TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) val struct = Literal.create(create_row(c.getTimeInMillis * 1000L), schema) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 27eb74ff8e066..20f47a506c6b1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjectio import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index f8ba4e780e2af..242c734d07491 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -396,7 +395,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with InternalRow(UTF8String.fromString("1"), null, UTF8String.fromString("1"))) } - val utcId = Option(TimeZoneUTC.getID) + val utcId = Option(DateTimeUtils.TimeZoneUTC.getID) test("from_json") { val jsonData = """{"a": 1}""" @@ -503,7 +502,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with val schema = StructType(StructField("t", TimestampType) :: Nil) val jsonData1 = """{"t": "2016-01-01T00:00:00.123Z"}""" - var c = Calendar.getInstance(TimeZoneUTC) + var c = Calendar.getInstance(DateTimeUtils.TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 123) checkEvaluation( @@ -598,7 +597,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("to_json with timestamp") { val schema = StructType(StructField("t", TimestampType) :: Nil) - val c = Calendar.getInstance(TimeZoneUTC) + val c = Calendar.getInstance(DateTimeUtils.TimeZoneUTC) c.set(2016, 0, 1, 0, 0, 0) c.set(Calendar.MILLISECOND, 0) val struct = Literal.create(create_row(c.getTimeInMillis * 1000L), schema) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala index 60cd186208100..4b8693cf7fd53 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonGeneratorSuite.scala @@ -21,13 +21,12 @@ import java.io.CharArrayWriter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.types._ class JacksonGeneratorSuite extends SparkFunSuite { - val utcId = TimeZoneUTC.getID + val utcId = DateTimeUtils.TimeZoneUTC.getID val option = new JSONOptions(Map.empty, utcId) test("initial with StructType and write out a row") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala index 3fe2fb8cf55ef..5f1428f96e2b0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -28,8 +28,6 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._ */ object DateTimeTestUtils { - final val TimeZoneUTC = TimeZone.getTimeZone("UTC") - val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone) val outstandingTimezonesIds: Seq[String] = Seq( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 7ecf8c875c454..ca632216306bf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -707,7 +707,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { 
Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt } def fromJavaDate(date: Date): Int = { - millisToDays(date.getTime, defaultTimeZone()) + millisToDays(date.getTime, TimeZone.getDefault) } outstandingTimezones.foreach { timeZone => withDefaultTimeZone(timeZone) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 91368c2421724..297e99b53869c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -1028,7 +1028,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Timestamp.valueOf("2018-01-01 12:00:00"), Timestamp.valueOf("2018-01-02 00:00:00"))))) - withDefaultTimeZone(TimeZoneUTC) { + DateTimeTestUtils.withDefaultTimeZone(DateTimeUtils.TimeZoneUTC) { checkAnswer( spark.sql("select sequence(" + " cast('2018-01-01' as date)" + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index c9dc5c1a591d5..ba4827810c48b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -29,7 +29,7 @@ import 
org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.test.SQLTestData.ArrayData diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 04eee4545837a..5854d0f9c6bb3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.TimeZoneUTC +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.{PartitionPath => Partition} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} From 8669e294ac38592b41cc733b87c176151801b7a3 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 20 Mar 2020 19:54:06 +0300 Subject: [PATCH 16/38] Fix merge issue --- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index e3fbbe9cd1ffa..2c9e0c0a57dcd 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.TestingUDT.IntervalData import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources.{DataSource, FilePartition} From 9ec98eb9ad870f336d6e6626bdf190f65c024297 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 24 Mar 2020 19:46:49 +0300 Subject: [PATCH 17/38] Fix merge issue --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 072304053349b..b9c8602e534a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -163,7 +163,7 @@ object DateTimeUtils { * Gregorian calendars. * @return The number of micros since epoch from `java.sql.Timestamp`. 
*/ - def fromJavaTimestamp(t: Timestamp): SQLTimestamp = { + def fromJavaTimestamp(t: Timestamp): Long = { val era = if (t.before(julianCommonEraStart)) 0 else 1 val localDateTime = LocalDateTime.of( t.getYear + 1900, t.getMonth + 1, 1, From d7f343b0cdfda28f165d64dcd3d05b217dc498b0 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 26 Mar 2020 21:32:00 +0300 Subject: [PATCH 18/38] Fix merge issues --- .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 +- .../spark/sql/catalyst/expressions/CsvExpressionsSuite.scala | 2 +- .../sql/catalyst/expressions/DateExpressionsSuite.scala | 3 ++- .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 5 ++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 0109a32fbf5b0..af327c9b297a0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -289,7 +289,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(cast(nts, TimestampType, UTC_OPT), StringType, UTC_OPT), nts) checkEvaluation( cast(cast(ts, StringType, UTC_OPT), TimestampType, UTC_OPT), - DateTimeUtils.fromJavaTimestamp(ts)) + fromJavaTimestamp(ts)) // all convert to string type to check checkEvaluation( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index 29fd39a3457d8..532832466583c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -170,7 +170,7 @@ class 
CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P test("to_csv - struct") { val schema = StructType(StructField("a", IntegerType) :: Nil) val struct = Literal.create(create_row(1), schema) - checkEvaluation(StructsToCsv(Map.empty, struct, utcId), "1") + checkEvaluation(StructsToCsv(Map.empty, struct, UTC_OPT), "1") } test("to_csv null input column") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 132d53e2b330e..8be28ba22e775 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat import java.time.{Instant, LocalDate, LocalDateTime, ZoneId} -import java.util.{Calendar, Locale, TimeZone} +import java.util.{Calendar, Locale} import java.util.concurrent.TimeUnit._ import org.apache.spark.{SparkFunSuite, SparkUpgradeException} @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjectio import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 6918e53a157c2..4f149ca1b2b89 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} -import java.util.TimeZone import scala.util.Random @@ -28,8 +27,8 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.UTC +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.withDefaultTimeZone +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession From 3b23c5268eb720c8a67910198cb6cbd2fd5d860c Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 28 Mar 2020 14:44:13 +0300 Subject: [PATCH 19/38] Fix merge issues --- .../spark/sql/catalyst/expressions/DateExpressionsSuite.scala | 2 +- .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 8454cbf7fd81f..e9ad49c321d8a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat import java.time.{Instant, LocalDate, 
LocalDateTime, ZoneId} -import java.util.{Calendar, Locale} +import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit._ import org.apache.spark.{SparkFunSuite, SparkUpgradeException} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 46a464a471a5f..cb11519497747 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -27,8 +27,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.withDefaultTimeZone -import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, UTC} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession From 32159484c0a27def8e9b03532047f8d942797743 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 10 Apr 2020 20:37:27 +0300 Subject: [PATCH 20/38] Fix merge conflicts --- .../apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala index 71ac900c789c2..2c9c33dd6aae6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala @@ -74,7 +74,7 @@ class RebaseDateTimeSuite extends SparkFunSuite with 
Matchers with SQLHelper { Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt } private def fromJavaDateLegacy(date: Date): Int = { - millisToDaysLegacy(date.getTime, defaultTimeZone()) + millisToDaysLegacy(date.getTime, TimeZone.getTimeZone(ZoneId.systemDefault())) } test("rebase gregorian to/from julian days") { From 28c844355991d633fdfd1bb2ed7bcf0e01972120 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sat, 25 Apr 2020 23:31:28 +0300 Subject: [PATCH 21/38] Fix merge conflicts --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index e81b96fb994ba..11e2cef06551a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -714,7 +714,7 @@ object DateTimeUtils { * Returns the trunc date from original date and trunc level. * Trunc level should be generated using `parseTruncLevel()`, should be between 6 and 9. 
*/ - def truncDate(days: Int, level: Int): SQLDate = { + def truncDate(days: Int, level: Int): Int = { level match { case TRUNC_TO_WEEK => getNextDateForDayOfWeek(days - 7, MONDAY) case TRUNC_TO_MONTH => days - DateTimeUtils.getDayOfMonth(days) + 1 From 70a057f9a49ac335c0bd3245df4a364e7542148f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 27 Apr 2020 09:40:10 +0300 Subject: [PATCH 22/38] Fix merge conflicts --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 1606204a24834..7fb63971e2203 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -615,8 +615,8 @@ object DateTimeUtils { * @throws IllegalArgumentException if the interval has `microseconds` part */ def dateAddInterval( - start: SQLDate, - interval: CalendarInterval): SQLDate = { + start: Int, + interval: CalendarInterval): Int = { require(interval.microseconds == 0, "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date") val ld = LocalDate.ofEpochDay(start).plusMonths(interval.months).plusDays(interval.days) From b43fe03b40435ccf2eb02c0f9cae674617414cdb Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 29 May 2020 13:59:27 +0300 Subject: [PATCH 23/38] Avoid one `*` in fromJulianDay --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 7fb63971e2203..75efc1161bc92 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -175,15 +175,12 @@ object DateTimeUtils { } /** - * Returns the number of microseconds since epoch from Julian day - * and nanoseconds in a day + * Returns the number of microseconds since epoch from Julian day and nanoseconds in a day. */ - def fromJulianDay(days: Int, nanoseconds: Long): Long = { + def fromJulianDay(days: Int, nanos: Long): Long = { // use Long to avoid rounding errors - val seconds = (days - JULIAN_DAY_OF_EPOCH).toLong * SECONDS_PER_DAY - val micros = SECONDS.toMicros(seconds) + NANOSECONDS.toMicros(nanoseconds) - val rebased = rebaseJulianToGregorianMicros(micros) - rebased + val micros = (days - JULIAN_DAY_OF_EPOCH).toLong * MICROS_PER_DAY + nanos / NANOS_PER_MICROS + rebaseJulianToGregorianMicros(micros) } /** From 61b85ac54bbf4fc8c8db716912ab0bf5db3bda89 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 29 May 2020 14:06:27 +0300 Subject: [PATCH 24/38] Use toTotalMonths in subtractDates --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 75efc1161bc92..a424ff67b5585 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -915,10 +915,8 @@ object DateTimeUtils { * if the end date is before the start date. 
*/ def subtractDates(endDay: Int, startDay: Int): CalendarInterval = { - val period = Period.between( - LocalDate.ofEpochDay(startDay), - LocalDate.ofEpochDay(endDay)) - val months = period.getMonths + 12 * period.getYears + val period = Period.between(LocalDate.ofEpochDay(startDay), LocalDate.ofEpochDay(endDay)) + val months = Math.toIntExact(period.toTotalMonths) val days = period.getDays new CalendarInterval(months, days, 0) } From b18859a33d42fe56488f567346c896aefc069804 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 16 Jun 2020 22:19:34 +0300 Subject: [PATCH 25/38] Resolve merge issue --- .../org/apache/spark/sql/StatisticsCollectionSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 69a322d0624df..18356a4de9ef4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -28,9 +28,9 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} -import org.apache.spark.sql.functions.timestamp_seconds +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimeZoneUTC +import org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.test.SQLTestData.ArrayData From f477a71564aa130afdce5c57bd0b31be6a9aab3b Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 20:24:40 +0300 Subject: [PATCH 26/38] Remove timestampToString() 
--- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 5 ++--- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 5 ----- .../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 2 +- .../spark/sql/execution/datasources/jdbc/JDBCRelation.scala | 2 +- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 5576e71b57024..363d94c4f883e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -303,7 +303,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes) case DateType => buildCast[Int](_, d => UTF8String.fromString(dateFormatter.format(d))) case TimestampType => buildCast[Long](_, - t => UTF8String.fromString(DateTimeUtils.timestampToString(timestampFormatter, t))) + t => UTF8String.fromString(timestampFormatter.format(t))) case ArrayType(et, _) => buildCast[ArrayData](_, array => { val builder = new UTF8StringBuilder @@ -1034,8 +1034,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val tf = JavaCode.global( ctx.addReferenceObj("timestampFormatter", timestampFormatter), timestampFormatter.getClass) - (c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString( - org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString($tf, $c));""" + (c, evPrim, evNull) => code"""$evPrim = UTF8String.fromString($tf.format($c));""" case CalendarIntervalType => (c, evPrim, _) => code"""$evPrim = UTF8String.fromString($c.toString());""" case ArrayType(et, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 2e18cf3d369ef..7f6f749fcaf5d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -75,11 +75,6 @@ object DateTimeUtils { instantToMicros(instant) } - // Converts the `micros` timestamp to string according to Hive TimestampWritable convention. - def timestampToString(tf: TimestampFormatter, micros: Long): String = { - tf.format(micros) - } - /** * Converts a local date at the default JVM time zone to the number of days since 1970-01-01 * in the hybrid calendar (Julian + Gregorian) by discarding the time part. The resulted days are diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 8f6196289dcc8..caf4b7e16f285 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -42,7 +42,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { val parsedTimestampOp = DateTimeUtils.stringToTimestamp( UTF8String.fromString(originalTime), defaultZoneId) assert(parsedTimestampOp.isDefined, "timestamp with nanoseconds was not parsed correctly") - assert(DateTimeUtils.timestampToString(tf, parsedTimestampOp.get) === expectedParsedTime) + assert(tf.format(parsedTimestampOp.get) === expectedParsedTime) } checkStringToTimestamp("2015-01-02 00:00:00.123456789", "2015-01-02 00:00:00.123456") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index f5a474ddf3904..2f1ee0f23d45a 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -203,7 +203,7 @@ private[sql] object JDBCRelation extends Logging { case TimestampType => val timestampFormatter = TimestampFormatter.getFractionFormatter( DateTimeUtils.getZoneId(timeZoneId)) - DateTimeUtils.timestampToString(timestampFormatter, value) + timestampFormatter.format(value) } s"'$dateTimeStr'" } From ec386b7c7fca171c3af71b496f9e04c9fea27295 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:00:31 +0300 Subject: [PATCH 27/38] Remove microsToEpochDays() and epochDaysToMicros() --- .../apache/spark/sql/catalyst/expressions/Cast.scala | 8 ++++---- .../catalyst/expressions/datetimeExpressions.scala | 12 ++++++------ .../spark/sql/catalyst/util/DateTimeUtils.scala | 11 ----------- .../sql/catalyst/util/DatetimeFormatterSuite.scala | 2 +- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 363d94c4f883e..3ce284d5518a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -443,7 +443,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case ByteType => buildCast[Byte](_, b => longToTimestamp(b.toLong)) case DateType => - buildCast[Int](_, d => epochDaysToMicros(d, zoneId)) + buildCast[Int](_, d => daysToMicros(d, zoneId)) // TimestampWritable.decimalToTimestamp case DecimalType() => buildCast[Decimal](_, d => decimalToTimestamp(d)) @@ -480,7 +480,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case TimestampType => // throw valid precision more than seconds, according to Hive. 
// Timestamp.nanos is in 0 to 999,999,999, no more than a second. - buildCast[Long](_, t => microsToEpochDays(t, zoneId)) + buildCast[Long](_, t => microsToDays(t, zoneId)) } // IntervalConverter @@ -1119,7 +1119,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val zid = getZoneId() (c, evPrim, evNull) => code"""$evPrim = - org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToEpochDays($c, $zid);""" + org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToDays($c, $zid);""" case _ => (c, evPrim, evNull) => code"$evNull = true;" } @@ -1246,7 +1246,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit zoneIdClass) (c, evPrim, evNull) => code"""$evPrim = - org.apache.spark.sql.catalyst.util.DateTimeUtils.epochDaysToMicros($c, $zid);""" + org.apache.spark.sql.catalyst.util.DateTimeUtils.daysToMicros($c, $zid);""" case DecimalType() => (c, evPrim, evNull) => code"$evPrim = ${decimalToTimestampCode(c)};" case DoubleType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index b46c3fb349ee1..b9ba32b8ee337 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -895,7 +895,7 @@ abstract class ToTimestamp } else { left.dataType match { case DateType => - epochDaysToMicros(t.asInstanceOf[Int], zoneId) / downScaleFactor + daysToMicros(t.asInstanceOf[Int], zoneId) / downScaleFactor case TimestampType => t.asInstanceOf[Long] / downScaleFactor case StringType => @@ -975,7 +975,7 @@ abstract class ToTimestamp boolean ${ev.isNull} = ${eval1.isNull}; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if (!${ev.isNull}) { - ${ev.value} = $dtu.epochDaysToMicros(${eval1.value}, $zid) / 
$downScaleFactor; + ${ev.value} = $dtu.daysToMicros(${eval1.value}, $zid) / $downScaleFactor; }""") } } @@ -1242,10 +1242,10 @@ case class DateAddInterval( if (ansiEnabled || itvl.microseconds == 0) { DateTimeUtils.dateAddInterval(start.asInstanceOf[Int], itvl) } else { - val startTs = DateTimeUtils.epochDaysToMicros(start.asInstanceOf[Int], zoneId) + val startTs = DateTimeUtils.daysToMicros(start.asInstanceOf[Int], zoneId) val resultTs = DateTimeUtils.timestampAddInterval( startTs, itvl.months, itvl.days, itvl.microseconds, zoneId) - DateTimeUtils.microsToEpochDays(resultTs, zoneId) + DateTimeUtils.microsToDays(resultTs, zoneId) } } @@ -1261,10 +1261,10 @@ case class DateAddInterval( |if ($i.microseconds == 0) { | ${ev.value} = $dtu.dateAddInterval($sd, $i); |} else { - | long $startTs = $dtu.epochDaysToMicros($sd, $zid); + | long $startTs = $dtu.daysToMicros($sd, $zid); | long $resultTs = | $dtu.timestampAddInterval($startTs, $i.months, $i.days, $i.microseconds, $zid); - | ${ev.value} = $dtu.microsToEpochDays($resultTs, $zid); + | ${ev.value} = $dtu.microsToDays($resultTs, $zid); |} |""".stripMargin }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 7f6f749fcaf5d..61bc71f7b7090 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -210,17 +210,6 @@ object DateTimeUtils { Math.multiplyExact(millis, MICROS_PER_MILLIS) } - def microsToEpochDays(micros: Long, zoneId: ZoneId): Int = { - localDateToDays(microsToInstant(micros).atZone(zoneId).toLocalDate) - } - - def epochDaysToMicros(days: Int, zoneId: ZoneId): Long = { - val localDate = LocalDate.ofEpochDay(days) - val zeroLocalTime = LocalTime.MIDNIGHT - val localDateTime = LocalDateTime.of(localDate, zeroLocalTime) - 
instantToMicros(localDateTime.atZone(zoneId).toInstant) - } - // A method called by JSON/CSV parser to clean up the legacy timestamp string by removing the // "GMT" string. def cleanLegacyTimestampStr(s: String): String = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala index b78facd963338..3b9a4ae88d586 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala @@ -45,7 +45,7 @@ trait DatetimeFormatterSuite extends SparkFunSuite with SQLHelper with Matchers private def assertEqual(pattern: String, datetimeStr: String, expected: Long): Unit = { if (useDateFormatter) { assert(dateFormatter(pattern).parse(datetimeStr) === - DateTimeUtils.microsToEpochDays(expected, UTC)) + DateTimeUtils.microsToDays(expected, UTC)) } else { assert(timestampFormatter(pattern).parse(datetimeStr) === expected) } From cf858668abb63b29d2ff735b94ff27b98bb6e3d2 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:06:10 +0300 Subject: [PATCH 28/38] Re-use getLocalDateTime in convertTz --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 61bc71f7b7090..794e83d1f99b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -784,7 +784,7 @@ object DateTimeUtils { * timezone-aware. 
*/ def convertTz(micros: Long, fromZone: ZoneId, toZone: ZoneId): Long = { - val rebasedDateTime = microsToInstant(micros).atZone(toZone).toLocalDateTime.atZone(fromZone) + val rebasedDateTime = getLocalDateTime(micros, toZone).atZone(fromZone) instantToMicros(rebasedDateTime.toInstant) } From 89d7fadeed10f2e151ceee7bd9ae1424950a27e9 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:22:54 +0300 Subject: [PATCH 29/38] Re-use getLocalDateTime --- .../sql/catalyst/util/DateTimeUtils.scala | 45 +++++++------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 794e83d1f99b5..f89ae7ad7b854 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -62,8 +62,7 @@ object DateTimeUtils { } def microsToDays(micros: Long, zoneId: ZoneId): Int = { - val instant = microsToInstant(micros) - localDateToDays(LocalDateTime.ofInstant(instant, zoneId).toLocalDate) + localDateToDays(getLocalDateTime(micros, zoneId).toLocalDate) } def daysToMicros(days: Int): Long = { @@ -402,9 +401,7 @@ object DateTimeUtils { days.toInt } - def localDateToDays(localDate: LocalDate): Int = { - Math.toIntExact(localDate.toEpochDay) - } + def localDateToDays(localDate: LocalDate): Int = Math.toIntExact(localDate.toEpochDay) def daysToLocalDate(days: Int): LocalDate = LocalDate.ofEpochDay(days) @@ -516,36 +513,28 @@ object DateTimeUtils { /** * Returns the 'day in year' value for the given number of days since 1970-01-01. */ - def getDayInYear(days: Int): Int = { - LocalDate.ofEpochDay(days).getDayOfYear - } + def getDayInYear(days: Int): Int = daysToLocalDate(days).getDayOfYear /** * Returns the year value for the given number of days since 1970-01-01. 
*/ - def getYear(days: Int): Int = { - LocalDate.ofEpochDay(days).getYear - } + def getYear(days: Int): Int = daysToLocalDate(days).getYear /** * Returns the year which conforms to ISO 8601. Each ISO 8601 week-numbering * year begins with the Monday of the week containing the 4th of January. */ - def getWeekBasedYear(days: Int): Int = { - daysToLocalDate(days).get(IsoFields.WEEK_BASED_YEAR) - } + def getWeekBasedYear(days: Int): Int = daysToLocalDate(days).get(IsoFields.WEEK_BASED_YEAR) /** Returns the quarter for the given number of days since 1970-01-01. */ - def getQuarter(days: Int): Int = { - LocalDate.ofEpochDay(days).get(IsoFields.QUARTER_OF_YEAR) - } + def getQuarter(days: Int): Int = daysToLocalDate(days).get(IsoFields.QUARTER_OF_YEAR) /** * Split date (expressed in days since 1.1.1970) into four fields: * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). */ def splitDate(days: Int): (Int, Int, Int, Int) = { - val ld = LocalDate.ofEpochDay(days) + val ld = daysToLocalDate(days) (ld.getYear, ld.getMonthValue, ld.getDayOfMonth, ld.lengthOfMonth() - ld.getDayOfMonth) } @@ -553,23 +542,19 @@ object DateTimeUtils { * Returns the month value for the given number of days since 1970-01-01. * January is month 1. */ - def getMonth(days: Int): Int = { - LocalDate.ofEpochDay(days).getMonthValue - } + def getMonth(days: Int): Int = daysToLocalDate(days).getMonthValue /** * Returns the 'day of month' value for the given number of days since 1970-01-01. */ - def getDayOfMonth(days: Int): Int = { - LocalDate.ofEpochDay(days).getDayOfMonth - } + def getDayOfMonth(days: Int): Int = daysToLocalDate(days).getDayOfMonth /** * Add date and year-month interval. * Returns a date value, expressed in days since 1.1.1970. 
*/ def dateAddMonths(days: Int, months: Int): Int = { - LocalDate.ofEpochDay(days).plusMonths(months).toEpochDay.toInt + localDateToDays(daysToLocalDate(days).plusMonths(months)) } /** @@ -602,7 +587,7 @@ object DateTimeUtils { interval: CalendarInterval): Int = { require(interval.microseconds == 0, "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date") - val ld = LocalDate.ofEpochDay(start).plusMonths(interval.months).plusDays(interval.days) + val ld = daysToLocalDate(start).plusMonths(interval.months).plusDays(interval.days) localDateToDays(ld) } @@ -686,7 +671,7 @@ object DateTimeUtils { /** Returns last day of the month for the given number of days since 1970-01-01. */ def getLastDayOfMonth(days: Int): Int = { - val localDate = LocalDate.ofEpochDay(days) + val localDate = daysToLocalDate(days) (days - localDate.getDayOfMonth) + localDate.lengthOfMonth() } @@ -716,10 +701,10 @@ object DateTimeUtils { def truncDate(days: Int, level: Int): Int = { level match { case TRUNC_TO_WEEK => getNextDateForDayOfWeek(days - 7, MONDAY) - case TRUNC_TO_MONTH => days - DateTimeUtils.getDayOfMonth(days) + 1 + case TRUNC_TO_MONTH => days - getDayOfMonth(days) + 1 case TRUNC_TO_QUARTER => localDateToDays(daysToLocalDate(days).`with`(IsoFields.DAY_OF_QUARTER, 1L)) - case TRUNC_TO_YEAR => days - DateTimeUtils.getDayInYear(days) + 1 + case TRUNC_TO_YEAR => days - getDayInYear(days) + 1 case _ => // caller make sure that this should never be reached sys.error(s"Invalid trunc level: $level") @@ -901,7 +886,7 @@ object DateTimeUtils { * if the end date is before the start date. 
*/ def subtractDates(endDay: Int, startDay: Int): CalendarInterval = { - val period = Period.between(LocalDate.ofEpochDay(startDay), LocalDate.ofEpochDay(endDay)) + val period = Period.between(daysToLocalDate(startDay), daysToLocalDate(endDay)) val months = Math.toIntExact(period.toTotalMonths) val days = period.getDays new CalendarInterval(months, days, 0) From d9699c4cb6e8a1f1f725102c0c8a0ce721833fd5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:28:20 +0300 Subject: [PATCH 30/38] Remove julianCommonEraStart --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index f89ae7ad7b854..ab9121e751e59 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -46,8 +46,6 @@ object DateTimeUtils { // it's 2440587.5, rounding up to compatible with Hive final val JULIAN_DAY_OF_EPOCH = 2440588 - final val julianCommonEraStart = Timestamp.valueOf("0001-01-01 00:00:00") - final val TimeZoneUTC = TimeZone.getTimeZone("UTC") val TIMEZONE_OPTION = "timeZone" From d261ba72c834ee4d53c88ab2ec6b3c7102661caa Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:32:51 +0300 Subject: [PATCH 31/38] Remove instantToDays --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index ab9121e751e59..b7576574c346a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -393,12 +393,6 @@ object DateTimeUtils { Instant.ofEpochSecond(secs, mos * NANOS_PER_MICROS) } - def instantToDays(instant: Instant): Int = { - val seconds = instant.getEpochSecond - val days = Math.floorDiv(seconds, SECONDS_PER_DAY) - days.toInt - } - def localDateToDays(localDate: LocalDate): Int = Math.toIntExact(localDate.toEpochDay) def daysToLocalDate(days: Int): LocalDate = LocalDate.ofEpochDay(days) From 53a33dab3ccea56052eb29eecc590a46f5844747 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:35:15 +0300 Subject: [PATCH 32/38] Make splitDate private --- .../sql/catalyst/util/DateTimeUtils.scala | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index b7576574c346a..4d6780e4bfdec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -521,15 +521,6 @@ object DateTimeUtils { /** Returns the quarter for the given number of days since 1970-01-01. */ def getQuarter(days: Int): Int = daysToLocalDate(days).get(IsoFields.QUARTER_OF_YEAR) - /** - * Split date (expressed in days since 1.1.1970) into four fields: - * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). - */ - def splitDate(days: Int): (Int, Int, Int, Int) = { - val ld = daysToLocalDate(days) - (ld.getYear, ld.getMonthValue, ld.getDayOfMonth, ld.lengthOfMonth() - ld.getDayOfMonth) - } - /** * Returns the month value for the given number of days since 1970-01-01. * January is month 1. 
@@ -583,6 +574,15 @@ object DateTimeUtils { localDateToDays(ld) } + /** + * Split date (expressed in days since 1.1.1970) into four fields: + * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). + */ + private def splitDate(days: Int): (Int, Int, Int, Int) = { + val ld = daysToLocalDate(days) + (ld.getYear, ld.getMonthValue, ld.getDayOfMonth, ld.lengthOfMonth() - ld.getDayOfMonth) + } + /** * Returns number of months between micros1 and micros2. micros1 and micros2 are expressed in * microseconds since 1.1.1970. If micros1 is later than micros2, the result is positive. From ee912110a44fffd1d407464721b39d4d21841b07 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:41:19 +0300 Subject: [PATCH 33/38] Remove def daysToMicros(days: Int): Long --- .../apache/spark/sql/catalyst/util/DateTimeUtils.scala | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 4d6780e4bfdec..679170adfc09e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -51,9 +51,7 @@ object DateTimeUtils { val TIMEZONE_OPTION = "timeZone" def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) - def getTimeZone(timeZoneId: String): TimeZone = { - TimeZone.getTimeZone(getZoneId(timeZoneId)) - } + def getTimeZone(timeZoneId: String): TimeZone = TimeZone.getTimeZone(getZoneId(timeZoneId)) def microsToDays(micros: Long): Int = { microsToDays(micros, ZoneId.systemDefault()) @@ -63,10 +61,6 @@ object DateTimeUtils { localDateToDays(getLocalDateTime(micros, zoneId).toLocalDate) } - def daysToMicros(days: Int): Long = { - daysToMicros(days, ZoneId.systemDefault()) - } - def daysToMicros(days: Int, zoneId: 
ZoneId): Long = { val instant = daysToLocalDate(days).atStartOfDay(zoneId).toInstant instantToMicros(instant) From 386f6cfe153e9510df1b007fdf4657cc8ab15d76 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:50:05 +0300 Subject: [PATCH 34/38] Remove def microsToDays(micros: Long): Int --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 4 ---- .../spark/sql/execution/datasources/json/JsonSuite.scala | 6 +++--- .../scala/org/apache/spark/sql/streaming/StreamSuite.scala | 4 +++- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 679170adfc09e..f06efd5ef70ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -53,10 +53,6 @@ object DateTimeUtils { def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) def getTimeZone(timeZoneId: String): TimeZone = TimeZone.getTimeZone(getZoneId(timeZoneId)) - def microsToDays(micros: Long): Int = { - microsToDays(micros, ZoneId.systemDefault()) - } - def microsToDays(micros: Long, zoneId: ZoneId): Int = { localDateToDays(getLocalDateTime(micros, zoneId).toLocalDate) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 3f8ee12f97776..d0f0e4f15093c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -21,15 +21,15 @@ import java.io._ import java.nio.charset.{Charset, StandardCharsets, UnsupportedCharsetException} import java.nio.file.Files import java.sql.{Date, Timestamp} -import 
java.time.LocalDate +import java.time.{LocalDate, ZoneId} import java.util.Locale import com.fasterxml.jackson.core.JsonFactory import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.GzipCodec - import org.apache.spark.{SparkConf, SparkException, TestUtils} + import org.apache.spark.rdd.RDD import org.apache.spark.sql.{functions => F, _} import org.apache.spark.sql.catalyst.json._ @@ -125,7 +125,7 @@ abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJson Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ssXXX"))) val ISO8601Date = "1970-01-01" - checkTypePromotion(DateTimeUtils.microsToDays(32400000000L), + checkTypePromotion(DateTimeUtils.microsToDays(32400000000L, ZoneId.systemDefault), enforceCorrectType(ISO8601Date, DateType)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index bbd0220a74f88..030009572deb3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.streaming import java.io.{File, InterruptedIOException, IOException, UncheckedIOException} import java.nio.channels.ClosedByInterruptException +import java.time.ZoneId import java.util.concurrent.{CountDownLatch, ExecutionException, TimeUnit} import scala.concurrent.TimeoutException @@ -1219,7 +1220,8 @@ class StreamSuite extends StreamTest { } var lastTimestamp = System.currentTimeMillis() - val currentDate = DateTimeUtils.microsToDays(DateTimeUtils.millisToMicros(lastTimestamp)) + val currentDate = DateTimeUtils.microsToDays( + DateTimeUtils.millisToMicros(lastTimestamp), ZoneId.systemDefault) testStream(df) ( AddData(input, 1), CheckLastBatch { rows: Seq[Row] => From c47d52225214d77db8ba3dd62de5ef962f401b4e Mon Sep 17 00:00:00 
2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:53:22 +0300 Subject: [PATCH 35/38] 1.1.1970 -> 1970-01-01 --- .../apache/spark/sql/catalyst/util/DateTimeUtils.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index f06efd5ef70ef..0841ed27f324d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -524,7 +524,7 @@ object DateTimeUtils { /** * Add date and year-month interval. - * Returns a date value, expressed in days since 1.1.1970. + * Returns a date value, expressed in days since 1970-01-01. */ def dateAddMonths(days: Int, months: Int): Int = { localDateToDays(daysToLocalDate(days).plusMonths(months)) @@ -532,7 +532,7 @@ object DateTimeUtils { /** * Add timestamp and full interval. - * Returns a timestamp value, expressed in microseconds since 1.1.1970 00:00:00. + * Returns a timestamp value, expressed in microseconds since 1970-01-01 00:00:00. */ def timestampAddInterval( start: Long, @@ -550,7 +550,7 @@ object DateTimeUtils { /** * Add the date and the interval's months and days. - * Returns a date value, expressed in days since 1.1.1970. + * Returns a date value, expressed in days since 1970-01-01. * * @throws DateTimeException if the result exceeds the supported date range * @throws IllegalArgumentException if the interval has `microseconds` part @@ -565,7 +565,7 @@ object DateTimeUtils { } /** - * Split date (expressed in days since 1.1.1970) into four fields: + * Split date (expressed in days since 1970-01-01) into four fields: * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). 
*/ private def splitDate(days: Int): (Int, Int, Int, Int) = { @@ -575,7 +575,7 @@ object DateTimeUtils { /** * Returns number of months between micros1 and micros2. micros1 and micros2 are expressed in - * microseconds since 1.1.1970. If micros1 is later than micros2, the result is positive. + * microseconds since 1970-01-01. If micros1 is later than micros2, the result is positive. * * If micros1 and micros2 are on the same day of month, or both are the last day of month, * returns, time of day will be ignored. From 025ea807b7eee35a28322fa44042511ac326be4d Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 21:55:17 +0300 Subject: [PATCH 36/38] 1970-01-01 00:00:00 -> 1970-01-01 00:00:00Z --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 0841ed27f324d..bad3aeb7ff364 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -532,7 +532,7 @@ object DateTimeUtils { /** * Add timestamp and full interval. - * Returns a timestamp value, expressed in microseconds since 1970-01-01 00:00:00. + * Returns a timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z. 
*/ def timestampAddInterval( start: Long, From 8a7ac3f6d162aee1971172a2dfd654522ecac00e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 18 Jun 2020 23:16:38 +0300 Subject: [PATCH 37/38] Make Scala style checker happy --- .../apache/spark/sql/execution/datasources/json/JsonSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index d0f0e4f15093c..6344ec6be4878 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -28,8 +28,8 @@ import com.fasterxml.jackson.core.JsonFactory import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.GzipCodec -import org.apache.spark.{SparkConf, SparkException, TestUtils} +import org.apache.spark.{SparkConf, SparkException, TestUtils} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{functions => F, _} import org.apache.spark.sql.catalyst.json._ From 863d747f6132a42cde0f0bb3bfa558faa3ab6170 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 19 Jun 2020 00:04:15 +0300 Subject: [PATCH 38/38] Update comments --- .../sql/catalyst/util/DateTimeUtils.scala | 75 +++++++++++++------ 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index bad3aeb7ff364..c466a60259c7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -42,8 +42,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} 
*/ object DateTimeUtils { - // see http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian - // it's 2440587.5, rounding up to compatible with Hive + // See http://stackoverflow.com/questions/466321/convert-unix-timestamp-to-julian + // It's 2440587.5, rounding up to be compatible with Hive. final val JULIAN_DAY_OF_EPOCH = 2440588 final val TimeZoneUTC = TimeZone.getTimeZone("UTC") @@ -53,10 +53,16 @@ object DateTimeUtils { def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) def getTimeZone(timeZoneId: String): TimeZone = TimeZone.getTimeZone(getZoneId(timeZoneId)) + /** + * Converts microseconds since 1970-01-01 00:00:00Z to days since 1970-01-01 at the given zone ID. + */ def microsToDays(micros: Long, zoneId: ZoneId): Int = { localDateToDays(getLocalDateTime(micros, zoneId).toLocalDate) } + /** + * Converts days since 1970-01-01 at the given zone ID to microseconds since 1970-01-01 00:00:00Z. + */ def daysToMicros(days: Int, zoneId: ZoneId): Long = { val instant = daysToLocalDate(days).atStartOfDay(zoneId).toInstant instantToMicros(instant) @@ -180,7 +186,7 @@ object DateTimeUtils { } /** - * Converts the timestamp to milliseconds since epoch. In spark timestamp values have microseconds + * Converts the timestamp to milliseconds since epoch. In Spark timestamp values have microseconds * precision, so this conversion is lossy. */ def microsToMillis(micros: Long): Long = { @@ -197,8 +203,8 @@ object DateTimeUtils { Math.multiplyExact(millis, MICROS_PER_MILLIS) } - // A method called by JSON/CSV parser to clean up the legacy timestamp string by removing the - // "GMT" string. + // The method is called by JSON/CSV parser to clean up the legacy timestamp string by removing + // the "GMT" string. 
def cleanLegacyTimestampStr(s: String): String = { val indexOfGMT = s.indexOf("GMT") if (indexOfGMT != -1) { @@ -213,8 +219,8 @@ object DateTimeUtils { } /** - * Trim and parse a given UTF8 date string to the corresponding a corresponding [[Long]] value. - * The return type is [[Option]] in order to distinguish between 0L and null. The following + * Trims and parses a given UTF8 timestamp string to a corresponding [[Long]] + * value. The return type is [[Option]] in order to distinguish between 0L and null. The following * formats are allowed: * * `yyyy` @@ -369,12 +375,21 @@ object DateTimeUtils { } } + /** + * Gets the number of microseconds since the epoch of 1970-01-01 00:00:00Z from the given + * instance of `java.time.Instant`. The epoch microsecond count is a simple incrementing count of + * microseconds where microsecond 0 is 1970-01-01 00:00:00Z. + */ def instantToMicros(instant: Instant): Long = { val us = Math.multiplyExact(instant.getEpochSecond, MICROS_PER_SECOND) val result = Math.addExact(us, NANOSECONDS.toMicros(instant.getNano)) result } + /** + * Obtains an instance of `java.time.Instant` using microseconds from + * the epoch of 1970-01-01 00:00:00Z. + */ def microsToInstant(micros: Long): Instant = { val secs = Math.floorDiv(micros, MICROS_PER_SECOND) // Unfolded Math.floorMod(us, MICROS_PER_SECOND) to reuse the result of @@ -383,12 +398,18 @@ object DateTimeUtils { Instant.ofEpochSecond(secs, mos * NANOS_PER_MICROS) } + /** + * Converts the local date to the number of days since 1970-01-01. + */ def localDateToDays(localDate: LocalDate): Int = Math.toIntExact(localDate.toEpochDay) + /** + * Obtains an instance of `java.time.LocalDate` from the epoch day count. + */ def daysToLocalDate(days: Int): LocalDate = LocalDate.ofEpochDay(days) /** - * Trim and parse a given UTF8 date string to a corresponding [[Int]] value. + * Trims and parses a given UTF8 date string to a corresponding [[Int]] value.
* The return type is [[Option]] in order to distinguish between 0 and null. The following * formats are allowed: * @@ -447,6 +468,8 @@ object DateTimeUtils { } } + // Gets the local date-time parts (year, month, day and time) of the instant expressed as the + // number of microseconds since the epoch at the given time zone ID. private def getLocalDateTime(micros: Long, zoneId: ZoneId): LocalDateTime = { microsToInstant(micros).atZone(zoneId).toLocalDateTime } @@ -523,16 +546,17 @@ object DateTimeUtils { def getDayOfMonth(days: Int): Int = daysToLocalDate(days).getDayOfMonth /** - * Add date and year-month interval. - * Returns a date value, expressed in days since 1970-01-01. + * Adds a year-month interval to a date represented as days since 1970-01-01. + * @return A date value, expressed in days since 1970-01-01. */ def dateAddMonths(days: Int, months: Int): Int = { localDateToDays(daysToLocalDate(days).plusMonths(months)) } /** - * Add timestamp and full interval. - * Returns a timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z. + * Adds a full interval (months, days, microseconds) to a timestamp represented as the number of + * microseconds since 1970-01-01 00:00:00Z. + * @return A timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z. */ def timestampAddInterval( start: Long, @@ -549,8 +573,8 @@ object DateTimeUtils { } /** - * Add the date and the interval's months and days. - * Returns a date value, expressed in days since 1970-01-01. + * Adds the interval's months and days to a date expressed as days since the epoch. + * @return A date value, expressed in days since 1970-01-01.
* * @throws DateTimeException if the result exceeds the supported date range * @throws IllegalArgumentException if the interval has `microseconds` part @@ -565,7 +589,7 @@ object DateTimeUtils { } /** - * Split date (expressed in days since 1970-01-01) into four fields: + * Splits date (expressed in days since 1970-01-01) into four fields: * year, month (Jan is Month 1), dayInMonth, daysToMonthEnd (0 if it's last day of month). */ private def splitDate(days: Int): (Int, Int, Int, Int) = { @@ -744,11 +768,10 @@ object DateTimeUtils { } /** - * Convert the timestamp `micros` from one timezone to another. + * Converts the timestamp `micros` from one timezone to another. * - * TODO: Because of DST, the conversion between UTC and human time is not exactly one-to-one - * mapping, the conversion here may return wrong result, we should make the timestamp - * timezone-aware. + * Time-zone rules, such as daylight savings, mean that not every local date-time + * is valid for the `toZone` time zone, thus the local date-time may be adjusted. */ def convertTz(micros: Long, fromZone: ZoneId, toZone: ZoneId): Long = { val rebasedDateTime = getLocalDateTime(micros, toZone).atZone(fromZone) @@ -756,7 +779,7 @@ object DateTimeUtils { } /** - * Returns a timestamp of given timezone from utc timestamp, with the same string + * Returns a timestamp of given timezone from UTC timestamp, with the same string * representation in their timezone. */ def fromUTCTime(micros: Long, timeZone: String): Long = { @@ -771,8 +794,14 @@ object DateTimeUtils { convertTz(micros, getZoneId(timeZone), ZoneOffset.UTC) } + /** + * Obtains the current instant as microseconds since the epoch at the UTC time zone. + */ def currentTimestamp(): Long = instantToMicros(Instant.now()) + /** + * Obtains the current date as days since the epoch in the specified time-zone. 
+ */ def currentDate(zoneId: ZoneId): Int = localDateToDays(LocalDate.now(zoneId)) private def today(zoneId: ZoneId): ZonedDateTime = { @@ -783,6 +812,7 @@ object DateTimeUtils { /** * Extracts special values from an input string ignoring case. + * * @param input A trimmed string * @param zoneId Zone identifier used to get the current date. * @return Some special value in lower case or None. @@ -812,6 +842,7 @@ object DateTimeUtils { /** * Converts notational shorthands that are converted to ordinary timestamps. + * * @param input A trimmed string * @param zoneId Zone identifier used to get the current date. * @return Some of microseconds since the epoch if the conversion completed @@ -838,6 +869,7 @@ object DateTimeUtils { /** * Converts notational shorthands that are converted to ordinary dates. + * * @param input A trimmed string * @param zoneId Zone identifier used to get the current date. * @return Some of days since the epoch if the conversion completed successfully otherwise None. @@ -861,7 +893,8 @@ object DateTimeUtils { } /** - * Subtracts two dates. + * Subtracts two dates expressed as days since 1970-01-01. + * * @param endDay The end date, exclusive * @param startDay The start date, inclusive * @return An interval between two dates. The interval can be negative