From 519b381e0bb5d17dc74889b50e4368adf571b695 Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 11:08:43 +0200 Subject: [PATCH 1/8] Changed the Strings from MegaBytes to MebiBytes (and likewise for other sizes) --- core/src/main/resources/org/apache/spark/ui/static/utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index edc0ee2ce181d..88645d12cd175 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -42,7 +42,7 @@ function formatBytes(bytes, type) { if (bytes == 0) return '0.0 B'; var k = 1000; var dm = 1; - var sizes = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']; + var sizes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']; var i = Math.floor(Math.log(bytes) / Math.log(k)); return parseFloat((bytes / Math.pow(k, i)).toFixed(dm)) + ' ' + sizes[i]; } From 7c775d9085be0af2ac51881b5ecd1eb34c48c732 Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 12:00:00 +0200 Subject: [PATCH 2/8] Changed the calculation of bytesToString towards MebiBytes instead of Megabytes --- .../scala/org/apache/spark/util/Utils.scala | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index bbb7999e2a144..dbec6910e8ae2 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -40,6 +40,7 @@ import scala.reflect.ClassTag import scala.util.Try import scala.util.control.{ControlThrowable, NonFatal} import scala.util.matching.Regex +import scala.math.pow import _root_.io.netty.channel.unix.Errors.NativeIoException import com.google.common.cache.{CacheBuilder, CacheLoader, 
LoadingCache} @@ -1127,17 +1128,18 @@ private[spark] object Utils extends Logging { } /** - * Convert a quantity in bytes to a human-readable string such as "4.0 MB". + * Convert a quantity in bytes to a human-readable string such as "4.0 MiB". + * The result is in MebiBytes instead of Megabytes */ def bytesToString(size: Long): String = bytesToString(BigInt(size)) def bytesToString(size: BigInt): String = { - val EB = 1L << 60 - val PB = 1L << 50 - val TB = 1L << 40 - val GB = 1L << 30 - val MB = 1L << 20 - val KB = 1L << 10 + val EB = pow(1024, 6) + val PB = pow(1024, 5) + val TB = pow(1024, 4) + val GB = pow(1024, 3) + val MB = pow(1024, 2) + val KB = 1024 if (size >= BigInt(1L << 11) * EB) { // The number is too large, show it in scientific notation. @@ -1145,17 +1147,17 @@ private[spark] object Utils extends Logging { } else { val (value, unit) = { if (size >= 2 * EB) { - (BigDecimal(size) / EB, "EB") + (BigDecimal(size) / EB, "EiB") } else if (size >= 2 * PB) { - (BigDecimal(size) / PB, "PB") + (BigDecimal(size) / PB, "PiB") } else if (size >= 2 * TB) { - (BigDecimal(size) / TB, "TB") + (BigDecimal(size) / TB, "TiB") } else if (size >= 2 * GB) { - (BigDecimal(size) / GB, "GB") + (BigDecimal(size) / GB, "GiB") } else if (size >= 2 * MB) { - (BigDecimal(size) / MB, "MB") + (BigDecimal(size) / MB, "MiB") } else if (size >= 2 * KB) { - (BigDecimal(size) / KB, "KB") + (BigDecimal(size) / KB, "KiB") } else { (BigDecimal(size), "B") } From 232847eaf46d4d0aa69630ea096a64f82b1556ad Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 12:00:00 +0200 Subject: [PATCH 3/8] Revert "Changed the calculation of bytesToString towards MebiBytes instead of Megabytes" This reverts commit 7c775d9085be0af2ac51881b5ecd1eb34c48c732. 
--- .../scala/org/apache/spark/util/Utils.scala | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index dbec6910e8ae2..bbb7999e2a144 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -40,7 +40,6 @@ import scala.reflect.ClassTag import scala.util.Try import scala.util.control.{ControlThrowable, NonFatal} import scala.util.matching.Regex -import scala.math.pow import _root_.io.netty.channel.unix.Errors.NativeIoException import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} @@ -1128,18 +1127,17 @@ private[spark] object Utils extends Logging { } /** - * Convert a quantity in bytes to a human-readable string such as "4.0 MiB". - * The result is in MebiBytes instead of Megabytes + * Convert a quantity in bytes to a human-readable string such as "4.0 MB". */ def bytesToString(size: Long): String = bytesToString(BigInt(size)) def bytesToString(size: BigInt): String = { - val EB = pow(1024, 6) - val PB = pow(1024, 5) - val TB = pow(1024, 4) - val GB = pow(1024, 3) - val MB = pow(1024, 2) - val KB = 1024 + val EB = 1L << 60 + val PB = 1L << 50 + val TB = 1L << 40 + val GB = 1L << 30 + val MB = 1L << 20 + val KB = 1L << 10 if (size >= BigInt(1L << 11) * EB) { // The number is too large, show it in scientific notation. 
@@ -1147,17 +1145,17 @@ private[spark] object Utils extends Logging { } else { val (value, unit) = { if (size >= 2 * EB) { - (BigDecimal(size) / EB, "EiB") + (BigDecimal(size) / EB, "EB") } else if (size >= 2 * PB) { - (BigDecimal(size) / PB, "PiB") + (BigDecimal(size) / PB, "PB") } else if (size >= 2 * TB) { - (BigDecimal(size) / TB, "TiB") + (BigDecimal(size) / TB, "TB") } else if (size >= 2 * GB) { - (BigDecimal(size) / GB, "GiB") + (BigDecimal(size) / GB, "GB") } else if (size >= 2 * MB) { - (BigDecimal(size) / MB, "MiB") + (BigDecimal(size) / MB, "MB") } else if (size >= 2 * KB) { - (BigDecimal(size) / KB, "KiB") + (BigDecimal(size) / KB, "KB") } else { (BigDecimal(size), "B") } From 83090a307725101d76385b8c57d341011f067529 Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 12:05:40 +0200 Subject: [PATCH 4/8] Changed the result string to indicate MebiBytes --- .../src/main/scala/org/apache/spark/util/Utils.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index bbb7999e2a144..8c09f923a668f 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1145,17 +1145,17 @@ private[spark] object Utils extends Logging { } else { val (value, unit) = { if (size >= 2 * EB) { - (BigDecimal(size) / EB, "EB") + (BigDecimal(size) / EB, "EiB") } else if (size >= 2 * PB) { - (BigDecimal(size) / PB, "PB") + (BigDecimal(size) / PB, "PiB") } else if (size >= 2 * TB) { - (BigDecimal(size) / TB, "TB") + (BigDecimal(size) / TB, "TiB") } else if (size >= 2 * GB) { - (BigDecimal(size) / GB, "GB") + (BigDecimal(size) / GB, "GiB") } else if (size >= 2 * MB) { - (BigDecimal(size) / MB, "MB") + (BigDecimal(size) / MB, "MiB") } else if (size >= 2 * KB) { - (BigDecimal(size) / KB, "KB") + (BigDecimal(size) / KB, "KiB") } else { (BigDecimal(size), "B") } 
From bdb8858a4e63ba90f57cfabfba1aa3371e31b9dc Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 12:07:29 +0200 Subject: [PATCH 5/8] Update the test to take into account the newly returned String --- .../scala/org/apache/spark/util/UtilsSuite.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index f7bc8f888b0d5..59ff6ff176d16 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -196,13 +196,13 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { test("bytesToString") { assert(Utils.bytesToString(10) === "10.0 B") assert(Utils.bytesToString(1500) === "1500.0 B") - assert(Utils.bytesToString(2000000) === "1953.1 KB") - assert(Utils.bytesToString(2097152) === "2.0 MB") - assert(Utils.bytesToString(2306867) === "2.2 MB") - assert(Utils.bytesToString(5368709120L) === "5.0 GB") - assert(Utils.bytesToString(5L * (1L << 40)) === "5.0 TB") - assert(Utils.bytesToString(5L * (1L << 50)) === "5.0 PB") - assert(Utils.bytesToString(5L * (1L << 60)) === "5.0 EB") + assert(Utils.bytesToString(2000000) === "1953.1 KiB") + assert(Utils.bytesToString(2097152) === "2.0 MiB") + assert(Utils.bytesToString(2306867) === "2.2 MiB") + assert(Utils.bytesToString(5368709120L) === "5.0 GiB") + assert(Utils.bytesToString(5L * (1L << 40)) === "5.0 TiB") + assert(Utils.bytesToString(5L * (1L << 50)) === "5.0 PiB") + assert(Utils.bytesToString(5L * (1L << 60)) === "5.0 EiB") assert(Utils.bytesToString(BigInt(1L << 11) * (1L << 60)) === "2.36E+21 B") } From c7f25e6f27e49f6d4cd15d65a3b1968520d9608c Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 12:23:20 +0200 Subject: [PATCH 6/8] formatBytes should calculate MiB instead of MB. 
--- core/src/main/resources/org/apache/spark/ui/static/utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index 88645d12cd175..46ec0fbcab234 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -40,7 +40,7 @@ function formatDuration(milliseconds) { function formatBytes(bytes, type) { if (type !== 'display') return bytes; if (bytes == 0) return '0.0 B'; - var k = 1000; + var k = 1024; var dm = 1; var sizes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']; var i = Math.floor(Math.log(bytes) / Math.log(k)); From 71c2bbe5a42b99e12fce0653a2d23e48eb1358c4 Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 15:58:37 +0200 Subject: [PATCH 7/8] Changed the variable names to reflect the difference between MegaBytes and MebiBytes --- .../scala/org/apache/spark/util/Utils.scala | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 8c09f923a668f..ab631381b07d7 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1132,30 +1132,30 @@ private[spark] object Utils extends Logging { def bytesToString(size: Long): String = bytesToString(BigInt(size)) def bytesToString(size: BigInt): String = { - val EB = 1L << 60 - val PB = 1L << 50 - val TB = 1L << 40 - val GB = 1L << 30 - val MB = 1L << 20 - val KB = 1L << 10 - - if (size >= BigInt(1L << 11) * EB) { + val EiB = 1L << 60 + val PiB = 1L << 50 + val TiB = 1L << 40 + val GiB = 1L << 30 + val MiB = 1L << 20 + val KiB = 1L << 10 + + if (size >= BigInt(1L << 11) * EiB) { // The number is too large, show it in scientific notation. 
BigDecimal(size, new MathContext(3, RoundingMode.HALF_UP)).toString() + " B" } else { val (value, unit) = { - if (size >= 2 * EB) { - (BigDecimal(size) / EB, "EiB") - } else if (size >= 2 * PB) { - (BigDecimal(size) / PB, "PiB") - } else if (size >= 2 * TB) { - (BigDecimal(size) / TB, "TiB") - } else if (size >= 2 * GB) { - (BigDecimal(size) / GB, "GiB") - } else if (size >= 2 * MB) { - (BigDecimal(size) / MB, "MiB") - } else if (size >= 2 * KB) { - (BigDecimal(size) / KB, "KiB") + if (size >= 2 * EiB) { + (BigDecimal(size) / EiB, "EiB") + } else if (size >= 2 * PiB) { + (BigDecimal(size) / PiB, "PiB") + } else if (size >= 2 * TiB) { + (BigDecimal(size) / TiB, "TiB") + } else if (size >= 2 * GiB) { + (BigDecimal(size) / GiB, "GiB") + } else if (size >= 2 * MiB) { + (BigDecimal(size) / MiB, "MiB") + } else if (size >= 2 * KiB) { + (BigDecimal(size) / KiB, "KiB") } else { (BigDecimal(size), "B") } From dd9cc4def988053b670bc253e2ee5b0f53940de3 Mon Sep 17 00:00:00 2001 From: Maarten Kesselaers Date: Wed, 7 Jun 2017 18:29:06 +0200 Subject: [PATCH 8/8] Fixed failing test --- .../apache/spark/sql/StatisticsCollectionSuite.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 601324f2c0172..77fc34b0479fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -153,12 +153,12 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared BigInt(0) -> ("0.0 B", "0"), BigInt(100) -> ("100.0 B", "100"), BigInt(2047) -> ("2047.0 B", "2.05E+3"), - BigInt(2048) -> ("2.0 KB", "2.05E+3"), - BigInt(3333333) -> ("3.2 MB", "3.33E+6"), - BigInt(4444444444L) -> ("4.1 GB", "4.44E+9"), - BigInt(5555555555555L) -> ("5.1 TB", "5.56E+12"), - 
BigInt(6666666666666666L) -> ("5.9 PB", "6.67E+15"), - BigInt(1L << 10 ) * (1L << 60) -> ("1024.0 EB", "1.18E+21"), + BigInt(2048) -> ("2.0 KiB", "2.05E+3"), + BigInt(3333333) -> ("3.2 MiB", "3.33E+6"), + BigInt(4444444444L) -> ("4.1 GiB", "4.44E+9"), + BigInt(5555555555555L) -> ("5.1 TiB", "5.56E+12"), + BigInt(6666666666666666L) -> ("5.9 PiB", "6.67E+15"), + BigInt(1L << 10 ) * (1L << 60) -> ("1024.0 EiB", "1.18E+21"), BigInt(1L << 11) * (1L << 60) -> ("2.36E+21 B", "2.36E+21") ) numbers.foreach { case (input, (expectedSize, expectedRows)) =>