diff --git a/client-mr/mr-shaded/pom.xml b/client-mr/mr-shaded/pom.xml index 2ffa40e1aad..0f9052f1f56 100644 --- a/client-mr/mr-shaded/pom.xml +++ b/client-mr/mr-shaded/pom.xml @@ -64,8 +64,8 @@ ${shading.prefix}.org.scala-lang - org.lz4 - ${shading.prefix}.org.lz4 + ${lz4-java.group} + ${shading.prefix}.${lz4-java.group} org.roaringbitmap @@ -81,7 +81,7 @@ io.netty:* org.apache.commons:commons-lang3 org.scala-lang:scala-library - org.lz4:lz4-java + ${lz4-java.group}:lz4-java com.github.luben:zstd-jni org.roaringbitmap:RoaringBitmap diff --git a/client-mr/mr-shaded/src/main/resources/META-INF/LICENSE b/client-mr/mr-shaded/src/main/resources/META-INF/LICENSE index ec665dcc837..7435dd2e5c3 100644 --- a/client-mr/mr-shaded/src/main/resources/META-INF/LICENSE +++ b/client-mr/mr-shaded/src/main/resources/META-INF/LICENSE @@ -208,6 +208,7 @@ This project bundles the following dependencies under the Apache License 2.0 (ht Apache License 2.0 -------------------------------------- +at.yawk.lz4:lz4-java com.google.guava:failureaccess com.google.guava:guava io.netty:netty @@ -240,7 +241,6 @@ io.netty:netty-transport-rxtx io.netty:netty-transport-sctp io.netty:netty-transport-udt org.apache.commons:commons-lang3 -org.lz4:lz4-java org.roaringbitmap:RoaringBitmap org.scala-lang:scala-library diff --git a/client-tez/tez-shaded/pom.xml b/client-tez/tez-shaded/pom.xml index e8060d95a81..73bb78371a8 100644 --- a/client-tez/tez-shaded/pom.xml +++ b/client-tez/tez-shaded/pom.xml @@ -94,7 +94,7 @@ org.roaringbitmap:RoaringBitmap org.scala-lang:scala-library org.scala-lang:scala-reflect - org.lz4:lz4-java + ${lz4-java.group}:lz4-java io.dropwizard.metrics:metrics-core com.codahale.metrics:metrics-core com.github.luben:zstd-jni diff --git a/client-tez/tez-shaded/src/main/resources/META-INF/LICENSE b/client-tez/tez-shaded/src/main/resources/META-INF/LICENSE index ec665dcc837..7435dd2e5c3 100644 --- a/client-tez/tez-shaded/src/main/resources/META-INF/LICENSE +++ b/client-tez/tez-shaded/src/main/resources/META-INF/LICENSE @@ -208,6 +208,7 @@ This project bundles the following dependencies under the Apache License 2.0 (ht Apache License 2.0 -------------------------------------- +at.yawk.lz4:lz4-java com.google.guava:failureaccess com.google.guava:guava io.netty:netty @@ -240,7 +241,6 @@ io.netty:netty-transport-rxtx io.netty:netty-transport-sctp io.netty:netty-transport-udt org.apache.commons:commons-lang3 -org.lz4:lz4-java org.roaringbitmap:RoaringBitmap org.scala-lang:scala-library diff --git a/client/benchmarks/LZ4TPCDSDataBenchmark-jdk17-results.txt b/client/benchmarks/LZ4TPCDSDataBenchmark-jdk17-results.txt index a6ce37abaca..1ae36bb16f2 100644 --- a/client/benchmarks/LZ4TPCDSDataBenchmark-jdk17-results.txt +++ b/client/benchmarks/LZ4TPCDSDataBenchmark-jdk17-results.txt @@ -6,48 +6,48 @@ OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Compression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Compression with chunk size 65536 4 times 2131 2134 5 0.0 532707902.0 1.0X +Compression with chunk size 65536 4 times 2193 2210 24 0.0 548315522.5 1.0X OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Decompression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Decompression with chunk size 65536 4 times 536 541 9 0.0 133951799.0 1.0X +Decompression with chunk size 65536 4 times 460 463 3 0.0 114986376.3 1.0X OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Compression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Compression with chunk size 262144 4 times 1754 1756 2 0.0 438523185.2 1.0X +Compression with chunk size 262144 4 times 1683 1683 0 0.0 420711475.3 1.0X OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Decompression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Decompression with chunk size 262144 4 times 436 439 4 0.0 109013659.0 1.0X +Decompression with chunk size 262144 4 times 367 369 1 0.0 91804273.0 1.0X OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Compression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Compression with chunk size 1048576 4 times 1774 1780 9 0.0 443426664.3 1.0X +Compression with chunk size 1048576 4 times 1725 1726 3 0.0 431152298.8 1.0X OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Decompression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Decompression with chunk size 1048576 4 times 431 434 4 0.0 107823243.0 1.0X +Decompression with chunk size 1048576 4 times 367 368 1 0.0 91743487.5 1.0X OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Compression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Compression with chunk size 4194304 4 times 1785 1791 8 0.0 446360006.8 1.0X +Compression with chunk size 4194304 4 times 1697 1702 7 0.0 424249326.5 1.0X OpenJDK 64-Bit Server VM 17.0.15+6-LTS on Linux 6.17.9-76061709-generic Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz Decompression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Decompression with chunk size 4194304 4 times 436 438 1 0.0 109117732.3 1.0X +Decompression with chunk size 4194304 4 times 371 376 4 0.0 92720790.5 1.0X diff --git a/client/pom.xml b/client/pom.xml index 854258cd524..12241885914 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -71,7 +71,7 @@ guava - org.lz4 + ${lz4-java.group} lz4-java diff --git a/client/src/main/java/org/apache/celeborn/client/compress/Lz4Decompressor.java b/client/src/main/java/org/apache/celeborn/client/compress/Lz4Decompressor.java index 8a538ef3484..a579c012208 100644 --- a/client/src/main/java/org/apache/celeborn/client/compress/Lz4Decompressor.java +++ b/client/src/main/java/org/apache/celeborn/client/compress/Lz4Decompressor.java @@ -26,7 +26,7 @@ import com.google.common.collect.ImmutableMap; import net.jpountz.lz4.LZ4Factory; -import net.jpountz.lz4.LZ4FastDecompressor; +import net.jpountz.lz4.LZ4SafeDecompressor; import net.jpountz.xxhash.XXHashFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,7 +34,7 @@ public class Lz4Decompressor extends Lz4Trait implements Decompressor { private static final Logger logger = LoggerFactory.getLogger(Lz4Decompressor.class); - private final LZ4FastDecompressor decompressor; + private final LZ4SafeDecompressor decompressor; private final Checksum checksum; private final Map> xxHashFactories = @@ -47,7 +47,7 @@ public class Lz4Decompressor extends Lz4Trait implements Decompressor { XXHashFactory::unsafeInstance); public Lz4Decompressor(Option xxHashInstance) { - decompressor = LZ4Factory.fastestInstance().fastDecompressor(); + decompressor = LZ4Factory.fastestInstance().safeDecompressor(); checksum = getXXHashFactory(xxHashInstance).newStreamingHash32(DEFAULT_SEED).asChecksum(); } @@ -68,13 +68,13 @@ public int decompress(byte[] src, byte[] dst, int dstOff) throws IOException { System.arraycopy(src, HEADER_LENGTH, dst, dstOff, originalLen); break; case COMPRESSION_METHOD_LZ4: - int compressedLen2 = decompressor.decompress(src, HEADER_LENGTH, dst, dstOff, originalLen); - if (compressedLen != compressedLen2) { + int originalLen2 = decompressor.decompress(src, HEADER_LENGTH, compressedLen, dst, dstOff); + if (originalLen != originalLen2) { throw new IOException( - "Compressed length corrupted! expected: " - + compressedLen + "Original length corrupted! expected: " + + originalLen + ", actual: " - + compressedLen2 + + originalLen2 + "."); } } diff --git a/dev/deps/dependencies-client-flink-1.16 b/dev/deps/dependencies-client-flink-1.16 index a35adf79a8f..d2604d91bfc 100644 --- a/dev/deps/dependencies-client-flink-1.16 +++ b/dev/deps/dependencies-client-flink-1.16 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/dev/deps/dependencies-client-flink-1.17 b/dev/deps/dependencies-client-flink-1.17 index a35adf79a8f..d2604d91bfc 100644 --- a/dev/deps/dependencies-client-flink-1.17 +++ b/dev/deps/dependencies-client-flink-1.17 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/dev/deps/dependencies-client-flink-1.18 b/dev/deps/dependencies-client-flink-1.18 index a35adf79a8f..d2604d91bfc 100644 --- a/dev/deps/dependencies-client-flink-1.18 +++ b/dev/deps/dependencies-client-flink-1.18 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/dev/deps/dependencies-client-flink-1.19 b/dev/deps/dependencies-client-flink-1.19 index a35adf79a8f..d2604d91bfc 100644 --- a/dev/deps/dependencies-client-flink-1.19 +++ b/dev/deps/dependencies-client-flink-1.19 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/dev/deps/dependencies-client-flink-1.20 b/dev/deps/dependencies-client-flink-1.20 index a35adf79a8f..d2604d91bfc 100644 --- a/dev/deps/dependencies-client-flink-1.20 +++ b/dev/deps/dependencies-client-flink-1.20 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/dev/deps/dependencies-client-flink-2.0 b/dev/deps/dependencies-client-flink-2.0 index 3ae0c7811da..b06979be85e 100644 --- a/dev/deps/dependencies-client-flink-2.0 +++ b/dev/deps/dependencies-client-flink-2.0 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar diff --git a/dev/deps/dependencies-client-flink-2.1 b/dev/deps/dependencies-client-flink-2.1 index 3ae0c7811da..b06979be85e 100644 --- a/dev/deps/dependencies-client-flink-2.1 +++ b/dev/deps/dependencies-client-flink-2.1 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar diff --git a/dev/deps/dependencies-client-flink-2.2 b/dev/deps/dependencies-client-flink-2.2 index 3ae0c7811da..b06979be85e 100644 --- a/dev/deps/dependencies-client-flink-2.2 +++ b/dev/deps/dependencies-client-flink-2.2 @@ -32,7 +32,7 @@ jcl-over-slf4j/1.7.36//jcl-over-slf4j-1.7.36.jar jsr305/1.3.9//jsr305-1.3.9.jar jul-to-slf4j/1.7.36//jul-to-slf4j-1.7.36.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar diff --git a/dev/deps/dependencies-client-mr b/dev/deps/dependencies-client-mr index 62339262e96..82919c08041 100644 --- a/dev/deps/dependencies-client-mr +++ b/dev/deps/dependencies-client-mr @@ -134,7 +134,7 @@ kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kotlin-stdlib-common/1.4.10//kotlin-stdlib-common-1.4.10.jar kotlin-stdlib/1.4.10//kotlin-stdlib-1.4.10.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/dev/deps/dependencies-client-tez b/dev/deps/dependencies-client-tez index a5b82c7f362..9d73b59b5fd 100644 --- a/dev/deps/dependencies-client-tez +++ b/dev/deps/dependencies-client-tez @@ -107,7 +107,7 @@ kerby-util/1.0.1//kerby-util-1.0.1.jar kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar log4j/1.2.17//log4j-1.2.17.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/dev/deps/dependencies-server b/dev/deps/dependencies-server index e8d7f2e41b0..301ad077b16 100644 --- a/dev/deps/dependencies-server +++ b/dev/deps/dependencies-server @@ -79,7 +79,7 @@ log4j-1.2-api/2.24.3//log4j-1.2-api-2.24.3.jar log4j-api/2.24.3//log4j-api-2.24.3.jar log4j-core/2.24.3//log4j-core-2.24.3.jar log4j-slf4j-impl/2.24.3//log4j-slf4j-impl-2.24.3.jar -lz4-java/1.8.0//lz4-java-1.8.0.jar +lz4-java/1.10.4//lz4-java-1.10.4.jar maven-jdk-tools-wrapper/0.1//maven-jdk-tools-wrapper-0.1.jar metrics-core/4.2.25//metrics-core-4.2.25.jar metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar diff --git a/pom.xml b/pom.xml index c17f70c0ac4..8fca6aa0fcb 100644 --- a/pom.xml +++ b/pom.xml @@ -91,7 +91,8 @@ 1.8 2.24.3 3.4.4 - 1.8.0 + at.yawk.lz4 + 1.10.4 4.11.0 1.17.14 4.2.10.Final @@ -527,7 +528,7 @@ ${leveldb.version} - org.lz4 + ${lz4-java.group} lz4-java ${lz4-java.version} @@ -1530,6 +1531,7 @@ tests/spark-it + org.lz4 1.4.0 2.11.12 2.11 @@ -1549,6 +1551,7 @@ tests/spark-it + org.lz4 1.7.1 2.12.10 2.12 @@ -1569,6 +1572,7 @@ tests/spark-it + org.lz4 1.7.1 2.12.10 2.12 @@ -1589,6 +1593,7 @@ tests/spark-it + org.lz4 1.7.1 2.12.15 2.12 @@ -1608,6 +1613,7 @@ tests/spark-it + org.lz4 1.8.0 2.12.15 2.12 @@ -1627,6 +1633,7 @@ tests/spark-it + org.lz4 1.8.0 2.12.17 2.12 @@ -1646,6 +1653,7 @@ tests/spark-it + org.lz4 1.8.0 2.12.18 2.12 @@ -1665,6 +1673,7 @@ tests/spark-it + org.lz4 1.8.0 2.13.16 2.13 @@ -1684,6 +1693,7 @@ tests/spark-it + org.lz4 1.8.0 2.13.17 2.13 diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 5eed99615c1..259a8ae8ce5 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -38,7 +38,8 @@ import CelebornCommonSettings._ object Dependencies { val zstdJniVersion = sparkClientProjects.map(_.zstdJniVersion).getOrElse("1.5.7-1") - val lz4JavaVersion = sparkClientProjects.map(_.lz4JavaVersion).getOrElse("1.8.0") + val lz4JavaGroup = sparkClientProjects.map(_.lz4JavaGroup).getOrElse("at.yawk.lz4") + val lz4JavaVersion = sparkClientProjects.map(_.lz4JavaVersion).getOrElse("1.10.4") // Dependent library versions val apLoaderVersion = "4.0-10" @@ -176,7 +177,7 @@ object Dependencies { val log4j12Api = "org.apache.logging.log4j" % "log4j-1.2-api" % log4j2Version val log4jSlf4jImpl = "org.apache.logging.log4j" % "log4j-slf4j-impl" % log4j2Version val disruptor = "com.lmax" % "disruptor" % disruptorVersion - val lz4Java = "org.lz4" % "lz4-java" % lz4JavaVersion + val lz4Java = lz4JavaGroup % "lz4-java" % lz4JavaVersion val protobufJava = "com.google.protobuf" % "protobuf-java" % protoVersion val ratisClient = "org.apache.ratis" % "ratis-client" % ratisVersion val ratisCommon = "org.apache.ratis" % "ratis-common" % ratisVersion @@ -1017,6 +1018,7 @@ trait SparkClientProjects { val sparkClientShadedProjectPath: String val sparkClientShadedProjectName: String + val lz4JavaGroup: String = "org.lz4" val lz4JavaVersion: String val sparkProjectScalaVersion: String val sparkVersion: String