From f7b5c5ed8a819aba71e3cef22cd27469cf108c32 Mon Sep 17 00:00:00 2001 From: Tomasz Lelek Date: Sat, 1 Jul 2017 18:23:22 +0200 Subject: [PATCH 1/3] BAEL-1010 HLL article code --- libraries/pom.xml | 6 ++ .../java/com/baeldung/hll/HLLUnitTest.java | 66 +++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java diff --git a/libraries/pom.xml b/libraries/pom.xml index ee5fb2f977df..f7a3ff68e2a7 100644 --- a/libraries/pom.xml +++ b/libraries/pom.xml @@ -366,6 +366,11 @@ groovy-all 2.4.10 + + net.agkn + hll + ${hll.version} + 0.7.0 @@ -397,6 +402,7 @@ 4.12 0.10 3.5.0 + 1.6.0 diff --git a/libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java b/libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java new file mode 100644 index 000000000000..6409b2813710 --- /dev/null +++ b/libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java @@ -0,0 +1,66 @@ +package com.baeldung.hll; + + +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; +import net.agkn.hll.HLL; +import org.junit.Test; + +import java.util.stream.LongStream; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +public class HLLUnitTest { + + @Test + public void givenHLL_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinality() { + //given + int numberOfElements = 100_000_000; + HashFunction hashFunction = Hashing.murmur3_128(); + HLL hll = new HLL(14, 5); + + //when + LongStream.range(0, numberOfElements).forEach(element -> { + long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong(); + hll.addRaw(hashedValue); + } + ); + + //then + long cardinality = hll.cardinality(); + assertThat(isSimilarTo(cardinality, numberOfElements)).isTrue(); + } + + @Test + public void givenTwoHLLs_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinalityForUnionOfHLLs() { + //given + int numberOfElements = 100_000_000; + HashFunction hashFunction = Hashing.murmur3_128(); + HLL firstHll = new HLL(15, 5); + HLL secondHLL = new HLL(15, 5); + + //when + LongStream.range(0, numberOfElements).forEach(element -> { + long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong(); + firstHll.addRaw(hashedValue); + } + ); + + LongStream.range(numberOfElements, numberOfElements * 2).forEach(element -> { + long hashedValue = hashFunction.newHasher().putLong(element).hash().asLong(); + secondHLL.addRaw(hashedValue); + } + ); + + //then + firstHll.union(secondHLL); + long cardinality = firstHll.cardinality(); + assertThat(isSimilarTo(cardinality, numberOfElements * 2)).isTrue(); + } + + private boolean isSimilarTo(long cardinality, int numberOfElements) { + System.out.println(cardinality); + return Math.abs(cardinality - numberOfElements) <= 1_000_000; + + } +} From c68222adb91c2324a06666fa0fd534cbf0867b2a Mon Sep 17 00:00:00 2001 From: Tomasz Lelek Date: Fri, 7 Jul 2017 16:52:23 +0200 Subject: [PATCH 2/3] BAEL-1010 moved tolerated difference to a variable --- .../src/test/java/com/baeldung/hll/HLLUnitTest.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java b/libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java index 6409b2813710..1d318b7e4e4d 100644 --- a/libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java +++ b/libraries/src/test/java/com/baeldung/hll/HLLUnitTest.java @@ -16,6 +16,7 @@ public class HLLUnitTest { public void givenHLL_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinality() { //given int numberOfElements = 100_000_000; + int toleratedDifference = 1_000_000; HashFunction hashFunction = Hashing.murmur3_128(); HLL hll = new HLL(14, 5); @@ -28,13 +29,14 @@ public void givenHLL_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardina //then long cardinality = hll.cardinality(); - assertThat(isSimilarTo(cardinality, numberOfElements)).isTrue(); + assertThat(isSimilarTo(cardinality, numberOfElements, toleratedDifference)).isTrue(); } @Test public void givenTwoHLLs_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCardinalityForUnionOfHLLs() { //given int numberOfElements = 100_000_000; + int toleratedDifference = 1_000_000; HashFunction hashFunction = Hashing.murmur3_128(); HLL firstHll = new HLL(15, 5); HLL secondHLL = new HLL(15, 5); @@ -55,12 +57,11 @@ public void givenTwoHLLs_whenAddHugeAmountOfNumbers_thenShouldReturnEstimatedCar //then firstHll.union(secondHLL); long cardinality = firstHll.cardinality(); - assertThat(isSimilarTo(cardinality, numberOfElements * 2)).isTrue(); + assertThat(isSimilarTo(cardinality, numberOfElements * 2, toleratedDifference)).isTrue(); } - private boolean isSimilarTo(long cardinality, int numberOfElements) { + private boolean isSimilarTo(long cardinality, int numberOfElements, int maxToleratedDifference) { System.out.println(cardinality); - return Math.abs(cardinality - numberOfElements) <= 1_000_000; - + return Math.abs(cardinality - numberOfElements) <= maxToleratedDifference; } } From 17358c5c1bfe0aedeb82cc879f9060f64924b460 Mon Sep 17 00:00:00 2001 From: Tomasz Lelek Date: Mon, 10 Jul 2017 18:45:00 +0200 Subject: [PATCH 3/3] Merge branch 'master' of https://github.com/eugenp/tutorials into BAEL-1010_hll # Conflicts: # libraries/pom.xml --- libraries/pom.xml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/libraries/pom.xml b/libraries/pom.xml index 28dd36fceb9d..a67a6621f298 100644 --- a/libraries/pom.xml +++ b/libraries/pom.xml @@ -480,22 +480,22 @@ org.eclipse.jetty.websocket websocket-server ${jetty.version} - + org.eclipse.jetty.websocket websocket-client ${jetty.version} - + org.eclipse.jetty.websocket websocket-api ${jetty.version} - + org.eclipse.jetty.websocket websocket-common ${jetty.version} - + org.eclipse.jetty jetty-continuation @@ -505,7 +505,7 @@ org.eclipse.jetty jetty-util ${jetty.version} - + org.seleniumhq.selenium selenium-api @@ -514,6 +514,11 @@ jar + + net.agkn + hll + ${hll.version} + 0.7.0 @@ -552,6 +557,7 @@ 8.0.6 mytheme + 1.6.0