From 569875811937dc954e1ffc1d969780d7089cdb62 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 29 Nov 2022 14:58:42 +0000 Subject: [PATCH 01/10] Updated the geo normalizers to accept valid hashes --- .../AbstractGeometryNormalizer.java | 55 ++++++++++++++++++- .../data/normalizer/GeometryNormalizer.java | 15 ++++- .../data/normalizer/PointNormalizer.java | 6 ++ .../normalizer/GeometryNormalizerTest.java | 18 ++++++ 4 files changed, 92 insertions(+), 2 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java index 3ea2369..90ab6f6 100644 --- a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java @@ -1,5 +1,6 @@ package datawave.data.normalizer; +import com.google.common.base.Throwables; import datawave.data.parser.GeometryParser; import org.apache.commons.codec.binary.Hex; import org.locationtech.geowave.core.geotime.util.GeometryUtils; @@ -49,7 +50,15 @@ public abstract class AbstractGeometryNormalizer= 0 && tier <= 0x1f; + } + + public boolean validLength(short tier, String value) { + // determine the length of the position in hex characters + long posLen = Math.round(Math.ceil((double) tier / 4)) * 2; + // length is the tier length plus the position length + return value.length() == (2 + posLen); + } + + public boolean validPosition(short tier, long value) { + return value >= 0 && value < Math.round(Math.pow(2.0d, 2.0d * tier)); + } + } diff --git a/src/main/java/datawave/data/normalizer/GeometryNormalizer.java b/src/main/java/datawave/data/normalizer/GeometryNormalizer.java index e27c78a..ff6474a 100644 --- a/src/main/java/datawave/data/normalizer/GeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/GeometryNormalizer.java @@ -1,9 +1,11 @@ package datawave.data.normalizer; +import com.google.common.base.Throwables; import com.google.common.collect.Lists; import datawave.data.type.util.Geometry; import org.locationtech.geowave.core.geotime.index.dimension.LatitudeDefinition; import org.locationtech.geowave.core.geotime.index.dimension.LongitudeDefinition; +import org.locationtech.geowave.core.geotime.util.GeometryUtils; import org.locationtech.geowave.core.index.NumericIndexStrategy; import org.locationtech.geowave.core.index.dimension.NumericDimensionDefinition; import org.locationtech.geowave.core.index.sfc.SFCFactory; @@ -11,6 +13,7 @@ import org.locationtech.geowave.core.store.api.Index; import org.locationtech.geowave.core.store.index.CustomNameIndex; +import java.util.Collections; import java.util.List; /** @@ -49,7 +52,16 @@ protected Index getIndex() { @Override public List normalizeToMany(String geoString) throws IllegalArgumentException { - return normalizeDelegateTypeToMany(createDatawaveGeometry(parseGeometry(geoString))); + try { + return normalizeDelegateTypeToMany(createDatawaveGeometry(parseGeometry(geoString))); + } catch (Exception e) { + // perhaps this is a geo hash instead + if (validHash(geoString)) { + return Collections.singletonList(geoString); + } + Throwables.propagateIfPossible(e, IllegalArgumentException.class); + } + throw new IllegalArgumentException("Cannot normalize geo string " + geoString); } @Override @@ -64,4 +76,5 @@ public List normalizeDelegateTypeToMany(Geometry geometry) { protected datawave.data.type.util.Geometry createDatawaveGeometry(org.locationtech.jts.geom.Geometry geometry) { return new datawave.data.type.util.Geometry(geometry); } + } diff --git a/src/main/java/datawave/data/normalizer/PointNormalizer.java b/src/main/java/datawave/data/normalizer/PointNormalizer.java index 6387a2c..842b486 100644 --- a/src/main/java/datawave/data/normalizer/PointNormalizer.java +++ b/src/main/java/datawave/data/normalizer/PointNormalizer.java @@ -47,4 +47,10 @@ protected Index getIndex() { protected Point createDatawaveGeometry(org.locationtech.jts.geom.Point geometry) { return new Point(geometry); } + + @Override + public boolean validTier(short tier) { + return tier == 0x1f; + } + } diff --git a/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java b/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java index 83e09b6..8177e2b 100644 --- a/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java +++ b/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java @@ -18,6 +18,7 @@ import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; public class GeometryNormalizerTest { @@ -106,4 +107,21 @@ public void testQueryRanges() throws Exception { assertEquals(expected, result.toString()); } + + @Test + public void testHash() { + String[] validHashes = new String[] {"00", "0100", "020d", "031b", "04df", "05031e", "0604ff", "0713ff", "08c7fe", "09023fff", "0a04ffff", "0b0dffff", "0c8fffff", "0d01c00000", "0e0b000000", "0f0dfffffe", "1037ffffff", "11023fffffff", "1208ffffffff", "131c00000000", "1437ffffffff", "15023fffffffff", "16070000000000", "1723ffffffffff", "188fffffffffff", "19013fffffffffff", "1a08ffffffffffff", "1b1c000000000000", "1c4fffffffffffff", "1d01c0000000000000", "1e0700000000000000", "1f0dffffffffffffff"}; + String[] invalidHashes = new String[] {"0", "0001", "01", "1fffffffffffffffff", "200dffffffffffffff", "1c4fffffffffffffff"}; + for (String hash : validHashes) { + assertEquals(hash, normalizer.normalize(hash)); + } + for (String hash : invalidHashes) { + try { + normalizer.normalize(hash); + fail("Should have failed to normalize " + hash); + } catch (Exception e) { + // this is expected + } + } + } } From dcae6ed09730caa69b2b1913c43a22225a60c557 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 29 Nov 2022 15:03:23 +0000 Subject: [PATCH 02/10] formatter --- .../datawave/data/normalizer/GeometryNormalizerTest.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java b/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java index 8177e2b..b9b49a8 100644 --- a/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java +++ b/src/test/java/datawave/data/normalizer/GeometryNormalizerTest.java @@ -107,10 +107,13 @@ public void testQueryRanges() throws Exception { assertEquals(expected, result.toString()); } - + @Test public void testHash() { - String[] validHashes = new String[] {"00", "0100", "020d", "031b", "04df", "05031e", "0604ff", "0713ff", "08c7fe", "09023fff", "0a04ffff", "0b0dffff", "0c8fffff", "0d01c00000", "0e0b000000", "0f0dfffffe", "1037ffffff", "11023fffffff", "1208ffffffff", "131c00000000", "1437ffffffff", "15023fffffffff", "16070000000000", "1723ffffffffff", "188fffffffffff", "19013fffffffffff", "1a08ffffffffffff", "1b1c000000000000", "1c4fffffffffffff", "1d01c0000000000000", "1e0700000000000000", "1f0dffffffffffffff"}; + String[] validHashes = new String[] {"00", "0100", "020d", "031b", "04df", "05031e", "0604ff", "0713ff", "08c7fe", "09023fff", "0a04ffff", "0b0dffff", + "0c8fffff", "0d01c00000", "0e0b000000", "0f0dfffffe", "1037ffffff", "11023fffffff", "1208ffffffff", "131c00000000", "1437ffffffff", + "15023fffffffff", "16070000000000", "1723ffffffffff", "188fffffffffff", "19013fffffffffff", "1a08ffffffffffff", "1b1c000000000000", + "1c4fffffffffffff", "1d01c0000000000000", "1e0700000000000000", "1f0dffffffffffffff"}; String[] invalidHashes = new String[] {"0", "0001", "01", "1fffffffffffffffff", "200dffffffffffffff", "1c4fffffffffffffff"}; for (String hash : validHashes) { assertEquals(hash, normalizer.normalize(hash)); From 2c7111a33796bf7188a053099c4e89cf5f41829e Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 29 Nov 2022 15:09:31 +0000 Subject: [PATCH 03/10] Check for a valid geohash first --- .../normalizer/AbstractGeometryNormalizer.java | 12 +++--------- .../data/normalizer/GeometryNormalizer.java | 15 +++------------ 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java index 90ab6f6..d0fce22 100644 --- a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java @@ -1,6 +1,5 @@ package datawave.data.normalizer; -import com.google.common.base.Throwables; import datawave.data.parser.GeometryParser; import org.apache.commons.codec.binary.Hex; import org.locationtech.geowave.core.geotime.util.GeometryUtils; @@ -50,15 +49,10 @@ public abstract class AbstractGeometryNormalizer normalizeToMany(String geoString) throws IllegalArgumentException { - try { - return normalizeDelegateTypeToMany(createDatawaveGeometry(parseGeometry(geoString))); - } catch (Exception e) { - // perhaps this is a geo hash instead - if (validHash(geoString)) { - return Collections.singletonList(geoString); - } - Throwables.propagateIfPossible(e, IllegalArgumentException.class); + if (validHash(geoString)) { + return Lists.newArrayList(geoString); } - throw new IllegalArgumentException("Cannot normalize geo string " + geoString); + return normalizeDelegateTypeToMany(createDatawaveGeometry(parseGeometry(geoString))); } @Override From 85a630cc7475a1815b9a70c9e28266d7fbee34da Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Tue, 29 Nov 2022 17:12:55 +0000 Subject: [PATCH 04/10] review comments --- .../data/normalizer/AbstractGeometryNormalizer.java | 8 +++++--- .../java/datawave/data/normalizer/GeometryNormalizer.java | 3 ++- .../java/datawave/data/normalizer/PointNormalizer.java | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java index d0fce22..2d008f5 100644 --- a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java @@ -35,6 +35,7 @@ public abstract class AbstractGeometryNormalizer= 0 && value < Math.round(Math.pow(2.0d, 2.0d * tier)); + return value >= 0 && value < (long) (Math.pow(2d, 2d * tier)); } } diff --git a/src/main/java/datawave/data/normalizer/GeometryNormalizer.java b/src/main/java/datawave/data/normalizer/GeometryNormalizer.java index 6a66b78..f9a883d 100644 --- a/src/main/java/datawave/data/normalizer/GeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/GeometryNormalizer.java @@ -16,11 +16,11 @@ /** * A normalizer that, given a parseable geometry string representing an arbitrary geometry, will perform GeoWave indexing with a multi-tiered spatial geowave * index configuration - * */ public class GeometryNormalizer extends AbstractGeometryNormalizer implements OneToManyNormalizer { private static final long serialVersionUID = 171360806347433135L; + // NOTE: If we change the index strategy, then we will need to update the validHash method appropriately. // @formatter:off public static final NumericIndexStrategy indexStrategy = TieredSFCIndexFactory.createFullIncrementalTieredStrategy( new NumericDimensionDefinition[]{ @@ -40,6 +40,7 @@ public class GeometryNormalizer extends AbstractGeometryNormalizer { private static final long serialVersionUID = 171360806347433135L; + // NOTE: If we change the index strategy, then we will need to update the validHash method appropriately. // @formatter:off public static final NumericIndexStrategy indexStrategy = TieredSFCIndexFactory.createSingleTierStrategy( new NumericDimensionDefinition[]{ @@ -37,6 +37,7 @@ public class PointNormalizer extends AbstractGeometryNormalizer Date: Tue, 29 Nov 2022 19:41:57 +0000 Subject: [PATCH 05/10] More efficient computations --- .../data/normalizer/AbstractGeometryNormalizer.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java index 2d008f5..b9cece8 100644 --- a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java @@ -215,13 +215,19 @@ public boolean validTier(short tier) { public boolean validLength(short tier, String value) { // determine the length of the position in hex characters - long posLen = (long) (Math.ceil((double) tier / 4)) * 2; + // ceil(tier/4) will get the number of bytes + long bytes = (tier >> 2) + ((tier & 0x3) == 0 ? 0 : 1); + + // multiply by 2 to get the number of hex digits + long posLen = 2L * bytes; // length is the tier length plus the position length return value.length() == (2 + posLen); } public boolean validPosition(short tier, long value) { - return value >= 0 && value < (long) (Math.pow(2d, 2d * tier)); + // The maximum value must be less than pow(2, tier*2) + long max = 1L << (tier * 2); + return value >= 0 && value < max; } } From 911f225f75d900fb8b745aa224426cd2ebe480f4 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Wed, 30 Nov 2022 14:39:29 +0000 Subject: [PATCH 06/10] tighter calc --- .../datawave/data/normalizer/AbstractGeometryNormalizer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java index b9cece8..9a4f55f 100644 --- a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java @@ -216,10 +216,10 @@ public boolean validTier(short tier) { public boolean validLength(short tier, String value) { // determine the length of the position in hex characters // ceil(tier/4) will get the number of bytes - long bytes = (tier >> 2) + ((tier & 0x3) == 0 ? 0 : 1); + int bytes = (tier >> 2) + ((tier & 0x3) == 0 ? 0 : 1); // multiply by 2 to get the number of hex digits - long posLen = 2L * bytes; + int posLen = 2 * bytes; // length is the tier length plus the position length return value.length() == (2 + posLen); } From 5af449662e442b0b85cf785c6631c19b2ee85afa Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Wed, 30 Nov 2022 09:40:38 -0500 Subject: [PATCH 07/10] Updated the geo normalizers to accept valid hashes (#14) --- .../AbstractGeometryNormalizer.java | 55 +++++++++++++++++++ .../data/normalizer/GeometryNormalizer.java | 7 ++- .../data/normalizer/PointNormalizer.java | 9 ++- .../normalizer/GeometryNormalizerTest.java | 21 +++++++ 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java index 3ea2369..9a4f55f 100644 --- a/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/AbstractGeometryNormalizer.java @@ -35,6 +35,7 @@ public abstract class AbstractGeometryNormalizer= 0 && tier <= 0x1f; + } + + public boolean validLength(short tier, String value) { + // determine the length of the position in hex characters + // ceil(tier/4) will get the number of bytes + int bytes = (tier >> 2) + ((tier & 0x3) == 0 ? 0 : 1); + + // multiply by 2 to get the number of hex digits + int posLen = 2 * bytes; + // length is the tier length plus the position length + return value.length() == (2 + posLen); + } + + public boolean validPosition(short tier, long value) { + // The maximum value must be less than pow(2, tier*2) + long max = 1L << (tier * 2); + return value >= 0 && value < max; + } + } diff --git a/src/main/java/datawave/data/normalizer/GeometryNormalizer.java b/src/main/java/datawave/data/normalizer/GeometryNormalizer.java index e27c78a..f9a883d 100644 --- a/src/main/java/datawave/data/normalizer/GeometryNormalizer.java +++ b/src/main/java/datawave/data/normalizer/GeometryNormalizer.java @@ -16,11 +16,11 @@ /** * A normalizer that, given a parseable geometry string representing an arbitrary geometry, will perform GeoWave indexing with a multi-tiered spatial geowave * index configuration - * */ public class GeometryNormalizer extends AbstractGeometryNormalizer implements OneToManyNormalizer { private static final long serialVersionUID = 171360806347433135L; + // NOTE: If we change the index strategy, then we will need to update the validHash method appropriately. // @formatter:off public static final NumericIndexStrategy indexStrategy = TieredSFCIndexFactory.createFullIncrementalTieredStrategy( new NumericDimensionDefinition[]{ @@ -40,6 +40,7 @@ public class GeometryNormalizer extends AbstractGeometryNormalizer normalizeToMany(String geoString) throws IllegalArgumentException { + if (validHash(geoString)) { + return Lists.newArrayList(geoString); + } return normalizeDelegateTypeToMany(createDatawaveGeometry(parseGeometry(geoString))); } @@ -64,4 +68,5 @@ public List normalizeDelegateTypeToMany(Geometry geometry) { protected datawave.data.type.util.Geometry createDatawaveGeometry(org.locationtech.jts.geom.Geometry geometry) { return new datawave.data.type.util.Geometry(geometry); } + } diff --git a/src/main/java/datawave/data/normalizer/PointNormalizer.java b/src/main/java/datawave/data/normalizer/PointNormalizer.java index 6387a2c..b6f3ce9 100644 --- a/src/main/java/datawave/data/normalizer/PointNormalizer.java +++ b/src/main/java/datawave/data/normalizer/PointNormalizer.java @@ -13,11 +13,11 @@ /** * A normalizer that, given a parseable geometry string representing a point geometry will perform GeoWave indexing with a single-tier spatial geowave index * configuration - * */ public class PointNormalizer extends AbstractGeometryNormalizer { private static final long serialVersionUID = 171360806347433135L; + // NOTE: If we change the index strategy, then we will need to update the validHash method appropriately. // @formatter:off public static final NumericIndexStrategy indexStrategy = TieredSFCIndexFactory.createSingleTierStrategy( new NumericDimensionDefinition[]{ @@ -37,6 +37,7 @@ public class PointNormalizer extends AbstractGeometryNormalizer Date: Wed, 30 Nov 2022 14:41:05 +0000 Subject: [PATCH 08/10] 1.12 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index db68447..a3322bf 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ type-utils - 1.12-SNAPSHOT + 1.12 https://code.nsa.gov/datawave-type-utils From 6fd78bb852c685142c4cde38392ca8b1e909e125 Mon Sep 17 00:00:00 2001 From: Ivan Bella Date: Wed, 30 Nov 2022 14:46:14 +0000 Subject: [PATCH 09/10] 1.13-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a3322bf..b215677 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ type-utils - 1.12 + 1.13-SNAPSHOT https://code.nsa.gov/datawave-type-utils From c982676db4d1af9b8b839ddf66a366fe0b8b1163 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 30 Nov 2022 14:46:43 +0000 Subject: [PATCH 10/10] Bump version.hadoop from 2.6.0-cdh5.9.1 to 3.1.1.7.2.15.0-147 Bumps `version.hadoop` from 2.6.0-cdh5.9.1 to 3.1.1.7.2.15.0-147. Updates `hadoop-auth` from 2.6.0-cdh5.9.1 to 3.1.1.7.2.15.0-147 Updates `hadoop-common` from 2.6.0-cdh5.9.1 to 3.1.1.7.2.15.0-147 --- updated-dependencies: - dependency-name: org.apache.hadoop:hadoop-auth dependency-type: direct:production - dependency-name: org.apache.hadoop:hadoop-common dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b215677..808e119 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ 3.9 3.6 1.1.0 - 2.6.0-cdh5.9.1 + 3.1.1.7.2.15.0-147 1.19.0 1.6.2 1.7.29