From c77cde35dea8b646791c94f795e913bb288eb923 Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Thu, 27 Feb 2025 23:32:23 +0000 Subject: [PATCH 1/4] Adds property validation for instance.volumes property --- .../apache/accumulo/core/conf/Property.java | 2 +- .../accumulo/core/conf/PropertyType.java | 6 ++++-- .../core/volume/VolumeConfiguration.java | 19 ++++++++++++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java index 42a57a1962a..e7d9a6f03f1 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java @@ -135,7 +135,7 @@ public enum Property { + " HDFS. To use the ChangeSecret tool, run the command: `./bin/accumulo" + " admin changeSecret`.", "1.3.5"), - INSTANCE_VOLUMES("instance.volumes", "", PropertyType.STRING, + INSTANCE_VOLUMES("instance.volumes", "", PropertyType.VOLUMES, "A comma separated list of dfs uris to use. Files will be stored across" + " these filesystems. In some situations, the first volume in this list" + " may be treated differently, such as being preferred for writing out" diff --git a/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java b/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java index 0181db3357b..6347632a239 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java @@ -31,6 +31,7 @@ import java.util.stream.Stream; import org.apache.accumulo.core.file.rfile.RFile; +import org.apache.accumulo.core.volume.VolumeConfiguration; import org.apache.commons.lang3.Range; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; @@ -148,7 +149,9 @@ public enum PropertyType { FILENAME_EXT("file name extension", in(true, RFile.EXTENSION), "One of the currently supported filename extensions for storing table data files. " - + "Currently, only " + RFile.EXTENSION + " is supported."); + + "Currently, only " + RFile.EXTENSION + " is supported."), + + VOLUMES("volumes", VolumeConfiguration::isValidVolumeUris, "See instance.volumes documentation"); private final String shortname; private final String format; @@ -395,5 +398,4 @@ public static IntStream parse(String portRange) { } } - } diff --git a/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java b/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java index 634cb3aab3d..c2dac68aa38 100644 --- a/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java +++ b/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java @@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.slf4j.LoggerFactory; public class VolumeConfiguration { @@ -39,7 +40,23 @@ public static FileSystem fileSystemForPath(String path, Configuration conf) thro } public static Set getVolumeUris(AccumuloConfiguration conf) { - String volumes = conf.get(Property.INSTANCE_VOLUMES); + return getVolumeUris(conf.get(Property.INSTANCE_VOLUMES)); + } + + public static boolean isValidVolumeUris(String volumes) { + try { + if (volumes == null || volumes.isBlank()) { + return false; + } + getVolumeUris(volumes); + return true; + } catch (IllegalArgumentException e) { + LoggerFactory.getLogger(VolumeConfiguration.class).warn(e.getMessage()); + return false; + } + } + + private static Set getVolumeUris(String volumes) { if (volumes == null || volumes.isBlank()) { throw new IllegalArgumentException( "Missing required property " + Property.INSTANCE_VOLUMES.getKey()); From eb172144d1a601bf70e9121379be41e7b41598ad Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Fri, 28 Feb 2025 16:49:47 -0500 Subject: [PATCH 2/4] Move volume parsing to ConfigurationTypeHelper * Move parsing code for volume properties to ConfigurationTypeHelper and add a unit test * Make VolumeManager do an additional check to require the property to be set (could also be done elsewhere in other overall configuration checks) * Added a validator to the PropertyType to verify the format when it is set * Slightly modified some of the exception messages, so that it does not mention the instance.volumes property itself... since the PropertyType validation could apply to other properties of the same type (Added other minor changes to log errors at the error level) --- .../core/conf/ConfigurationTypeHelper.java | 51 +++++++++++++++++++ .../accumulo/core/conf/PropertyType.java | 31 ++++++++--- .../core/volume/VolumeConfiguration.java | 49 ++---------------- .../conf/ConfigurationTypeHelperTest.java | 50 ++++++++++++++++++ 4 files changed, 129 insertions(+), 52 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java b/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java index cf7b9deb484..78cf10f2dbf 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/ConfigurationTypeHelper.java @@ -18,18 +18,27 @@ */ package org.apache.accumulo.core.conf; +import static java.util.Objects.requireNonNull; import static java.util.concurrent.TimeUnit.DAYS; import static java.util.concurrent.TimeUnit.HOURS; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.SECONDS; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.stream.Collectors; import org.apache.accumulo.core.classloader.ClassLoaderUtil; +import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -222,4 +231,46 @@ public static int getNumThreads(String threads) { } return nThreads; } + + /** + * Get the set of volumes parsed from a volumes property type, and throw exceptions if the volumes + * aren't valid, are null, contains only blanks, or contains duplicates. An empty string is + * allowed (resulting in an empty set of volumes), to handle the case where the property is not + * set by a user (or... is set to the same as the default, which is equivalent to not being set). + * If the property is required to be set, it is the caller's responsibility to verify that the + * parsed set is non-empty. + * + * @throws IllegalArgumentException when the volumes are set to something that cannot be parsed + */ + public static Set getVolumeUris(String volumes) { + if (requireNonNull(volumes).isEmpty()) { + // special case when the property is not set and defaults to an empty string + return Set.of(); + } + var blanksRemoved = Arrays.stream(volumes.split(",")).map(String::strip) + .filter(Predicate.not(String::isEmpty)).collect(Collectors.toList()); + if (blanksRemoved.isEmpty()) { + throw new IllegalArgumentException("property contains only blank volumes"); + } + var deduplicated = blanksRemoved.stream().map(ConfigurationTypeHelper::normalizeVolume) + .collect(Collectors.toCollection(LinkedHashSet::new)); + if (deduplicated.size() < blanksRemoved.size()) { + throw new IllegalArgumentException("property contains duplicate volumes"); + } + return deduplicated; + } + + private static String normalizeVolume(String volume) { + if (!volume.contains(":")) { + throw new IllegalArgumentException("'" + volume + "' is not a fully qualified URI"); + } + try { + // pass through URI to unescape hex encoded chars (e.g. convert %2C to "," char) + return new Path(new URI(volume.strip())).toString(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException( + "volume contains '" + volume + "' which has a syntax error", e); + } + } + } diff --git a/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java b/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java index 6347632a239..8032e03bbb1 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java @@ -31,7 +31,6 @@ import java.util.stream.Stream; import org.apache.accumulo.core.file.rfile.RFile; -import org.apache.accumulo.core.volume.VolumeConfiguration; import org.apache.commons.lang3.Range; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; @@ -140,8 +139,9 @@ public enum PropertyType { + " interpreted based on the context of the property to which it applies."), JSON("json", new ValidJson(), - "An arbitrary string that is represents a valid, parsable generic json object." - + "The validity of the json object in the context of the property usage is not checked by this type."), + "An arbitrary string that is represents a valid, parsable generic json object. The validity " + + "of the json object in the context of the property usage is not checked by this type."), + BOOLEAN("boolean", in(false, null, "true", "false"), "Has a value of either 'true' or 'false' (case-insensitive)"), @@ -151,7 +151,7 @@ public enum PropertyType { "One of the currently supported filename extensions for storing table data files. " + "Currently, only " + RFile.EXTENSION + " is supported."), - VOLUMES("volumes", VolumeConfiguration::isValidVolumeUris, "See instance.volumes documentation"); + VOLUMES("volumes", new ValidVolumes(), "See instance.volumes documentation"); private final String shortname; private final String format; @@ -218,13 +218,32 @@ private static class ValidJson implements Predicate { public boolean test(String value) { try { if (value.length() > ONE_MILLION) { - log.info("provided json string length {} is greater than limit of {} for parsing", + log.error("provided json string length {} is greater than limit of {} for parsing", value.length(), ONE_MILLION); return false; } jsonMapper.readTree(value); return true; - } catch (IOException ex) { + } catch (IOException e) { + log.error("provided json string resulted in an error", e); + return false; + } + } + } + + private static class ValidVolumes implements Predicate { + private static final Logger log = LoggerFactory.getLogger(ValidVolumes.class); + + @Override + public boolean test(String volumes) { + if (volumes == null) { + return false; + } + try { + ConfigurationTypeHelper.getVolumeUris(volumes); + return true; + } catch (IllegalArgumentException e) { + log.error("provided volume string is not valid", e); return false; } } diff --git a/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java b/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java index c2dac68aa38..097b941507d 100644 --- a/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java +++ b/core/src/main/java/org/apache/accumulo/core/volume/VolumeConfiguration.java @@ -19,19 +19,14 @@ package org.apache.accumulo.core.volume; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.Set; -import java.util.stream.Collectors; import org.apache.accumulo.core.conf.AccumuloConfiguration; +import org.apache.accumulo.core.conf.ConfigurationTypeHelper; import org.apache.accumulo.core.conf.Property; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.slf4j.LoggerFactory; public class VolumeConfiguration { @@ -40,50 +35,12 @@ public static FileSystem fileSystemForPath(String path, Configuration conf) thro } public static Set getVolumeUris(AccumuloConfiguration conf) { - return getVolumeUris(conf.get(Property.INSTANCE_VOLUMES)); - } - - public static boolean isValidVolumeUris(String volumes) { - try { - if (volumes == null || volumes.isBlank()) { - return false; - } - getVolumeUris(volumes); - return true; - } catch (IllegalArgumentException e) { - LoggerFactory.getLogger(VolumeConfiguration.class).warn(e.getMessage()); - return false; - } - } - - private static Set getVolumeUris(String volumes) { + var volumes = conf.get(Property.INSTANCE_VOLUMES); if (volumes == null || volumes.isBlank()) { throw new IllegalArgumentException( "Missing required property " + Property.INSTANCE_VOLUMES.getKey()); } - String[] volArray = volumes.split(","); - LinkedHashSet deduplicated = - Arrays.stream(volArray).map(VolumeConfiguration::normalizeVolume) - .collect(Collectors.toCollection(LinkedHashSet::new)); - if (deduplicated.size() < volArray.length) { - throw new IllegalArgumentException( - Property.INSTANCE_VOLUMES.getKey() + " contains duplicate volumes (" + volumes + ")"); - } - return deduplicated; - } - - private static String normalizeVolume(String volume) { - if (volume == null || volume.isBlank() || !volume.contains(":")) { - throw new IllegalArgumentException("Expected fully qualified URI for " - + Property.INSTANCE_VOLUMES.getKey() + " got " + volume); - } - try { - // pass through URI to unescape hex encoded chars (e.g. convert %2C to "," char) - return new Path(new URI(volume.strip())).toString(); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(Property.INSTANCE_VOLUMES.getKey() + " contains '" + volume - + "' which has a syntax error", e); - } + return ConfigurationTypeHelper.getVolumeUris(volumes); } } diff --git a/core/src/test/java/org/apache/accumulo/core/conf/ConfigurationTypeHelperTest.java b/core/src/test/java/org/apache/accumulo/core/conf/ConfigurationTypeHelperTest.java index 627f21c450b..f8b06ec4042 100644 --- a/core/src/test/java/org/apache/accumulo/core/conf/ConfigurationTypeHelperTest.java +++ b/core/src/test/java/org/apache/accumulo/core/conf/ConfigurationTypeHelperTest.java @@ -24,7 +24,9 @@ import static java.util.concurrent.TimeUnit.SECONDS; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.Set; import java.util.function.Function; import java.util.stream.Stream; @@ -132,4 +134,52 @@ public void testGetFractionFailureCase2() { public void testGetFractionFailureCase3() { assertThrows(IllegalArgumentException.class, () -> ConfigurationTypeHelper.getFraction(".%")); } + + @Test + public void testGetVolumeUris() { + // test property not set + assertEquals(Set.of(), ConfigurationTypeHelper.getVolumeUris("")); + + // test blank cases + assertThrows(NullPointerException.class, () -> ConfigurationTypeHelper.getVolumeUris(null)); + for (String s : Set.of(" ", ",", ",,,", " ,,,", ",,, ", ", ,,")) { + var e = assertThrows(IllegalArgumentException.class, + () -> ConfigurationTypeHelper.getVolumeUris(s)); + assertEquals("property contains only blank volumes", e.getMessage()); + } + + // test 1 volume + for (String s : Set.of("hdfs:/volA", ",hdfs:/volA", "hdfs:/volA,")) { + var uris = ConfigurationTypeHelper.getVolumeUris(s); + assertEquals(1, uris.size()); + assertTrue(uris.contains("hdfs:/volA")); + } + + // test more than 1 volume + for (String s : Set.of("hdfs:/volA,file:/volB", ",hdfs:/volA,file:/volB", + "hdfs:/volA,,file:/volB", "hdfs:/volA,file:/volB, ,")) { + var uris = ConfigurationTypeHelper.getVolumeUris(s); + assertEquals(2, uris.size()); + assertTrue(uris.contains("hdfs:/volA")); + assertTrue(uris.contains("file:/volB")); + } + + // test invalid URI + for (String s : Set.of("hdfs:/volA,hdfs:/volB,volA", ",volA,hdfs:/volA,hdfs:/volB", + "hdfs:/volA,,volA,hdfs:/volB", "hdfs:/volA,volA,hdfs:/volB, ,")) { + var iae = assertThrows(IllegalArgumentException.class, + () -> ConfigurationTypeHelper.getVolumeUris(s)); + assertEquals("'volA' is not a fully qualified URI", iae.getMessage()); + } + + // test duplicates + var iae = assertThrows(IllegalArgumentException.class, + () -> ConfigurationTypeHelper.getVolumeUris("hdfs:/volA,hdfs:/volB,hdfs:/volA")); + assertEquals("property contains duplicate volumes", iae.getMessage()); + + // test syntax error in URI + iae = assertThrows(IllegalArgumentException.class, + () -> ConfigurationTypeHelper.getVolumeUris("hdfs:/volA,hdfs :/::/volB")); + assertEquals("volume contains 'hdfs :/::/volB' which has a syntax error", iae.getMessage()); + } } From af62fc2368175ea093979245f2bc609935299d58 Mon Sep 17 00:00:00 2001 From: Christopher Tubbs Date: Fri, 28 Feb 2025 17:11:47 -0500 Subject: [PATCH 3/4] Add test coverage in PropertyTypeTest --- .../apache/accumulo/core/conf/PropertyTypeTest.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/core/src/test/java/org/apache/accumulo/core/conf/PropertyTypeTest.java b/core/src/test/java/org/apache/accumulo/core/conf/PropertyTypeTest.java index 315543ade93..5cb81a9835a 100644 --- a/core/src/test/java/org/apache/accumulo/core/conf/PropertyTypeTest.java +++ b/core/src/test/java/org/apache/accumulo/core/conf/PropertyTypeTest.java @@ -225,4 +225,15 @@ public void testTypeFILENAME_EXT() { invalid(null, "RF", "map", "", "MAP", "rF", "Rf", " rf "); } + @Test + public void testTypeVOLUMES() { + // more comprehensive parsing tests are in ConfigurationTypeHelperTest.testGetVolumeUris() + valid("", "hdfs:/volA", ",hdfs:/volA", "hdfs:/volA,", "hdfs:/volA,file:/volB", + ",hdfs:/volA,file:/volB", "hdfs:/volA,,file:/volB", "hdfs:/volA,file:/volB, ,"); + invalid(null, " ", ",", ",,,", " ,,,", ",,, ", ", ,,", "hdfs:/volA,hdfs:/volB,volA", + ",volA,hdfs:/volA,hdfs:/volB", "hdfs:/volA,,volA,hdfs:/volB", + "hdfs:/volA,volA,hdfs:/volB, ,", "hdfs:/volA,hdfs:/volB,hdfs:/volA", + "hdfs:/volA,hdfs :/::/volB"); + } + } From 59c82593bbd7a13e2d36b38e6f0746ea40482f69 Mon Sep 17 00:00:00 2001 From: Keith Turner Date: Sat, 1 Mar 2025 00:45:33 +0000 Subject: [PATCH 4/4] adds another test --- .../core/volume/VolumeConfigurationTest.java | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 core/src/test/java/org/apache/accumulo/core/volume/VolumeConfigurationTest.java diff --git a/core/src/test/java/org/apache/accumulo/core/volume/VolumeConfigurationTest.java b/core/src/test/java/org/apache/accumulo/core/volume/VolumeConfigurationTest.java new file mode 100644 index 00000000000..5e46ca6e055 --- /dev/null +++ b/core/src/test/java/org/apache/accumulo/core/volume/VolumeConfigurationTest.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.core.volume; + +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.apache.accumulo.core.conf.ConfigurationCopy; +import org.apache.accumulo.core.conf.Property; +import org.junit.jupiter.api.Test; + +public class VolumeConfigurationTest { + @Test + public void testEmptyVolumes() { + ConfigurationCopy config = new ConfigurationCopy(); + assertNull(config.get(Property.INSTANCE_VOLUMES.getKey())); + assertThrows(IllegalArgumentException.class, () -> VolumeConfiguration.getVolumeUris(config)); + + config.set(Property.INSTANCE_VOLUMES.getKey(), ""); + assertThrows(IllegalArgumentException.class, () -> VolumeConfiguration.getVolumeUris(config)); + config.set(Property.INSTANCE_VOLUMES.getKey(), " "); + assertThrows(IllegalArgumentException.class, () -> VolumeConfiguration.getVolumeUris(config)); + config.set(Property.INSTANCE_VOLUMES.getKey(), ","); + assertThrows(IllegalArgumentException.class, () -> VolumeConfiguration.getVolumeUris(config)); + } +}