diff --git a/src/main/java/com/sonalake/utah/Parser.java b/src/main/java/com/sonalake/utah/Parser.java index 269e1f3..d169505 100644 --- a/src/main/java/com/sonalake/utah/Parser.java +++ b/src/main/java/com/sonalake/utah/Parser.java @@ -93,34 +93,40 @@ private String getNextRecord(boolean isSelectingHeader) { boolean wasDelimMatched = false; while (!isRecordLoaded) { String currentLine = reader.readLine(); - if (null == currentLine) { - isReaderFinished = true; - isRecordLoaded = true; + boolean shouldIgnoreLine = config.isIgnorable(currentLine); + + if (shouldIgnoreLine) { + continue; } else { - if (StringUtils.isNotBlank(previousDelim)) { - buffer.append(previousDelim + "\n"); - previousDelim = ""; - } - if (isSelectingHeader && config.matchesHeaderDelim(currentLine)) { + if (null == currentLine) { + isReaderFinished = true; isRecordLoaded = true; - } else if (!isSelectingHeader && config.matchesRecordDelim(currentLine)) { - Delimiter applicableDelim = config.getApplicableDelim(currentLine); - // if the delimiter says we're at the start of the record, - // and this is the first record, we need to treat it differently - boolean isFirstDelimOfInterest = 0 == recordNumber && !wasDelimMatched; - if (applicableDelim.isDelimAtStartOfRecord() && isFirstDelimOfInterest) { - // this is the first record, so we don't stop here - wasDelimMatched = true; - } else { - if (applicableDelim.isRetainDelim()) { - previousDelim = currentLine; - } + } else { + if (StringUtils.isNotBlank(previousDelim)) { + buffer.append(previousDelim + "\n"); + previousDelim = ""; + } + if (isSelectingHeader && config.matchesHeaderDelim(currentLine)) { isRecordLoaded = true; + } else if (!isSelectingHeader && config.matchesRecordDelim(currentLine)) { + Delimiter applicableDelim = config.getApplicableDelim(currentLine); + // if the delimiter says we're at the start of the record, + // and this is the first record, we need to treat it differently + boolean isFirstDelimOfInterest = 0 == recordNumber && 
!wasDelimMatched; + if (applicableDelim.isDelimAtStartOfRecord() && isFirstDelimOfInterest) { + // this is the first record, so we don't stop here + wasDelimMatched = true; + } else { + if (applicableDelim.isRetainDelim()) { + previousDelim = currentLine; + } + isRecordLoaded = true; + } } } - } - if (StringUtils.isNotBlank(currentLine)) { - buffer.append(currentLine + "\n"); + if (StringUtils.isNotBlank(currentLine)) { + buffer.append(currentLine + "\n"); + } } } if (isReaderFinished && buffer.length() == 0) { diff --git a/src/main/java/com/sonalake/utah/config/Config.java b/src/main/java/com/sonalake/utah/config/Config.java index 6fb381e..e810524 100644 --- a/src/main/java/com/sonalake/utah/config/Config.java +++ b/src/main/java/com/sonalake/utah/config/Config.java @@ -1,12 +1,10 @@ package com.sonalake.utah.config; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlRootElement; import org.apache.commons.lang3.StringUtils; - import java.util.Collections; import java.util.List; import java.util.Map; @@ -26,7 +24,8 @@ public class Config { protected List delimiters; /** - * The list of searches that are to be used by values section to find values using a regex + * The list of searches that are to be used by values section to find values + * using a regex */ @JacksonXmlElementWrapper(localName = "searches") @JacksonXmlProperty(localName = "search") @@ -47,6 +46,13 @@ public class Config { @JacksonXmlProperty(localName = "values") protected List values; + /** + * The ignore rules, read from the unwrapped "ignore" elements; a line matching any of them is skipped + */ + @JacksonXmlElementWrapper(useWrapping = false) + @JacksonXmlProperty(localName = "ignore") + protected List<Ignorer> ignores; + /** * Precompile the patterns, but only do it the once. 
*/ @@ -60,6 +66,12 @@ void compilePatterns() { for (Delimiter delimiter : delimiters) { delimiter.compile(searches); } + + if (null != ignores) { + for (Ignorer ignorer : ignores) { + ignorer.compile(searches); + } + } } /** @@ -130,7 +142,8 @@ public boolean matchesHeaderDelim(String candidate) { } /** - * Validates if the delimiters are valid. Checks all the delimiters to see if they are well-forrmed + * Validates if the delimiters are valid. Checks all the delimiters to see if + * they are well-formed * * @return true if the delimiters are valid */ @@ -148,7 +161,26 @@ public boolean isDelimiterValid() { } /** - * Get the applicable delimiter for the candidate. The first delimiter that matches the text as used. + * Checks whether the candidate line matches one of the configured ignore rules + * @param candidate the line to test; a null line is never ignorable + * @return true if the line is ignored during the record build + */ + public boolean isIgnorable(String candidate) { + if (null == candidate || null == ignores || ignores.isEmpty()) { + return false; + } else { + for (Ignorer ignorer : ignores) { + if (ignorer.matches(candidate)) { + return true; + } + } + } + return false; + } + + /** + * Get the applicable delimiter for the candidate. The first delimiter that + * matches the text is used. * * @param candidate the candidate text * @return the applicable delimiter, or null if there are none. 
@@ -183,10 +215,7 @@ public boolean hasHeaderDelim() { @Override public String toString() { - return String.format( - "CLIConfig: delim [%s], searches: [%s], values: [%s]", - delimiters, searches, values - ); + return String.format("CLIConfig: delim [%s], searches: [%s], values: [%s]", delimiters, searches, values); } } diff --git a/src/main/java/com/sonalake/utah/config/Ignorer.java b/src/main/java/com/sonalake/utah/config/Ignorer.java new file mode 100644 index 0000000..55a29a7 --- /dev/null +++ b/src/main/java/com/sonalake/utah/config/Ignorer.java @@ -0,0 +1,38 @@ +package com.sonalake.utah.config; + +import javax.xml.bind.annotation.XmlValue; +import java.util.List; +import java.util.regex.Pattern; + +/** + * An ignore rule - used to identify lines of the input that are to be skipped entirely while records are being + * built. + */ +public class Ignorer { + + /** + * Raw ignore pattern string from the config file. NOTE(review): bound via JAXB XmlValue while Config uses Jackson XML annotations - confirm the mapper honours it + */ + @XmlValue + protected String delimiter; + + /** + * The compiled pattern, this is the one used at runtime; it stays null until compile has been called + */ + private Pattern compiledPattern; + + public boolean matches(String candidate) { + return compiledPattern.matcher(candidate).matches(); + } + + /** + * Compile the raw pattern, translated using the searches, so that it matches anywhere within a candidate line + * + * @param searches the searches, processed in this order + */ + void compile(List searches) { + String valueText = SearchHelper.translate(delimiter, searches); + compiledPattern = Pattern.compile(".*?" + valueText + ".*?"); + } + +}