diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java | 38 |
1 files changed, 33 insertions, 5 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java index e8201db1..53d814ea 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.parser; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,9 +23,11 @@ package de.lmu.ifi.dbs.elki.datasource.parser; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.regex.Matcher; import java.util.regex.Pattern; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.io.Tokenizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -45,7 +47,7 @@ public abstract class AbstractParser { /** * A pattern defining whitespace. */ - public static final String DEFAULT_SEPARATOR = "(\\s+|\\s*[,;]\\s*)"; + public static final String DEFAULT_SEPARATOR = "\\s*[,;\\s]\\s*"; /** * A quote pattern @@ -74,7 +76,7 @@ public abstract class AbstractParser { /** * Comment pattern. */ - protected Pattern comment = null; + private Matcher comment = null; /** * String tokenizer. @@ -91,10 +93,16 @@ public abstract class AbstractParser { public AbstractParser(Pattern colSep, String quoteChars, Pattern comment) { super(); this.tokenizer = new Tokenizer(colSep, quoteChars); - this.comment = comment; + this.comment = comment.matcher(""); } - public static int lengthWithoutLinefeed(String line) { + /** + * Get the length of the string, not taking trailing linefeeds into account. + * + * @param line Input line + * @return Length + */ + public static int lengthWithoutLinefeed(CharSequence line) { int length = line.length(); while(length > 0) { char last = line.charAt(length - 1); @@ -114,6 +122,26 @@ public abstract class AbstractParser { protected abstract Logging getLogger(); /** + * Cleanup internal data structures. + */ + public void cleanup() { + tokenizer.cleanup(); + if(comment != null) { + comment.reset(""); + } + } + + /** + * Match a comment line. + * + * @param line Line to test + * @return {@code true} if the line matches the comment pattern. + */ + protected boolean isComment(CharSequence line) { + return (comment != null && comment.reset(line).matches()); + } + + /** * Returns a string representation of the object. * * @return a string representation of the object. |