summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java 38
1 files changed, 33 insertions, 5 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java
index e8201db1..53d814ea 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/AbstractParser.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.parser;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,9 +23,11 @@ package de.lmu.ifi.dbs.elki.datasource.parser;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.io.Tokenizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -45,7 +47,7 @@ public abstract class AbstractParser {
/**
* A pattern defining whitespace.
*/
- public static final String DEFAULT_SEPARATOR = "(\\s+|\\s*[,;]\\s*)";
+ public static final String DEFAULT_SEPARATOR = "\\s*[,;\\s]\\s*"; // one comma, semicolon or whitespace character, with optional whitespace around it
/**
* A quote pattern
@@ -74,7 +76,7 @@ public abstract class AbstractParser {
/**
* Comment pattern.
*/
- protected Pattern comment = null;
+ private Matcher comment = null; // kept as a Matcher so isComment() can reuse it via reset(line)
/**
* String tokenizer.
@@ -91,10 +93,16 @@ public abstract class AbstractParser {
public AbstractParser(Pattern colSep, String quoteChars, Pattern comment) {
super();
this.tokenizer = new Tokenizer(colSep, quoteChars);
- this.comment = comment;
+ this.comment = (comment != null) ? comment.matcher("") : null; // a null pattern still means "no comment detection", as before this change
}
- public static int lengthWithoutLinefeed(String line) {
+ /**
+ * Get the length of the string, not taking trailing linefeeds into account.
+ *
+ * @param line Input line
+ * @return Length
+ */
+ public static int lengthWithoutLinefeed(CharSequence line) {
int length = line.length();
while(length > 0) {
char last = line.charAt(length - 1);
@@ -114,6 +122,26 @@ public abstract class AbstractParser {
protected abstract Logging getLogger();
/**
+ * Clean up the tokenizer and reset the comment matcher to an empty input.
+ */
+ public void cleanup() {
+ tokenizer.cleanup();
+ if(comment != null) {
+ comment.reset(""); // replace the matcher's input so the last tested line is no longer referenced
+ }
+ }
+
+ /**
+ * Test whether a line is a comment, reusing the shared comment matcher.
+ *
+ * @param line Line to test
+ * @return {@code true} if a comment matcher is set and the entire line matches it.
+ */
+ protected boolean isComment(CharSequence line) {
+ return (comment != null && comment.reset(line).matches());
+ }
+
+ /**
* Returns a string representation of the object.
*
* @return a string representation of the object.