summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java 73
1 files changed, 37 insertions, 36 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java
index 35e53bb7..5f9e5e05 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.parser;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -64,11 +64,12 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser
/**
* Constructor.
*
- * @param colSep
- * @param quoteChar
+ * @param colSep Column separator
+ * @param quoteChar Quotation character
+ * @param comment Comment pattern
*/
- public SparseBitVectorLabelParser(Pattern colSep, char quoteChar) {
- super(colSep, quoteChar);
+ public SparseBitVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment) {
+ super(colSep, quoteChar, comment);
}
@Override
@@ -76,54 +77,54 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
int lineNumber = 0;
int dimensionality = -1;
- List<BitVector> vectors = new ArrayList<BitVector>();
- List<LabelList> lblc = new ArrayList<LabelList>();
+ List<BitVector> vectors = new ArrayList<>();
+ List<LabelList> lblc = new ArrayList<>();
try {
- List<BitSet> bitSets = new ArrayList<BitSet>();
- List<LabelList> allLabels = new ArrayList<LabelList>();
- for(String line; (line = reader.readLine()) != null; lineNumber++) {
- if(!line.startsWith(COMMENT) && line.length() > 0) {
- List<String> entries = tokenize(line);
- BitSet bitSet = new BitSet();
- LabelList labels = null;
-
- for(String entry : entries) {
- try {
- int index = Integer.parseInt(entry);
- bitSet.set(index);
- dimensionality = Math.max(dimensionality, index);
- }
- catch(NumberFormatException e) {
- if(labels == null) {
- labels = new LabelList(1);
- }
- labels.add(entry);
+ List<BitSet> bitSets = new ArrayList<>();
+ List<LabelList> allLabels = new ArrayList<>();
+ for (String line; (line = reader.readLine()) != null; lineNumber++) {
+ // Skip empty lines and comments
+ if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
+ continue;
+ }
+ List<String> entries = tokenize(line);
+ BitSet bitSet = new BitSet();
+ LabelList labels = null;
+
+ for (String entry : entries) {
+ try {
+ int index = Integer.parseInt(entry);
+ bitSet.set(index);
+ dimensionality = Math.max(dimensionality, index);
+ } catch (NumberFormatException e) {
+ if (labels == null) {
+ labels = new LabelList(1);
}
+ labels.add(entry);
}
-
- bitSets.add(bitSet);
- allLabels.add(labels);
}
+
+ bitSets.add(bitSet);
+ allLabels.add(labels);
}
dimensionality++;
- for(int i = 0; i < bitSets.size(); i++) {
+ for (int i = 0; i < bitSets.size(); i++) {
BitSet bitSet = bitSets.get(i);
LabelList labels = allLabels.get(i);
vectors.add(new BitVector(bitSet, dimensionality));
lblc.add(labels);
}
- }
- catch(IOException e) {
+ } catch (IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
}
return MultipleObjectsBundle.makeSimple(getTypeInformation(dimensionality), vectors, TypeUtil.LABELLIST, lblc);
}
protected VectorFieldTypeInformation<BitVector> getTypeInformation(int dimensionality) {
- return new VectorFieldTypeInformation<BitVector>(BitVector.FACTORY, dimensionality);
+ return new VectorFieldTypeInformation<>(BitVector.FACTORY, dimensionality);
}
-
+
@Override
protected Logging getLogger() {
return LOG;
@@ -139,7 +140,7 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser
public static class Parameterizer extends AbstractParser.Parameterizer {
@Override
protected SparseBitVectorLabelParser makeInstance() {
- return new SparseBitVectorLabelParser(colSep, quoteChar);
+ return new SparseBitVectorLabelParser(colSep, quoteChar, comment);
}
}
-} \ No newline at end of file
+}