diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java | 73 |
1 files changed, 37 insertions, 36 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java index 35e53bb7..5f9e5e05 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseBitVectorLabelParser.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.parser; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -64,11 +64,12 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser /** * Constructor. * - * @param colSep - * @param quoteChar + * @param colSep Column separator + * @param quoteChar Quotation character + * @param comment Comment pattern */ - public SparseBitVectorLabelParser(Pattern colSep, char quoteChar) { - super(colSep, quoteChar); + public SparseBitVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment) { + super(colSep, quoteChar, comment); } @Override @@ -76,54 +77,54 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser BufferedReader reader = new BufferedReader(new InputStreamReader(in)); int lineNumber = 0; int dimensionality = -1; - List<BitVector> vectors = new ArrayList<BitVector>(); - List<LabelList> lblc = new ArrayList<LabelList>(); + List<BitVector> vectors = new ArrayList<>(); + List<LabelList> lblc = new ArrayList<>(); try { - List<BitSet> bitSets = new ArrayList<BitSet>(); - List<LabelList> allLabels = new ArrayList<LabelList>(); - for(String line; (line = reader.readLine()) != null; lineNumber++) { - if(!line.startsWith(COMMENT) && line.length() > 0) { - List<String> entries = tokenize(line); - BitSet bitSet = new BitSet(); - LabelList labels = null; - - for(String entry : entries) { - try { - int index = Integer.parseInt(entry); - bitSet.set(index); - dimensionality = Math.max(dimensionality, index); - } - catch(NumberFormatException e) { - if(labels == null) { - labels = new LabelList(1); - } - labels.add(entry); + List<BitSet> bitSets = new ArrayList<>(); + List<LabelList> allLabels = new ArrayList<>(); + for (String line; (line = reader.readLine()) != null; lineNumber++) { + // Skip empty lines and comments + if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) { + continue; + } + List<String> entries = tokenize(line); + BitSet bitSet = new BitSet(); + LabelList labels = null; + + for (String entry : entries) { + try { + int index = Integer.parseInt(entry); + bitSet.set(index); + dimensionality = Math.max(dimensionality, index); + } catch (NumberFormatException e) { + if (labels == null) { + labels = new LabelList(1); } + labels.add(entry); } - - bitSets.add(bitSet); - allLabels.add(labels); } + + bitSets.add(bitSet); + allLabels.add(labels); } dimensionality++; - for(int i = 0; i < bitSets.size(); i++) { + for (int i = 0; i < bitSets.size(); i++) { BitSet bitSet = bitSets.get(i); LabelList labels = allLabels.get(i); vectors.add(new BitVector(bitSet, dimensionality)); lblc.add(labels); } - } - catch(IOException e) { + } catch (IOException e) { throw new IllegalArgumentException("Error while parsing line " + lineNumber + "."); } return MultipleObjectsBundle.makeSimple(getTypeInformation(dimensionality), vectors, TypeUtil.LABELLIST, lblc); } protected VectorFieldTypeInformation<BitVector> getTypeInformation(int dimensionality) { - return new VectorFieldTypeInformation<BitVector>(BitVector.FACTORY, dimensionality); + return new VectorFieldTypeInformation<>(BitVector.FACTORY, dimensionality); } - + @Override protected Logging getLogger() { return LOG; @@ -139,7 +140,7 @@ public class SparseBitVectorLabelParser extends AbstractParser implements Parser public static class Parameterizer extends AbstractParser.Parameterizer { @Override protected SparseBitVectorLabelParser makeInstance() { - return new SparseBitVectorLabelParser(colSep, quoteChar); + return new SparseBitVectorLabelParser(colSep, quoteChar, comment); } } -}
\ No newline at end of file +} |