diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java | 74 |
1 files changed, 37 insertions, 37 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java index 32a26d7d..07019040 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.parser; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -63,11 +63,12 @@ public class BitVectorLabelParser extends AbstractParser implements Parser { /** * Constructor. * - * @param colSep - * @param quoteChar + * @param colSep Column separator + * @param quoteChar Quotation character + * @param comment Comment pattern */ - public BitVectorLabelParser(Pattern colSep, char quoteChar) { - super(colSep, quoteChar); + public BitVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment) { + super(colSep, quoteChar, comment); } @Override @@ -75,48 +76,47 @@ public class BitVectorLabelParser extends AbstractParser implements Parser { BufferedReader reader = new BufferedReader(new InputStreamReader(in)); int lineNumber = 0; int dimensionality = -1; - List<BitVector> vectors = new ArrayList<BitVector>(); - List<LabelList> labels = new ArrayList<LabelList>(); + List<BitVector> vectors = new ArrayList<>(); + List<LabelList> labels = new ArrayList<>(); try { - for(String line; (line = reader.readLine()) != null; lineNumber++) { - if(!line.startsWith(COMMENT) && line.length() > 0) { - List<String> entries = tokenize(line); - // FIXME: use more efficient storage right away? - List<Bit> attributes = new ArrayList<Bit>(); - LabelList ll = null; - for(String entry : entries) { - try { - Bit attribute = Bit.valueOf(entry); - attributes.add(attribute); - } - catch(NumberFormatException e) { - if(ll == null) { - ll = new LabelList(1); - } - ll.add(entry); + for (String line; (line = reader.readLine()) != null; lineNumber++) { + // Skip empty lines and comments + if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) { + continue; + } + List<String> entries = tokenize(line); + // FIXME: use more efficient storage right away? + List<Bit> attributes = new ArrayList<>(); + LabelList ll = null; + for (String entry : entries) { + try { + Bit attribute = Bit.valueOf(entry); + attributes.add(attribute); + } catch (NumberFormatException e) { + if (ll == null) { + ll = new LabelList(1); } + ll.add(entry); } + } - if(dimensionality < 0) { - dimensionality = attributes.size(); - } - else if(dimensionality != attributes.size()) { - throw new IllegalArgumentException("Differing dimensionality in line " + lineNumber + "."); - } - - vectors.add(new BitVector(attributes.toArray(new Bit[attributes.size()]))); - labels.add(ll); + if (dimensionality < 0) { + dimensionality = attributes.size(); + } else if (dimensionality != attributes.size()) { + throw new IllegalArgumentException("Differing dimensionality in line " + lineNumber + "."); } + + vectors.add(new BitVector(attributes.toArray(new Bit[attributes.size()]))); + labels.add(ll); } - } - catch(IOException e) { + } catch (IOException e) { throw new IllegalArgumentException("Error while parsing line " + lineNumber + "."); } return MultipleObjectsBundle.makeSimple(getTypeInformation(dimensionality), vectors, TypeUtil.LABELLIST, labels); } protected VectorFieldTypeInformation<BitVector> getTypeInformation(int dimensionality) { - return new VectorFieldTypeInformation<BitVector>(BitVector.FACTORY, dimensionality); + return new VectorFieldTypeInformation<>(BitVector.FACTORY, dimensionality); } @Override @@ -134,7 +134,7 @@ public class BitVectorLabelParser extends AbstractParser implements Parser { public static class Parameterizer extends AbstractParser.Parameterizer { @Override protected BitVectorLabelParser makeInstance() { - return new BitVectorLabelParser(colSep, quoteChar); + return new BitVectorLabelParser(colSep, quoteChar, comment); } } -}
\ No newline at end of file +} |