diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java | 50 |
1 files changed, 26 insertions, 24 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java index 07019040..26bc38af 100644 --- a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java +++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java @@ -28,10 +28,10 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.regex.Pattern; -import de.lmu.ifi.dbs.elki.data.Bit; import de.lmu.ifi.dbs.elki.data.BitVector; import de.lmu.ifi.dbs.elki.data.LabelList; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; @@ -64,11 +64,11 @@ public class BitVectorLabelParser extends AbstractParser implements Parser { * Constructor. * * @param colSep Column separator - * @param quoteChar Quotation character + * @param quoteChars Quotation character * @param comment Comment pattern */ - public BitVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment) { - super(colSep, quoteChar, comment); + public BitVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment) { + super(colSep, quoteChars, comment); } @Override @@ -78,38 +78,40 @@ public class BitVectorLabelParser extends AbstractParser implements Parser { int dimensionality = -1; List<BitVector> vectors = new ArrayList<>(); List<LabelList> labels = new ArrayList<>(); + ArrayList<String> ll = new ArrayList<>(); try { - for (String line; (line = reader.readLine()) != null; lineNumber++) { + for(String line; (line = reader.readLine()) != null; lineNumber++) { // Skip empty lines and comments - if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) { + if(line.length() <= 0 || (comment != null && comment.matcher(line).matches())) { continue; } - List<String> entries = tokenize(line); - // FIXME: use more efficient storage right away? - List<Bit> attributes = new ArrayList<>(); - LabelList ll = null; - for (String entry : entries) { + BitSet bitSet = new BitSet(); + ll.clear(); + int i = 0; + for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance()) { try { - Bit attribute = Bit.valueOf(entry); - attributes.add(attribute); - } catch (NumberFormatException e) { - if (ll == null) { - ll = new LabelList(1); + if(tokenizer.getLongBase10() > 0) { + bitSet.set(i); } - ll.add(entry); + ++i; + } + catch(NumberFormatException e) { + ll.add(tokenizer.getSubstring()); } } - if (dimensionality < 0) { - dimensionality = attributes.size(); - } else if (dimensionality != attributes.size()) { + if(dimensionality < 0) { + dimensionality = i; + } + else if(dimensionality != i) { throw new IllegalArgumentException("Differing dimensionality in line " + lineNumber + "."); } - vectors.add(new BitVector(attributes.toArray(new Bit[attributes.size()]))); - labels.add(ll); + vectors.add(new BitVector(bitSet, dimensionality)); + labels.add(LabelList.make(ll)); } - } catch (IOException e) { + } + catch(IOException e) { throw new IllegalArgumentException("Error while parsing line " + lineNumber + "."); } return MultipleObjectsBundle.makeSimple(getTypeInformation(dimensionality), vectors, TypeUtil.LABELLIST, labels); @@ -134,7 +136,7 @@ public class BitVectorLabelParser extends AbstractParser implements Parser { public static class Parameterizer extends AbstractParser.Parameterizer { @Override protected BitVectorLabelParser makeInstance() { - return new BitVectorLabelParser(colSep, quoteChar, comment); + return new BitVectorLabelParser(colSep, quoteChars, comment); } } } |