summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java 50
1 files changed, 26 insertions, 24 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java
index 07019040..26bc38af 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/BitVectorLabelParser.java
@@ -28,10 +28,10 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
+import java.util.BitSet;
import java.util.List;
import java.util.regex.Pattern;
-import de.lmu.ifi.dbs.elki.data.Bit;
import de.lmu.ifi.dbs.elki.data.BitVector;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -64,11 +64,11 @@ public class BitVectorLabelParser extends AbstractParser implements Parser {
* Constructor.
*
* @param colSep Column separator
- * @param quoteChar Quotation character
+ * @param quoteChars Quotation characters
* @param comment Comment pattern
*/
- public BitVectorLabelParser(Pattern colSep, char quoteChar, Pattern comment) {
- super(colSep, quoteChar, comment);
+ public BitVectorLabelParser(Pattern colSep, String quoteChars, Pattern comment) {
+ super(colSep, quoteChars, comment);
}
@Override
@@ -78,38 +78,40 @@ public class BitVectorLabelParser extends AbstractParser implements Parser {
int dimensionality = -1;
List<BitVector> vectors = new ArrayList<>();
List<LabelList> labels = new ArrayList<>();
+ ArrayList<String> ll = new ArrayList<>();
try {
- for (String line; (line = reader.readLine()) != null; lineNumber++) {
+ for(String line; (line = reader.readLine()) != null; lineNumber++) {
// Skip empty lines and comments
- if (line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
+ if(line.length() <= 0 || (comment != null && comment.matcher(line).matches())) {
continue;
}
- List<String> entries = tokenize(line);
- // FIXME: use more efficient storage right away?
- List<Bit> attributes = new ArrayList<>();
- LabelList ll = null;
- for (String entry : entries) {
+ BitSet bitSet = new BitSet();
+ ll.clear();
+ int i = 0;
+ for(tokenizer.initialize(line, 0, lengthWithoutLinefeed(line)); tokenizer.valid(); tokenizer.advance()) {
try {
- Bit attribute = Bit.valueOf(entry);
- attributes.add(attribute);
- } catch (NumberFormatException e) {
- if (ll == null) {
- ll = new LabelList(1);
+ if(tokenizer.getLongBase10() > 0) {
+ bitSet.set(i);
}
- ll.add(entry);
+ ++i;
+ }
+ catch(NumberFormatException e) {
+ ll.add(tokenizer.getSubstring());
}
}
- if (dimensionality < 0) {
- dimensionality = attributes.size();
- } else if (dimensionality != attributes.size()) {
+ if(dimensionality < 0) {
+ dimensionality = i;
+ }
+ else if(dimensionality != i) {
throw new IllegalArgumentException("Differing dimensionality in line " + lineNumber + ".");
}
- vectors.add(new BitVector(attributes.toArray(new Bit[attributes.size()])));
- labels.add(ll);
+ vectors.add(new BitVector(bitSet, dimensionality));
+ labels.add(LabelList.make(ll));
}
- } catch (IOException e) {
+ }
+ catch(IOException e) {
throw new IllegalArgumentException("Error while parsing line " + lineNumber + ".");
}
return MultipleObjectsBundle.makeSimple(getTypeInformation(dimensionality), vectors, TypeUtil.LABELLIST, labels);
@@ -134,7 +136,7 @@ public class BitVectorLabelParser extends AbstractParser implements Parser {
public static class Parameterizer extends AbstractParser.Parameterizer {
@Override
protected BitVectorLabelParser makeInstance() {
- return new BitVectorLabelParser(colSep, quoteChar, comment);
+ return new BitVectorLabelParser(colSep, quoteChars, comment);
}
}
}