summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java 63
1 files changed, 33 insertions, 30 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java
index 917dd2aa..f4ec8c59 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/parser/SparseNumberVectorLabelParser.java
@@ -30,7 +30,6 @@ import java.util.List;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.data.LabelList;
-import de.lmu.ifi.dbs.elki.data.SparseDoubleVector;
import de.lmu.ifi.dbs.elki.data.SparseFloatVector;
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
@@ -74,15 +73,17 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @author Arthur Zimek
*
* @apiviz.has SparseNumberVector
+ *
+ * @param <V> vector type
*/
// FIXME: Maxdim!
@Title("Sparse Vector Label Parser")
@Description("Parser for the following line format:\n" + "A single line provides a single point. Entries are separated by whitespace. " + "The values will be parsed as floats (resulting in a set of SparseFloatVectors). A line is expected in the following format: The first entry of each line is the number of attributes with coordinate value not zero. Subsequent entries are of the form (index, value), where index is the number of the corresponding dimension, and value is the value of the corresponding attribute." + "Any pair of two subsequent substrings not containing whitespace is tried to be read as int and float. If this fails for the first of the pair (interpreted ans index), it will be appended to a label. (Thus, any label must not be parseable as Integer.) If the float component is not parseable, an exception will be thrown. Empty lines and lines beginning with \"#\" will be ignored.")
-public class SparseNumberVectorLabelParser<V extends SparseNumberVector<V, ?>> extends NumberVectorLabelParser<V> {
+public class SparseNumberVectorLabelParser<V extends SparseNumberVector<?>> extends NumberVectorLabelParser<V> {
/**
- * Class logger
+ * Class logger.
*/
- private static final Logging logger = Logging.getLogger(SparseNumberVectorLabelParser.class);
+ private static final Logging LOG = Logging.getLogger(SparseNumberVectorLabelParser.class);
/**
* Holds the dimensionality of the parsed data which is the maximum occurring
@@ -91,6 +92,11 @@ public class SparseNumberVectorLabelParser<V extends SparseNumberVector<V, ?>> e
private int maxdim = -1;
/**
+ * Same as {@link #factory}, but subtype.
+ */
+ private SparseNumberVector.Factory<V, ?> sparsefactory;
+
+ /**
* Constructor.
*
* @param colSep Column separator
@@ -98,8 +104,9 @@ public class SparseNumberVectorLabelParser<V extends SparseNumberVector<V, ?>> e
* @param labelIndices Label indexes
* @param factory Vector factory
*/
- public SparseNumberVectorLabelParser(Pattern colSep, char quoteChar, BitSet labelIndices, V factory) {
+ public SparseNumberVectorLabelParser(Pattern colSep, char quoteChar, BitSet labelIndices, SparseNumberVector.Factory<V, ?> factory) {
super(colSep, quoteChar, labelIndices, factory);
+ this.sparsefactory = factory;
}
@Override
@@ -110,55 +117,51 @@ public class SparseNumberVectorLabelParser<V extends SparseNumberVector<V, ?>> e
TIntDoubleHashMap values = new TIntDoubleHashMap(cardinality, 1);
LabelList labels = null;
- for(int i = 1; i < entries.size() - 1; i++) {
- if(!labelIndices.get(i)) {
+ for (int i = 1; i < entries.size() - 1; i++) {
+ if (labelIndices == null || !labelIndices.get(i)) {
try {
- int index = Integer.valueOf(entries.get(i));
- if(index >= maxdim) {
+ int index = Integer.parseInt(entries.get(i));
+ if (index >= maxdim) {
maxdim = index + 1;
}
- double attribute = Double.valueOf(entries.get(i));
+ double attribute = Double.parseDouble(entries.get(i));
values.put(index, attribute);
i++;
- }
- catch(NumberFormatException e) {
- if(labels == null) {
+ } catch (NumberFormatException e) {
+ if (labels == null) {
labels = new LabelList(1);
}
labels.add(entries.get(i));
continue;
}
- }
- else {
- if(labels == null) {
+ } else {
+ if (labels == null) {
labels = new LabelList(1);
}
labels.add(entries.get(i));
}
}
- if(values.size() > maxdim) {
+ if (values.size() > maxdim) {
throw new AbortException("Invalid sparse vector seen: " + line);
}
- curvec = factory.newNumberVector(values, maxdim);
+ curvec = sparsefactory.newNumberVector(values, maxdim);
curlbl = labels;
}
@Override
protected SimpleTypeInformation<V> getTypeInformation(int dimensionality) {
- @SuppressWarnings("unchecked")
- Class<V> cls = (Class<V>) factory.getClass();
- if(dimensionality > 0) {
- return new VectorFieldTypeInformation<V>(cls, dimensionality, factory.newNumberVector(SparseDoubleVector.EMPTYMAP, dimensionality));
+ if (dimensionality > 0) {
+ return new VectorFieldTypeInformation<V>(factory, dimensionality);
}
- if(dimensionality == DIMENSIONALITY_VARIABLE) {
- return new SimpleTypeInformation<V>(cls);
+ if (dimensionality == DIMENSIONALITY_VARIABLE) {
+ return new SimpleTypeInformation<V>(factory.getRestrictionClass(), factory.getDefaultSerializer());
}
throw new AbortException("No vectors were read from the input file - cannot determine vector data type.");
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -168,18 +171,18 @@ public class SparseNumberVectorLabelParser<V extends SparseNumberVector<V, ?>> e
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends SparseNumberVector<V, ?>> extends NumberVectorLabelParser.Parameterizer<V> {
+ public static class Parameterizer<V extends SparseNumberVector<?>> extends NumberVectorLabelParser.Parameterizer<V> {
@Override
protected void getFactory(Parameterization config) {
- ObjectParameter<V> factoryP = new ObjectParameter<V>(VECTOR_TYPE_ID, SparseNumberVector.class, SparseFloatVector.class);
- if(config.grab(factoryP)) {
+ ObjectParameter<SparseNumberVector.Factory<V, ?>> factoryP = new ObjectParameter<SparseNumberVector.Factory<V, ?>>(VECTOR_TYPE_ID, SparseNumberVector.Factory.class, SparseFloatVector.Factory.class);
+ if (config.grab(factoryP)) {
factory = factoryP.instantiateClass(config);
}
}
@Override
protected SparseNumberVectorLabelParser<V> makeInstance() {
- return new SparseNumberVectorLabelParser<V>(colSep, quoteChar, labelIndices, factory);
+ return new SparseNumberVectorLabelParser<V>(colSep, quoteChar, labelIndices, (SparseNumberVector.Factory<V, ?>) factory);
}
}
-} \ No newline at end of file
+}