summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/datasource/filter
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter')
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java158
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java38
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java85
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java87
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java30
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java164
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java43
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java47
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorProjectionFilter.java86
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorRandomProjectionFilter.java83
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java62
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java45
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java116
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java (renamed from src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractFeatureSelectionFilter.java)296
-rw-r--r--src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java174
35 files changed, 978 insertions, 890 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java
index 34fb6bad..5948cd83 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java
@@ -45,6 +45,9 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter {
*
* In the main pass, each object is then filtered using
* {@link #filterSingleObject}.
+ *
+ * @param objects Objects to filter
+ * @return Filtered bundle
*/
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
@@ -100,14 +103,14 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter {
* @param obj Database object to normalize
* @return Normalized database object
*/
- abstract protected O filterSingleObject(I obj);
+ protected abstract O filterSingleObject(I obj);
/**
* Get the input type restriction used for negotiating the data query.
*
* @return Type restriction
*/
- abstract protected SimpleTypeInformation<? super I> getInputTypeRestriction();
+ protected abstract SimpleTypeInformation<? super I> getInputTypeRestriction();
/**
* Get the output type from the input type after conversion.
@@ -115,10 +118,10 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter {
* @param in input type restriction
* @return output type restriction
*/
- abstract protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<I> in);
+ protected abstract SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<I> in);
/**
- * Return "true" when the normalization needs initialization (two-pass filtering!)
+ * Return "true" when the normalization needs initialization (two-pass filtering!).
*
* @param in Input type information
* @return true or false
@@ -137,7 +140,7 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter {
}
/**
- * Complete the initialization phase
+ * Complete the initialization phase.
*/
protected void prepareComplete() {
// optional - default NOOP.
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java
deleted file mode 100644
index 6c16abfc..00000000
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractRandomFeatureSelectionFilter.java
+++ /dev/null
@@ -1,158 +0,0 @@
-package de.lmu.ifi.dbs.elki.datasource.filter;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.BitSet;
-import java.util.Random;
-
-import de.lmu.ifi.dbs.elki.data.FeatureVector;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.utilities.Util;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
-
-/**
- * <p>
- * A RandomProjectionParser selects a subset of attributes randomly for
- * projection of a ParsingResult.
- * </p>
- *
- * The cardinality of the subset of attributes is specified as a parameter.
- *
- * @author Arthur Zimek
- * @author Erich Schubert
- *
- * @param <V> the type of FeatureVector contained in both the original data of
- * the base parser and the projected data of this ProjectionParser
- */
-public abstract class AbstractRandomFeatureSelectionFilter<V extends FeatureVector<?, ?>> extends AbstractStreamConversionFilter<V, V> {
- /**
- * The selected attributes
- */
- protected BitSet selectedAttributes = null;
-
- /**
- * Parameter for the desired cardinality of the subset of attributes selected
- * for projection.
- *
- * <p>
- * Key: <code>-randomprojection.numberselected</code>
- * </p>
- * <p>
- * Default: <code>1</code>
- * </p>
- * <p>
- * Constraint: &ge;1
- * </p>
- */
- public static final OptionID NUMBER_SELECTED_ATTRIBUTES_ID = OptionID.getOrCreateOptionID("randomprojection.numberselected", "number of selected attributes");
-
- /**
- * Optional parameter to specify a seed for random projection.
- * If unused, system time is used as seed.
- * <p>
- * Key: {@code -randomprojection.seed}
- * </p>
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("randomprojection.seed", "Seed for random selection of projection attributes.");
-
-
- /**
- * Holds the desired cardinality of the subset of attributes selected for
- * projection.
- */
- protected int k;
-
- /**
- * Holds a random object.
- */
- protected final Random random;
-
- /**
- * Constructor.
- *
- * @param dim dimensionality
- */
- public AbstractRandomFeatureSelectionFilter(int dim) {
- super();
- this.k = dim;
- this.random = new Random();
- }
-
- /**
- * Constructor.
- *
- * @param dim dimensionality
- * @param seed seed for random
- */
- public AbstractRandomFeatureSelectionFilter(int dim, long seed) {
- super();
- this.k = dim;
- this.random = new Random(seed);
- }
-
- /**
- * Initialize random attributes.
- *
- * Invoke this from {@link #convertedType}!
- *
- * @param in Type information.
- */
- void initializeRandomAttributes(SimpleTypeInformation<V> in) {
- int d = ((VectorFieldTypeInformation<V>) in).dimensionality();
- selectedAttributes = Util.randomBitSet(k, d, random);
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static abstract class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
- protected int k = 0;
-
- protected long seed = System.currentTimeMillis();
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(NUMBER_SELECTED_ATTRIBUTES_ID, new GreaterEqualConstraint(1), 1);
- if(config.grab(kP)) {
- k = kP.getValue();
- }
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
- }
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java
index 1c8acb72..9b628f2d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java
@@ -37,15 +37,15 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
*/
public abstract class AbstractStreamConversionFilter<I, O> extends AbstractStreamFilter {
/**
- * The filtered meta
+ * The filtered meta.
*/
BundleMeta meta;
/**
- * The column to filter
+ * The column to filter.
*/
int column = -1;
-
+
@Override
public BundleMeta getMeta() {
return meta;
@@ -98,14 +98,14 @@ public abstract class AbstractStreamConversionFilter<I, O> extends AbstractStrea
* @param obj Database object to normalize
* @return Normalized database object
*/
- abstract protected O filterSingleObject(I obj);
+ protected abstract O filterSingleObject(I obj);
/**
* Get the input type restriction used for negotiating the data query.
*
* @return Type restriction
*/
- abstract protected SimpleTypeInformation<? super I> getInputTypeRestriction();
+ protected abstract SimpleTypeInformation<? super I> getInputTypeRestriction();
/**
* Get the output type from the input type after conversion.
@@ -113,5 +113,5 @@ public abstract class AbstractStreamConversionFilter<I, O> extends AbstractStrea
* @param in input type restriction
* @return output type restriction
*/
- abstract protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<I> in);
+ protected abstract SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<I> in);
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java
new file mode 100644
index 00000000..66d10967
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java
@@ -0,0 +1,51 @@
+package de.lmu.ifi.dbs.elki.datasource.filter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+
+/**
+ * Abstract class for {@link AbstractConversionFilter}s that produce number vectors.
+ *
+ * @author Erich Schubert
+ *
+ * @param <I> Input type (unbounded, so not necessarily a vector)
+ * @param <O> Output vector type
+ */
+public abstract class AbstractVectorConversionFilter<I, O extends NumberVector<?>> extends AbstractConversionFilter<I, O> {
+ /**
+ * Number vector factory, assigned by {@link #initializeOutputType}.
+ */
+ protected NumberVector.Factory<O, ?> factory;
+
+ /**
+ * Initialize {@link #factory} from a data type, using {@link FilterUtil#guessFactory}.
+ *
+ * @param type Output data type information.
+ */
+ protected void initializeOutputType(SimpleTypeInformation<O> type) {
+ factory = FilterUtil.guessFactory(type);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java
new file mode 100644
index 00000000..695a54e0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java
@@ -0,0 +1,51 @@
+package de.lmu.ifi.dbs.elki.datasource.filter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+
+/**
+ * Abstract base class for {@link AbstractStreamConversionFilter}s that produce number vectors.
+ *
+ * @author Erich Schubert
+ *
+ * @param <I> Input type
+ * @param <O> Output vector type
+ */
+public abstract class AbstractVectorStreamConversionFilter<I, O extends NumberVector<?>> extends AbstractStreamConversionFilter<I, O> {
+ /**
+ * Number vector factory, assigned by {@link #initializeOutputType}.
+ */
+ protected NumberVector.Factory<O, ?> factory;
+
+ /**
+ * Initialize {@link #factory} from a data type, using {@link FilterUtil#guessFactory}.
+ *
+ * @param type Output data type information.
+ */
+ protected void initializeOutputType(SimpleTypeInformation<O> type) {
+ factory = FilterUtil.guessFactory(type);
+ }
+
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
index ebf01cfd..21f05739 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
@@ -46,7 +46,7 @@ public class ByLabelFilter extends AbstractStreamFilter {
/**
* Class logger
*/
- private static final Logging logger = Logging.getLogger(ByLabelFilter.class);
+ private static final Logging LOG = Logging.getLogger(ByLabelFilter.class);
/**
* The filter pattern
@@ -92,7 +92,7 @@ public class ByLabelFilter extends AbstractStreamFilter {
switch(ev){
case END_OF_STREAM:
if (lblcol < 0) {
- logger.warning("By label filter was used, but never saw a label relation!");
+ LOG.warning("By label filter was used, but never saw a label relation!");
}
return Event.END_OF_STREAM;
case META_CHANGED:
@@ -136,7 +136,7 @@ public class ByLabelFilter extends AbstractStreamFilter {
}
return Event.NEXT_OBJECT;
default:
- logger.warning("Unknown event: " + ev);
+ LOG.warning("Unknown event: " + ev);
}
}
}
@@ -155,7 +155,7 @@ public class ByLabelFilter extends AbstractStreamFilter {
* Key: {@code -patternfilter.pattern}
* </p>
*/
- public static final OptionID LABELFILTER_PATTERN_ID = OptionID.getOrCreateOptionID("patternfilter.pattern", "The filter pattern to use.");
+ public static final OptionID LABELFILTER_PATTERN_ID = new OptionID("patternfilter.pattern", "The filter pattern to use.");
/**
* Flag to use the pattern in inverted mode
@@ -163,7 +163,7 @@ public class ByLabelFilter extends AbstractStreamFilter {
* Key: {@code -patternfilter.invert}
* </p>
*/
- public static final OptionID LABELFILTER_PATTERN_INVERT_ID = OptionID.getOrCreateOptionID("patternfilter.invert", "Flag to invert pattern.");
+ public static final OptionID LABELFILTER_PATTERN_INVERT_ID = new OptionID("patternfilter.invert", "Flag to invert pattern.");
/**
* The pattern configured.
@@ -184,7 +184,7 @@ public class ByLabelFilter extends AbstractStreamFilter {
}
final Flag invertedF = new Flag(LABELFILTER_PATTERN_INVERT_ID);
if(config.grab(invertedF)) {
- inverted = invertedF.getValue();
+ inverted = invertedF.getValue().booleanValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
index 95596773..4a349d3d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
@@ -30,7 +30,6 @@ import de.lmu.ifi.dbs.elki.data.ClassLabel;
import de.lmu.ifi.dbs.elki.data.LabelList;
import de.lmu.ifi.dbs.elki.data.SimpleClassLabel;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -56,7 +55,7 @@ public class ClassLabelFilter implements ObjectFilter {
* Key: {@code -dbc.classLabelIndex}
* </p>
*/
- public static final OptionID CLASS_LABEL_INDEX_ID = OptionID.getOrCreateOptionID("dbc.classLabelIndex", "The index of the label to be used as class label.");
+ public static final OptionID CLASS_LABEL_INDEX_ID = new OptionID("dbc.classLabelIndex", "The index of the label to be used as class label.");
/**
* Parameter to specify the class of occurring class labels.
@@ -64,7 +63,7 @@ public class ClassLabelFilter implements ObjectFilter {
* Key: {@code -dbc.classLabelClass}
* </p>
*/
- public static final OptionID CLASS_LABEL_CLASS_ID = OptionID.getOrCreateOptionID("dbc.classLabelClass", "Class label class to use.");
+ public static final OptionID CLASS_LABEL_CLASS_ID = new OptionID("dbc.classLabelClass", "Class label class to use.");
/**
* The index of the label to be used as class label, null if no class label is
@@ -95,10 +94,10 @@ public class ClassLabelFilter implements ObjectFilter {
// Find a labellist column
boolean done = false;
boolean keeplabelcol = false;
- for(int i = 0; i < objects.metaLength(); i++) {
+ for (int i = 0; i < objects.metaLength(); i++) {
SimpleTypeInformation<?> meta = objects.meta(i);
// Skip non-labellist columns - or if we already had a labellist
- if(done || meta.getRestrictionClass() != LabelList.class) {
+ if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
bundle.appendColumn(meta, objects.getColumn(i));
continue;
}
@@ -109,29 +108,27 @@ public class ClassLabelFilter implements ObjectFilter {
List<LabelList> lblcol = new ArrayList<LabelList>(objects.dataLength());
// Split the column
- for(Object obj : objects.getColumn(i)) {
- if(obj != null) {
+ for (Object obj : objects.getColumn(i)) {
+ if (obj != null) {
LabelList ll = (LabelList) obj;
try {
ClassLabel lbl = classLabelFactory.makeFromString(ll.remove(classLabelIndex));
clscol.add(lbl);
- }
- catch(Exception e) {
- throw new AbortException("Cannot initialize class labels: "+e.getMessage(), e);
+ } catch (Exception e) {
+ throw new AbortException("Cannot initialize class labels: " + e.getMessage(), e);
}
lblcol.add(ll);
- if(ll.size() > 0) {
+ if (ll.size() > 0) {
keeplabelcol = true;
}
- }
- else {
+ } else {
clscol.add(null);
lblcol.add(null);
}
}
- bundle.appendColumn(TypeUtil.CLASSLABEL, clscol);
+ bundle.appendColumn(classLabelFactory.getTypeInformation(), clscol);
// Only add the label column when it's not empty.
- if(keeplabelcol) {
+ if (keeplabelcol) {
bundle.appendColumn(meta, lblcol);
}
}
@@ -150,7 +147,7 @@ public class ClassLabelFilter implements ObjectFilter {
* The index of the label to be used as class label, null if no class label
* is specified.
*/
- protected Integer classLabelIndex;
+ protected int classLabelIndex;
/**
* The class label factory to use.
@@ -161,13 +158,14 @@ public class ClassLabelFilter implements ObjectFilter {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
// parameter class label index
- final IntParameter classLabelIndexParam = new IntParameter(CLASS_LABEL_INDEX_ID, new GreaterEqualConstraint(0));
+ final IntParameter classLabelIndexParam = new IntParameter(CLASS_LABEL_INDEX_ID);
+ classLabelIndexParam.addConstraint(new GreaterEqualConstraint(0));
final ObjectParameter<ClassLabel.Factory<?>> classlabelClassParam = new ObjectParameter<ClassLabel.Factory<?>>(CLASS_LABEL_CLASS_ID, ClassLabel.Factory.class, SimpleClassLabel.Factory.class);
config.grab(classLabelIndexParam);
config.grab(classlabelClassParam);
- if(classLabelIndexParam.isDefined() && classlabelClassParam.isDefined()) {
- classLabelIndex = classLabelIndexParam.getValue();
+ if (classLabelIndexParam.isDefined() && classlabelClassParam.isDefined()) {
+ classLabelIndex = classLabelIndexParam.intValue();
classLabelFactory = classlabelClassParam.instantiateClass(config);
}
}
@@ -177,4 +175,4 @@ public class ClassLabelFilter implements ObjectFilter {
return new ClassLabelFilter(classLabelIndex, classLabelFactory);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java
deleted file mode 100644
index 4793b041..00000000
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorProjectionFilter.java
+++ /dev/null
@@ -1,85 +0,0 @@
-package de.lmu.ifi.dbs.elki.datasource.filter;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-import java.util.BitSet;
-
-import de.lmu.ifi.dbs.elki.data.DoubleVector;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.utilities.Util;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-
-/**
- * <p>Parser to project the ParsingResult obtained by a suitable base parser
- * onto a selected subset of attributes.</p>
- *
- * @author Arthur Zimek
- *
- * @apiviz.uses DoubleVector
- */
-public class DoubleVectorProjectionFilter extends AbstractFeatureSelectionFilter<DoubleVector> {
- /**
- * Constructor.
- *
- * @param selectedAttributes
- */
- public DoubleVectorProjectionFilter(BitSet selectedAttributes) {
- super(selectedAttributes);
- }
-
- @Override
- protected DoubleVector filterSingleObject(DoubleVector obj) {
- return Util.project(obj, getSelectedAttributes());
- }
-
- @Override
- protected SimpleTypeInformation<? super DoubleVector> getInputTypeRestriction() {
- return TypeUtil.DOUBLE_VECTOR_FIELD;
- }
-
- @Override
- protected SimpleTypeInformation<? super DoubleVector> convertedType(SimpleTypeInformation<DoubleVector> in) {
- return new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, DoubleVector.STATIC, getDimensionality(), DoubleVector.STATIC);
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer extends AbstractFeatureSelectionFilter.Parameterizer<DoubleVector> {
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- }
-
- @Override
- protected DoubleVectorProjectionFilter makeInstance() {
- return new DoubleVectorProjectionFilter(selectedAttributes);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java
deleted file mode 100644
index b21e7cea..00000000
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/DoubleVectorRandomProjectionFilter.java
+++ /dev/null
@@ -1,87 +0,0 @@
-package de.lmu.ifi.dbs.elki.datasource.filter;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-import de.lmu.ifi.dbs.elki.data.DoubleVector;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.utilities.Util;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-
-/**
- * <p>
- * Parser to project the ParsingResult obtained by a suitable base parser onto a
- * randomly selected subset of attributes.
- * </p>
- *
- * @author Arthur Zimek
- *
- * @apiviz.uses DoubleVector
- */
-public class DoubleVectorRandomProjectionFilter extends AbstractRandomFeatureSelectionFilter<DoubleVector> {
- /**
- * Constructor.
- *
- * @param dim
- * @param seed
- */
- public DoubleVectorRandomProjectionFilter(int dim, long seed) {
- super(dim, seed);
- }
-
- @Override
- protected DoubleVector filterSingleObject(DoubleVector obj) {
- return Util.project(obj, selectedAttributes);
- }
-
- @Override
- protected SimpleTypeInformation<? super DoubleVector> getInputTypeRestriction() {
- return TypeUtil.DOUBLE_VECTOR_FIELD;
- }
-
- @Override
- protected SimpleTypeInformation<? super DoubleVector> convertedType(SimpleTypeInformation<DoubleVector> in) {
- initializeRandomAttributes(in);
- return new VectorFieldTypeInformation<DoubleVector>(DoubleVector.class, DoubleVector.STATIC, k, DoubleVector.STATIC);
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer extends AbstractRandomFeatureSelectionFilter.Parameterizer<DoubleVector> {
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- }
-
- @Override
- protected DoubleVectorRandomProjectionFilter makeInstance() {
- return new DoubleVectorRandomProjectionFilter(k,seed);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
index f48810f5..2753534a 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
@@ -48,13 +48,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
// TODO: use a non-string class for external ids?
public class ExternalIDFilter implements ObjectFilter {
/**
- * Parameter that specifies the index of the label to be used as
- * external Id, must be an integer equal to or greater than 0.
+ * Parameter that specifies the index of the label to be used as external Id,
+ * must be an integer equal to or greater than 0.
* <p>
* Key: {@code -dbc.externalIdIndex}
* </p>
*/
- public static final OptionID EXTERNALID_INDEX_ID = OptionID.getOrCreateOptionID("dbc.externalIdIndex", "The index of the label to be used as external Id.");
+ public static final OptionID EXTERNALID_INDEX_ID = new OptionID("dbc.externalIdIndex", "The index of the label to be used as external Id.");
/**
* The index of the label to be used as external Id.
@@ -77,10 +77,10 @@ public class ExternalIDFilter implements ObjectFilter {
// Find a labellist column
boolean done = false;
boolean keeplabelcol = false;
- for(int i = 0; i < objects.metaLength(); i++) {
+ for (int i = 0; i < objects.metaLength(); i++) {
SimpleTypeInformation<?> meta = objects.meta(i);
// Skip non-labellist columns - or if we already had a labellist
- if(done || meta.getRestrictionClass() != LabelList.class) {
+ if (done || !LabelList.class.equals(meta.getRestrictionClass())) {
bundle.appendColumn(meta, objects.getColumn(i));
continue;
}
@@ -91,16 +91,15 @@ public class ExternalIDFilter implements ObjectFilter {
List<LabelList> lblcol = new ArrayList<LabelList>(objects.dataLength());
// Split the column
- for(Object obj : objects.getColumn(i)) {
- if(obj != null) {
+ for (Object obj : objects.getColumn(i)) {
+ if (obj != null) {
LabelList ll = (LabelList) obj;
eidcol.add(new ExternalID(ll.remove(externalIdIndex)));
lblcol.add(ll);
- if(ll.size() > 0) {
+ if (ll.size() > 0) {
keeplabelcol = true;
}
- }
- else {
+ } else {
eidcol.add(null);
lblcol.add(null);
}
@@ -108,7 +107,7 @@ public class ExternalIDFilter implements ObjectFilter {
bundle.appendColumn(TypeUtil.EXTERNALID, eidcol);
// Only add the label column when it's not empty.
- if(keeplabelcol) {
+ if (keeplabelcol) {
bundle.appendColumn(meta, lblcol);
}
}
@@ -128,9 +127,10 @@ public class ExternalIDFilter implements ObjectFilter {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter externalIdIndexParam = new IntParameter(EXTERNALID_INDEX_ID, new GreaterEqualConstraint(0));
- if(config.grab(externalIdIndexParam)) {
- externalIdIndex = externalIdIndexParam.getValue();
+ final IntParameter externalIdIndexParam = new IntParameter(EXTERNALID_INDEX_ID);
+ externalIdIndexParam.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(externalIdIndexParam)) {
+ externalIdIndex = externalIdIndexParam.intValue();
}
}
@@ -139,4 +139,4 @@ public class ExternalIDFilter implements ObjectFilter {
return new ExternalIDFilter(externalIdIndex);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java
index 0015dce1..7b794066 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java
@@ -37,25 +37,33 @@ import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
*/
public final class FilterUtil {
/**
- * Try to guess the factory
+ * Fake constructor: do not instantiate.
+ */
+ private FilterUtil() {
+ // Do not instantiate.
+ }
+
+ /**
+ * Try to guess the appropriate factory.
*
* @param in Input type
+ * @param <V> Vector type
* @return Factory
*/
@SuppressWarnings("unchecked")
- protected static <V extends NumberVector<?, ?>> V guessFactory(SimpleTypeInformation<V> in) {
- V factory = null;
+ public static <V extends NumberVector<?>> NumberVector.Factory<V, ?> guessFactory(SimpleTypeInformation<V> in) {
+ NumberVector.Factory<V, ?> factory = null;
if(in instanceof VectorFieldTypeInformation) {
- factory = ((VectorFieldTypeInformation<V>) in).getFactory();
+ factory = (NumberVector.Factory<V, ?>) ((VectorFieldTypeInformation<V>) in).getFactory();
}
if(factory == null) {
// FIXME: hack. Add factories to simple type information, too?
try {
- Field f = in.getRestrictionClass().getField("STATIC");
- factory = (V) f.get(null);
+ Field f = in.getRestrictionClass().getField("FACTORY");
+ factory = (NumberVector.Factory<V, ?>) f.get(null);
}
catch(Exception e) {
- LoggingUtil.warning("Cannot determine factory for type " + in.getRestrictionClass());
+ LoggingUtil.warning("Cannot determine factory for type " + in.getRestrictionClass(), e);
}
}
return factory;
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
index c34ecbe7..2e5071a4 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
@@ -47,7 +47,7 @@ public class FixedDBIDsFilter extends AbstractStreamFilter {
* Key: {@code -dbc.startid}
* </p>
*/
- public static final OptionID IDSTART_ID = OptionID.getOrCreateOptionID("dbc.startid", "Object ID to start counting with");
+ public static final OptionID IDSTART_ID = new OptionID("dbc.startid", "Object ID to start counting with");
/**
* The filtered meta
@@ -116,7 +116,7 @@ public class FixedDBIDsFilter extends AbstractStreamFilter {
super.makeOptions(config);
IntParameter startidParam = new IntParameter(IDSTART_ID);
if(config.grab(startidParam)) {
- startid = startidParam.getValue();
+ startid = startidParam.getValue().intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
new file mode 100644
index 00000000..6723a12a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
@@ -0,0 +1,164 @@
+package de.lmu.ifi.dbs.elki.datasource.filter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Add Jitter, preserving the histogram properties (same sum, nonnegative).
+ *
+ * For each vector, the total sum of all dimensions is computed.<br />
+ * Then a random vector of the average length <code>jitter * scale</code> is
+ * added and the result normalized to the original vector's sum. The individual
+ * dimensions are drawn from an exponential distribution with scale
+ * <code>jitter / dimensionality</code>, so it is expected that the error in
+ * most dimensions will be low, and higher in few.
+ *
+ * This is designed to degrade the quality of a histogram, while preserving the
+ * total sum (e.g. to keep the normalization); "jitter" controls the degradation
+ * amount. A vector whose total sum is zero keeps its original values.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+@Description("Add uniform Jitter to a dataset, while preserving the total vector sum.")
+public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVectorStreamConversionFilter<V, V> {
+ /**
+ * Jitter amount.
+ */
+ double jitter;
+
+ /**
+ * Random generator.
+ */
+ ExponentialDistribution rnd;
+
+ /**
+ * Constructor.
+ *
+ * @param jitter Relative amount of jitter to add
+ * @param rnd Random generator
+ */
+ public HistogramJitterFilter(double jitter, RandomFactory rnd) {
+ super();
+ this.jitter = jitter;
+ this.rnd = new ExponentialDistribution(1, rnd.getRandom());
+ }
+
+ @Override
+ protected V filterSingleObject(V obj) {
+ final int dim = obj.getDimensionality();
+ // Compute the total sum.
+ double osum = 0;
+ for (int i = 0; i < dim; i++) {
+ osum += obj.doubleValue(i);
+ }
+ // Scale factor applied to each random draw:
+ final double scale = 2 * jitter / dim * osum;
+ // Generate jitter vector
+ double[] raw = new double[dim];
+ double jsum = 0; // Sum of jitter
+ for (int i = 0; i < raw.length; i++) {
+ raw[i] = rnd.nextRandom() * scale;
+ jsum += raw[i];
+ }
+ final double mix = (osum != 0.) ? jsum / osum : 0.; // 0 / 0 would be NaN
+ // Combine the two vectors; the total is jsum + (1 - mix) * osum = osum.
+ for (int i = 0; i < raw.length; i++) {
+ raw[i] = raw[i] + (1 - mix) * obj.doubleValue(i);
+ }
+ return factory.newNumberVector(raw);
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+ }
+
+ @Override
+ protected SimpleTypeInformation<V> convertedType(SimpleTypeInformation<V> in) {
+ initializeOutputType(in);
+ return in; // Jittered vectors keep the input type
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Option ID for the jitter strength.
+ */
+ public static final OptionID JITTER_ID = new OptionID("jitter.amount", "Jitter amount relative to data.");
+
+ /**
+ * Option ID for the jitter random seed.
+ */
+ public static final OptionID SEED_ID = new OptionID("jitter.seed", "Jitter random seed.");
+
+ /**
+ * Jitter amount.
+ */
+ double jitter = 0.1;
+
+ /**
+ * Random generator factory.
+ */
+ RandomFactory rnd;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleParameter jitterP = new DoubleParameter(JITTER_ID);
+ jitterP.addConstraint(new GreaterEqualConstraint(Double.valueOf(0.0)));
+ if (config.grab(jitterP)) {
+ jitter = jitterP.getValue().doubleValue();
+ }
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
+ }
+ }
+
+ @Override
+ protected HistogramJitterFilter<DoubleVector> makeInstance() {
+ return new HistogramJitterFilter<DoubleVector>(jitter, rnd);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java
index 7d5b6c44..bfc6ad5c 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java
@@ -39,7 +39,7 @@ public class NoMissingValuesFilter extends AbstractStreamFilter {
/**
* Class logger
*/
- private static final Logging logger = Logging.getLogger(NoMissingValuesFilter.class);
+ private static final Logging LOG = Logging.getLogger(NoMissingValuesFilter.class);
/**
* Number of columns
@@ -91,8 +91,8 @@ public class NoMissingValuesFilter extends AbstractStreamFilter {
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
- if(logger.isDebugging()) {
- logger.debug("Filtering the data set");
+ if(LOG.isDebugging()) {
+ LOG.debug("Filtering the data set");
}
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
index 56509d8a..0fbec083 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
@@ -26,13 +26,14 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
import java.util.Random;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Subsampling stream filter.
@@ -54,12 +55,12 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
* Constructor.
*
* @param prob Probability
- * @param seed Random seed
+ * @param rnd Random generator
*/
- public RandomSamplingStreamFilter(double prob, Long seed) {
+ public RandomSamplingStreamFilter(double prob, RandomFactory rnd) {
super();
this.prob = prob;
- this.random = (seed != null) ? new Random(seed) : new Random();
+ this.random = rnd.getRandom();
}
@Override
@@ -74,15 +75,15 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
@Override
public Event nextEvent() {
- while(true) {
+ while (true) {
Event ev = source.nextEvent();
- switch(ev){
+ switch(ev) {
case END_OF_STREAM:
return ev;
case META_CHANGED:
return ev;
case NEXT_OBJECT:
- if(random.nextDouble() < prob) {
+ if (random.nextDouble() < prob) {
return ev;
}
continue;
@@ -101,12 +102,12 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
/**
* Option ID for sampling probability
*/
- private static final OptionID PROB_ID = OptionID.getOrCreateOptionID("sampling.p", "Sampling probability. Each object has a chance of being samples with this probability.");
+ private static final OptionID PROB_ID = new OptionID("sampling.p", "Sampling probability. Each object has a chance of being samples with this probability.");
/**
* Option ID for random seed
*/
- private static final OptionID SEED_ID = OptionID.getOrCreateOptionID("sampling.seed", "Random generator seed for sampling.");
+ private static final OptionID SEED_ID = new OptionID("sampling.seed", "Random generator seed for sampling.");
/**
* Probability
@@ -114,26 +115,28 @@ public class RandomSamplingStreamFilter extends AbstractStreamFilter {
protected double prob;
/**
- * Random seed
+ * Random generator
*/
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter probP = new DoubleParameter(PROB_ID, new IntervalConstraint(0, IntervalBoundary.CLOSE, 1.0, IntervalBoundary.CLOSE));
- if(config.grab(probP)) {
- prob = probP.getValue();
+ DoubleParameter probP = new DoubleParameter(PROB_ID);
+ probP.addConstraint(new GreaterEqualConstraint(0.0));
+ probP.addConstraint(new LessEqualConstraint(1.0));
+ if (config.grab(probP)) {
+ prob = probP.getValue().doubleValue();
}
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected RandomSamplingStreamFilter makeInstance() {
- return new RandomSamplingStreamFilter(prob, seed);
+ return new RandomSamplingStreamFilter(prob, rnd);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
index a8bf2cec..01a6da10 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
@@ -29,10 +29,11 @@ import java.util.Random;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* A filter to shuffle the dataset.
@@ -43,49 +44,47 @@ public class ShuffleObjectsFilter implements ObjectFilter {
/**
* Class logger
*/
- private static final Logging logger = Logging.getLogger(ShuffleObjectsFilter.class);
+ private static final Logging LOG = Logging.getLogger(ShuffleObjectsFilter.class);
/**
* Optional parameter to specify a seed for randomly shuffling the rows of the
- * database. If unused, no shuffling will be performed. Shuffling takes time
+ * database. If not set, a random seed will be used. Shuffling takes time
* linearly dependent from the size of the database.
* <p>
* Key: {@code -shuffle.seed}
* </p>
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("shuffle.seed", "Seed for randomly shuffling the rows for the database. If the parameter is not set, no shuffling will be performed.");
+ public static final OptionID SEED_ID = new OptionID("shuffle.seed", "Seed for randomly shuffling the rows for the database. If the parameter is not set, a random seed will be used.");
/**
- * Seed for randomly shuffling the rows of the database. If null, no shuffling
- * will be performed. Shuffling takes time linearly dependent from the size of
- * the database.
+ * Random generator.
*/
- final Long seed;
+ final RandomFactory rnd;
/**
* Constructor.
*
- * @param seed Seed value, may be {@code null} for a random seed.
+ * @param rnd Random generator
*/
- public ShuffleObjectsFilter(Long seed) {
+ public ShuffleObjectsFilter(RandomFactory rnd) {
super();
- this.seed = seed;
+ this.rnd = rnd;
}
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
- if(logger.isDebugging()) {
- logger.debug("Shuffling the data set");
+ if (LOG.isDebugging()) {
+ LOG.debug("Shuffling the data set");
}
- final Random random = (seed == null) ? new Random() : new Random(seed);
+ final Random random = rnd.getRandom();
final int size = objects.dataLength();
final int[] offsets = new int[size];
- for(int i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
offsets[i] = i;
}
// Randomize the offset array
- for(int i = size; i > 1; i--) {
+ for (int i = size; i > 1; i--) {
final int j = random.nextInt(i);
// Swap the elements at positions j and i - 1:
final int temp = offsets[j];
@@ -94,11 +93,11 @@ public class ShuffleObjectsFilter implements ObjectFilter {
}
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
- for(int j = 0; j < objects.metaLength(); j++) {
+ for (int j = 0; j < objects.metaLength(); j++) {
// Reorder column accordingly
List<?> in = objects.getColumn(j);
List<Object> data = new ArrayList<Object>(size);
- for(int i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
data.add(in.get(offsets[i]));
}
bundle.appendColumn(objects.meta(j), data);
@@ -114,20 +113,20 @@ public class ShuffleObjectsFilter implements ObjectFilter {
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
- Long seed = null;
+ RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- LongParameter seedParam = new LongParameter(SEED_ID, true);
- if(config.grab(seedParam)) {
- seed = seedParam.getValue();
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected Object makeInstance() {
- return new ShuffleObjectsFilter(seed);
+ return new ShuffleObjectsFilter(rnd);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java
index 5aedc79c..308a54b1 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java
@@ -24,13 +24,13 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
*/
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Comparator;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator;
/**
* A filter to sort the data set by some label.
@@ -43,7 +43,7 @@ public class SortByLabelFilter implements ObjectFilter {
/**
* Class logger
*/
- private static final Logging logger = Logging.getLogger(SortByLabelFilter.class);
+ private static final Logging LOG = Logging.getLogger(SortByLabelFilter.class);
/**
* Constructor.
@@ -54,31 +54,31 @@ public class SortByLabelFilter implements ObjectFilter {
@Override
public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
- if(logger.isDebugging()) {
- logger.debug("Shuffling the data set");
+ if (LOG.isDebugging()) {
+ LOG.debug("Shuffling the data set");
}
// Prepare a reposition array for cheap resorting
final int size = objects.dataLength();
- final Integer[] offsets = new Integer[size];
- for(int i = 0; i < size; i++) {
+ final int[] offsets = new int[size];
+ for (int i = 0; i < size; i++) {
offsets[i] = i;
}
// Sort by labels - identify a label column
final int lblcol;
{
int lblc = -1;
- for(int i = 0; i < objects.metaLength(); i++) {
- if(TypeUtil.GUESSED_LABEL.isAssignableFromType(objects.meta(i))) {
+ for (int i = 0; i < objects.metaLength(); i++) {
+ if (TypeUtil.GUESSED_LABEL.isAssignableFromType(objects.meta(i))) {
lblc = i;
break;
}
}
lblcol = lblc; // make static
}
- Arrays.sort(offsets, new Comparator<Integer>() {
+ IntegerArrayQuickSort.sort(offsets, new IntegerComparator() {
@Override
- public int compare(Integer o1, Integer o2) {
+ public int compare(int o1, int o2) {
String l1 = objects.data(o1, lblcol).toString();
String l2 = objects.data(o2, lblcol).toString();
return l1.compareToIgnoreCase(l2);
@@ -86,15 +86,15 @@ public class SortByLabelFilter implements ObjectFilter {
});
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
- for(int j = 0; j < objects.metaLength(); j++) {
+ for (int j = 0; j < objects.metaLength(); j++) {
// Reorder column accordingly
List<?> in = objects.getColumn(j);
List<Object> data = new ArrayList<Object>(size);
- for(int i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
data.add(in.get(offsets[i]));
}
bundle.appendColumn(objects.meta(j), data);
}
return bundle;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorProjectionFilter.java
deleted file mode 100644
index 79a671c5..00000000
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorProjectionFilter.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package de.lmu.ifi.dbs.elki.datasource.filter;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-import java.util.BitSet;
-
-import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.utilities.Util;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-
-/**
- * <p>
- * Parser to project the ParsingResult obtained by a suitable base parser onto a
- * selected subset of attributes.
- * </p>
- *
- * @author Arthur Zimek
- */
-public class SparseNumberVectorProjectionFilter<V extends SparseNumberVector<V, ?>> extends AbstractFeatureSelectionFilter<V> {
- /**
- * Constructor.
- *
- * @param selectedAttributes
- */
- public SparseNumberVectorProjectionFilter(BitSet selectedAttributes) {
- super(selectedAttributes);
- }
-
- @Override
- protected V filterSingleObject(V obj) {
- return Util.project(obj, getSelectedAttributes());
- }
-
- @Override
- protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
- return TypeUtil.SPARSE_VECTOR_FIELD;
- }
-
- @Override
- protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
- V factory = FilterUtil.guessFactory(in);
- return new VectorFieldTypeInformation<V>(in.getRestrictionClass(), getDimensionality(), factory);
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<V extends SparseNumberVector<V, ?>> extends AbstractFeatureSelectionFilter.Parameterizer<V> {
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- }
-
- @Override
- protected SparseNumberVectorProjectionFilter<V> makeInstance() {
- return new SparseNumberVectorProjectionFilter<V>(selectedAttributes);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorRandomProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorRandomProjectionFilter.java
deleted file mode 100644
index 8597d659..00000000
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseNumberVectorRandomProjectionFilter.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package de.lmu.ifi.dbs.elki.datasource.filter;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.utilities.Util;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-
-/**
- * <p>Parser to project the ParsingResult obtained by a suitable base parser
- * onto a randomly selected subset of attributes.</p>
- *
- * @author Arthur Zimek
- */
-public class SparseNumberVectorRandomProjectionFilter<V extends SparseNumberVector<V, ?>> extends AbstractRandomFeatureSelectionFilter<V> {
- /**
- * Constructor.
- *
- * @param dim
- */
- public SparseNumberVectorRandomProjectionFilter(int dim) {
- super(dim);
- }
-
- @Override
- protected V filterSingleObject(V obj) {
- return Util.project(obj, selectedAttributes);
- }
-
- @Override
- protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
- return TypeUtil.SPARSE_VECTOR_FIELD;
- }
-
- @Override
- protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
- initializeRandomAttributes(in);
- V factory = FilterUtil.guessFactory(in);
- return new VectorFieldTypeInformation<V>(in.getRestrictionClass(), k, factory);
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<V extends SparseNumberVector<V, ?>> extends AbstractRandomFeatureSelectionFilter.Parameterizer<V> {
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- }
-
- @Override
- protected SparseNumberVectorRandomProjectionFilter<V> makeInstance() {
- return new SparseNumberVectorRandomProjectionFilter<V>(k);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java
index 9d34057b..d3ef418d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java
@@ -33,10 +33,12 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
* the maximum dimensionality for each vector.
*
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
-public class SparseVectorFieldFilter<V extends SparseNumberVector<V, ?>> extends AbstractConversionFilter<V, V> {
+public class SparseVectorFieldFilter<V extends SparseNumberVector<?>> extends AbstractConversionFilter<V, V> {
/**
- * Maximum dimension
+ * Maximum dimension.
*/
int maxdim = -1;
@@ -71,7 +73,7 @@ public class SparseVectorFieldFilter<V extends SparseNumberVector<V, ?>> extends
@Override
protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
- V factory = FilterUtil.guessFactory(in);
- return new VectorFieldTypeInformation<V>(in.getRestrictionClass(), maxdim, factory);
+ SparseNumberVector.Factory<V, ?> factory = (SparseNumberVector.Factory<V, ?>) FilterUtil.guessFactory(in);
+ return new VectorFieldTypeInformation<V>(factory, maxdim);
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
index 827a5011..898eeff7 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector.Factory;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
@@ -34,7 +35,8 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListGreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
@@ -44,8 +46,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
* @author Erich Schubert
*
* @apiviz.uses NumberVector
+ *
+ * @param <V> Vector type
*/
-public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements ObjectFilter {
+public class SplitNumberVectorFilter<V extends NumberVector<?>> implements ObjectFilter {
/**
* Selected dimensions.
*/
@@ -63,46 +67,47 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
- if(objects.dataLength() == 0) {
+ if (objects.dataLength() == 0) {
return objects;
}
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
- for(int r = 0; r < objects.metaLength(); r++) {
+ for (int r = 0; r < objects.metaLength(); r++) {
@SuppressWarnings("unchecked")
SimpleTypeInformation<Object> type = (SimpleTypeInformation<Object>) objects.meta(r);
@SuppressWarnings("unchecked")
final List<Object> column = (List<Object>) objects.getColumn(r);
- if(!getInputTypeRestriction().isAssignableFromType(type)) {
+ if (!getInputTypeRestriction().isAssignableFromType(type)) {
bundle.appendColumn(type, column);
continue;
}
// Should be a vector type after above test.
@SuppressWarnings("unchecked")
final VectorFieldTypeInformation<V> vtype = VectorFieldTypeInformation.class.cast(type);
+ Factory<V, ?> factory = FilterUtil.guessFactory(vtype);
// Get the replacement type informations
- VectorFieldTypeInformation<V> type1 = new VectorFieldTypeInformation<V>(type.getRestrictionClass(), type.getSerializer(), dims.length, dims.length);
- VectorFieldTypeInformation<V> type2 = new VectorFieldTypeInformation<V>(type.getRestrictionClass(), type.getSerializer(), vtype.dimensionality() - dims.length, vtype.dimensionality() - dims.length);
+ VectorFieldTypeInformation<V> type1 = new VectorFieldTypeInformation<V>(factory, dims.length);
+ VectorFieldTypeInformation<V> type2 = new VectorFieldTypeInformation<V>(factory, vtype.getDimensionality() - dims.length);
final List<V> col1 = new ArrayList<V>(column.size());
final List<V> col2 = new ArrayList<V>(column.size());
bundle.appendColumn(type1, col1);
bundle.appendColumn(type2, col2);
// Build other dimensions array.
- int[] odims = new int[vtype.dimensionality() - dims.length];
+ int[] odims = new int[vtype.getDimensionality() - dims.length];
{
int i = 0;
- for(int d = 1; d <= vtype.dimensionality(); d++) {
+ for (int d = 0; d < vtype.getDimensionality(); d++) {
boolean found = false;
- for(int j = 0; j < dims.length; j++) {
- if(dims[j] == d) {
+ for (int j = 0; j < dims.length; j++) {
+ if (dims[j] == d) {
found = true;
break;
}
}
- if(!found) {
- if(i >= odims.length) {
+ if (!found) {
+ if (i >= odims.length) {
throw new AbortException("Dimensionalities not proper!");
}
odims[i] = d;
@@ -110,20 +115,20 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob
}
}
}
- // Normalization scan
- for(int i = 0; i < objects.dataLength(); i++) {
+ // Splitting scan.
+ for (int i = 0; i < objects.dataLength(); i++) {
@SuppressWarnings("unchecked")
final V obj = (V) column.get(i);
double[] part1 = new double[dims.length];
double[] part2 = new double[obj.getDimensionality() - dims.length];
- for(int d = 0; d < dims.length; d++) {
+ for (int d = 0; d < dims.length; d++) {
part1[d] = obj.doubleValue(dims[d]);
}
- for(int d = 0; d < odims.length; d++) {
+ for (int d = 0; d < odims.length; d++) {
part2[d] = obj.doubleValue(odims[d]);
}
- col1.add(obj.newNumberVector(part1));
- col2.add(obj.newNumberVector(part2));
+ col1.add(factory.newNumberVector(part1));
+ col2.add(factory.newNumberVector(part2));
}
}
return bundle;
@@ -137,10 +142,10 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob
private TypeInformation getInputTypeRestriction() {
// Find maximum dimension requested
int m = dims[0];
- for(int i = 1; i < dims.length; i++) {
+ for (int i = 1; i < dims.length; i++) {
m = Math.max(dims[i], m);
}
- return new VectorFieldTypeInformation<NumberVector<?, ?>>(NumberVector.class, m, Integer.MAX_VALUE);
+ return new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, m, Integer.MAX_VALUE);
}
/**
@@ -150,11 +155,11 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* The parameter listing the split dimensions.
*/
- public static final OptionID SELECTED_ATTRIBUTES_ID = OptionID.getOrCreateOptionID("split.dims", "Dimensions to split into the first relation.");
+ public static final OptionID SELECTED_ATTRIBUTES_ID = new OptionID("split.dims", "Dimensions to split into the first relation.");
/**
* Dimensions to use.
@@ -164,12 +169,13 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID, new ListGreaterEqualConstraint<Integer>(1));
- if(config.grab(selectedAttributesP)) {
+ IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID);
+ selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
+ if (config.grab(selectedAttributesP)) {
List<Integer> dimensionList = selectedAttributesP.getValue();
dims = new int[dimensionList.size()];
- for(int i = 0; i < dimensionList.size(); i++) {
- dims[i] = dimensionList.get(i);
+ for (int i = 0; i < dimensionList.size(); i++) {
+ dims[i] = dimensionList.get(i).intValue();
}
}
}
@@ -179,4 +185,4 @@ public class SplitNumberVectorFilter<V extends NumberVector<V, ?>> implements Ob
return new SplitNumberVectorFilter<V>(dims);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java
index e40565f9..5d121659 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java
@@ -31,6 +31,8 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource;
*
* @author Erich Schubert
*
+ * @apiviz.landmark
+ *
* @apiviz.uses BundleStreamSource - - «filters»
*/
public interface StreamFilter extends ObjectFilter, BundleStreamSource {
@@ -40,4 +42,4 @@ public interface StreamFilter extends ObjectFilter, BundleStreamSource {
* @param source Stream source
*/
public void init(BundleStreamSource source);
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
index 5b6c02e3..2dcf09f8 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
@@ -23,9 +23,10 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
-import de.lmu.ifi.dbs.elki.datasource.filter.AbstractConversionFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorConversionFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
/**
@@ -35,7 +36,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
*
* @param <O> Object type processed
*/
-public abstract class AbstractNormalization<O> extends AbstractConversionFilter<O, O> implements Normalization<O> {
+public abstract class AbstractNormalization<O extends NumberVector<?>> extends AbstractVectorConversionFilter<O, O> implements Normalization<O> {
/**
* Initializes the option handler and the parameter map.
*/
@@ -45,11 +46,12 @@ public abstract class AbstractNormalization<O> extends AbstractConversionFilter<
@Override
protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<O> in) {
+ initializeOutputType(in);
return in;
}
@Override
- public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) throws NonNumericFeaturesException {
+ public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) {
return super.filter(objects);
}
@@ -61,8 +63,6 @@ public abstract class AbstractNormalization<O> extends AbstractConversionFilter<
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
- result.append("normalization class: ").append(getClass().getName());
- return result.toString();
+ return getClass().getName();
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
index c1524788..a1e2c55e 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
@@ -23,9 +23,10 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
-import de.lmu.ifi.dbs.elki.datasource.filter.AbstractStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
/**
@@ -35,7 +36,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
*
* @param <O> Object type processed
*/
-public abstract class AbstractStreamNormalization<O> extends AbstractStreamConversionFilter<O, O> implements Normalization<O> {
+public abstract class AbstractStreamNormalization<O extends NumberVector<?>> extends AbstractVectorStreamConversionFilter<O, O> implements Normalization<O> {
/**
* Initializes the option handler and the parameter map.
*/
@@ -45,11 +46,12 @@ public abstract class AbstractStreamNormalization<O> extends AbstractStreamConve
@Override
protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<O> in) {
+ initializeOutputType(in);
return in;
}
@Override
- public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) throws NonNumericFeaturesException {
+ public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) {
return super.filter(objects);
}
@@ -61,7 +63,7 @@ public abstract class AbstractStreamNormalization<O> extends AbstractStreamConve
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("normalization class: ").append(getClass().getName());
return result.toString();
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
index c0f2a955..f5e24bca 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
@@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
*
* @apiviz.uses NumberVector
*/
-public class AttributeWiseErfNormalization<O extends NumberVector<O, ?>> extends AbstractNormalization<O> {
+public class AttributeWiseErfNormalization<O extends NumberVector<?>> extends AbstractNormalization<O> {
/**
* Constructor.
*/
@@ -47,7 +47,7 @@ public class AttributeWiseErfNormalization<O extends NumberVector<O, ?>> extends
}
@Override
- public O restore(O featureVector) throws NonNumericFeaturesException {
+ public O restore(O featureVector) {
throw new UnsupportedOperationException("Not implemented yet.");
}
@@ -55,9 +55,9 @@ public class AttributeWiseErfNormalization<O extends NumberVector<O, ?>> extends
protected O filterSingleObject(O obj) {
double[] val = new double[obj.getDimensionality()];
for(int i = 0; i < val.length; i++) {
- val[i] = NormalDistribution.erf(obj.doubleValue(i + 1));
+ val[i] = NormalDistribution.erf(obj.doubleValue(i));
}
- return obj.newNumberVector(val);
+ return factory.newNumberVector(val);
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
index 4cf3c606..62c0bf12 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
@@ -50,16 +50,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
* @apiviz.uses NumberVector
*/
// TODO: extract superclass AbstractAttributeWiseNormalization
-public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> extends AbstractNormalization<V> {
+public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends AbstractNormalization<V> {
/**
* Parameter for minimum.
*/
- public static final OptionID MINIMA_ID = OptionID.getOrCreateOptionID("normalize.min", "a comma separated concatenation of the minimum values in each dimension that are mapped to 0. If no value is specified, the minimum value of the attribute range in this dimension will be taken.");
+ public static final OptionID MINIMA_ID = new OptionID("normalize.min", "a comma separated concatenation of the minimum values in each dimension that are mapped to 0. If no value is specified, the minimum value of the attribute range in this dimension will be taken.");
/**
* Parameter for maximum.
*/
- public static final OptionID MAXIMA_ID = OptionID.getOrCreateOptionID("normalize.max", "a comma separated concatenation of the maximum values in each dimension that are mapped to 1. If no value is specified, the maximum value of the attribute range in this dimension will be taken.");
+ public static final OptionID MAXIMA_ID = new OptionID("normalize.max", "a comma separated concatenation of the maximum values in each dimension that are mapped to 1. If no value is specified, the maximum value of the attribute range in this dimension will be taken.");
/**
* Stores the maximum in each dimension.
@@ -103,13 +103,13 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
final double val = featureVector.doubleValue(d);
- if(val > maxima[d - 1]) {
- maxima[d - 1] = val;
+ if(val > maxima[d]) {
+ maxima[d] = val;
}
- if(val < minima[d - 1]) {
- minima[d - 1] = val;
+ if(val < minima[d]) {
+ minima[d] = val;
}
}
}
@@ -120,20 +120,20 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors and given Minima/Maxima differ in length.");
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = (featureVector.doubleValue(d) - minima[d - 1]) / factor(d);
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = (featureVector.doubleValue(d) - minima[d]) / factor(d);
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
@Override
public V restore(V featureVector) throws NonNumericFeaturesException {
if(featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
double[] values = new double[featureVector.getDimensionality()];
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = (featureVector.doubleValue(d) * (factor(d)) + minima[d - 1]);
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = (featureVector.doubleValue(d) * (factor(d)) + minima[d]);
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
else {
throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + maxima.length);
@@ -151,7 +151,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
* @return a factor for normalization in a certain dimension
*/
private double factor(int dimension) {
- return maxima[dimension - 1] != minima[dimension - 1] ? maxima[dimension - 1] - minima[dimension - 1] : maxima[dimension - 1] != 0 ? maxima[dimension - 1] : 1;
+ return maxima[dimension] > minima[dimension] ? maxima[dimension] - minima[dimension] : maxima[dimension] > 0 ? maxima[dimension] : 1;
}
@Override
@@ -161,13 +161,12 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
int[] row = linearEquationSystem.getRowPermutations();
int[] col = linearEquationSystem.getColumnPermutations();
- // noinspection ForLoopReplaceableByForEach
for(int i = 0; i < coeff.length; i++) {
for(int r = 0; r < coeff.length; r++) {
double sum = 0.0;
for(int c = 0; c < coeff[0].length; c++) {
- sum += minima[c] * coeff[row[r]][col[c]] / factor(c + 1);
- coeff[row[r]][col[c]] = coeff[row[r]][col[c]] / factor(c + 1);
+ sum += minima[c] * coeff[row[r]][col[c]] / factor(c);
+ coeff[row[r]][col[c]] = coeff[row[r]][col[c]] / factor(c);
}
rhs[row[r]] = rhs[row[r]] + sum;
}
@@ -179,11 +178,11 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("normalization class: ").append(getClass().getName());
- result.append("\n");
+ result.append('\n');
result.append("normalization minima: ").append(FormatUtil.format(minima));
- result.append("\n");
+ result.append('\n');
result.append("normalization maxima: ").append(FormatUtil.format(maxima));
return result.toString();
}
@@ -200,7 +199,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* Stores the maximum in each dimension.
*/
@@ -223,7 +222,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<V, ?>> exte
maxima = ArrayLikeUtil.toPrimitiveDoubleArray(maximaP.getValue());
}
- ArrayList<Parameter<?, ?>> global_1 = new ArrayList<Parameter<?, ?>>();
+ ArrayList<Parameter<?>> global_1 = new ArrayList<Parameter<?>>();
global_1.add(minimaP);
global_1.add(maximaP);
config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
index 52a0499f..0671231d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
@@ -53,21 +53,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
* @apiviz.uses NumberVector
*/
// TODO: extract superclass AbstractAttributeWiseNormalization
-public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> extends AbstractNormalization<V> {
+public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> extends AbstractNormalization<V> {
/**
* Class logger.
*/
- public static final Logging logger = Logging.getLogger(AttributeWiseVarianceNormalization.class);
+ private static final Logging LOG = Logging.getLogger(AttributeWiseVarianceNormalization.class);
/**
* Parameter for means.
*/
- public static final OptionID MEAN_ID = OptionID.getOrCreateOptionID("normalize.mean", "a comma separated concatenation of the mean values in each dimension that are mapped to 0. If no value is specified, the mean value of the attribute range in this dimension will be taken.");
+ public static final OptionID MEAN_ID = new OptionID("normalize.mean", "a comma separated concatenation of the mean values in each dimension that are mapped to 0. If no value is specified, the mean value of the attribute range in this dimension will be taken.");
/**
* Parameter for stddevs.
*/
- public static final OptionID STDDEV_ID = OptionID.getOrCreateOptionID("normalize.stddev", "a comma separated concatenation of the standard deviations in each dimension that are scaled to 1. If no value is specified, the standard deviation of the attribute range in this dimension will be taken.");
+ public static final OptionID STDDEV_ID = new OptionID("normalize.stddev", "a comma separated concatenation of the standard deviations in each dimension that are scaled to 1. If no value is specified, the standard deviation of the attribute range in this dimension will be taken.");
/**
* Stores the mean in each dimension.
@@ -108,14 +108,14 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
int dimensionality = featureVector.getDimensionality();
mvs = MeanVariance.newArray(dimensionality);
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- mvs[d - 1].put(featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ mvs[d].put(featureVector.doubleValue(d));
}
}
@Override
protected void prepareComplete() {
- StringBuffer buf = logger.isVerbose() ? new StringBuffer() : null;
+ StringBuilder buf = LOG.isVerbose() ? new StringBuilder() : null;
final int dimensionality = mvs.length;
mean = new double[dimensionality];
stddev = new double[dimensionality];
@@ -134,33 +134,40 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
}
mvs = null;
if(buf != null) {
- logger.debugFine(buf.toString());
+ LOG.debugFine(buf.toString());
}
}
@Override
protected V filterSingleObject(V featureVector) {
double[] values = new double[featureVector.getDimensionality()];
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = normalize(d - 1, featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = normalize(d, featureVector.doubleValue(d));
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
@Override
public V restore(V featureVector) throws NonNumericFeaturesException {
if(featureVector.getDimensionality() == mean.length) {
double[] values = new double[featureVector.getDimensionality()];
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- values[d - 1] = restore(d - 1, featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ values[d] = restore(d, featureVector.doubleValue(d));
}
- return featureVector.newNumberVector(values);
+ return factory.newNumberVector(values);
}
else {
throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + mean.length);
}
}
+ /**
+ * Normalize a single dimension.
+ *
+ * @param d Dimension
+ * @param val Value
+ * @return Normalized value
+ */
private double normalize(int d, double val) {
if(mean.length == 1) {
return (val - mean[0]) / stddev[0];
@@ -170,6 +177,12 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
}
}
+ /**
+ * Restore a single dimension.
+ * @param d Dimension
+ * @param val Value
+ * @return Restored value
+ */
private double restore(int d, double val) {
if(mean.length == 1) {
return (val * stddev[0]) + mean[0];
@@ -208,11 +221,11 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("normalization class: ").append(getClass().getName());
- result.append("\n");
+ result.append('\n');
result.append("normalization means: ").append(FormatUtil.format(mean));
- result.append("\n");
+ result.append('\n');
result.append("normalization stddevs: ").append(FormatUtil.format(stddev));
return result.toString();
@@ -225,7 +238,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* Stores the mean in each dimension.
*/
@@ -255,7 +268,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<V, ?>> ex
}
}
- ArrayList<Parameter<?, ?>> global_1 = new ArrayList<Parameter<?, ?>>();
+ ArrayList<Parameter<?>> global_1 = new ArrayList<Parameter<?>>();
global_1.add(meanP);
global_1.add(stddevP);
config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
index 9350426b..24f3a850 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
@@ -40,15 +40,17 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
* @author Erich Schubert
*
* @apiviz.uses SparseNumberVector
+ *
+ * @param <V> Vector type
*/
-public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<V, ?>> extends AbstractNormalization<V> {
+public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<?>> extends AbstractNormalization<V> {
/**
- * The IDF storage
+ * The IDF storage.
*/
TIntDoubleMap idf = new TIntDoubleHashMap();
/**
- * The number of objects in the dataset
+ * The number of objects in the dataset.
*/
int objcnt = 0;
@@ -73,13 +75,7 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
BitSet b = featureVector.getNotNullMask();
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
if(featureVector.doubleValue(i) >= 0.0) {
- Number c = idf.get(i);
- if(c == null) {
- idf.put(i, 1);
- }
- else {
- idf.put(i, c.intValue() + 1);
- }
+ idf.put(i, idf.get(i) + 1);
}
}
objcnt += 1;
@@ -103,7 +99,7 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
vals.put(i, (float) (featureVector.doubleValue(i) * idf.get(i)));
}
- return featureVector.newNumberVector(vals, featureVector.getDimensionality());
+ return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
@Override
@@ -113,7 +109,7 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
vals.put(i, (float) (featureVector.doubleValue(i) / idf.get(i)));
}
- return featureVector.newNumberVector(vals, featureVector.getDimensionality());
+ return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
index 2edeebf9..457cc6eb 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
@@ -42,14 +42,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @param <V> vector type
*/
-public class LengthNormalization<V extends NumberVector<V, ?>> extends AbstractStreamNormalization<V> {
+public class LengthNormalization<V extends NumberVector<?>> extends AbstractStreamNormalization<V> {
/**
- * Norm to use
+ * Norm to use.
*/
DoubleNorm<? super V> norm;
/**
- * Constructor
+ * Constructor.
*
* @param norm Norm to use
*/
@@ -61,11 +61,11 @@ public class LengthNormalization<V extends NumberVector<V, ?>> extends AbstractS
@Override
protected V filterSingleObject(V featureVector) {
final double d = norm.doubleNorm(featureVector);
- return featureVector.newNumberVector(featureVector.getColumnVector().timesEquals(1 / d).getArrayRef());
+ return factory.newNumberVector(featureVector.getColumnVector().timesEquals(1 / d).getArrayRef());
}
@Override
- public V restore(V featureVector) throws NonNumericFeaturesException {
+ public V restore(V featureVector) {
throw new UnsupportedOperationException();
}
@@ -87,14 +87,14 @@ public class LengthNormalization<V extends NumberVector<V, ?>> extends AbstractS
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
- * Option ID for normalization norm
+ * Option ID for normalization norm.
*/
- public static final OptionID NORM_ID = OptionID.getOrCreateOptionID("normalization.norm", "Norm (length function) to use for computing the vector length.");
+ public static final OptionID NORM_ID = new OptionID("normalization.norm", "Norm (length function) to use for computing the vector length.");
/**
- * Norm to use
+ * Norm to use.
*/
DoubleNorm<? super V> norm;
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
index be8c1166..519a3743 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
@@ -65,10 +65,10 @@ public class RankTieNormalization implements ObjectFilter {
continue;
}
@SuppressWarnings("unchecked")
- final List<? extends NumberVector<?, ?>> castColumn = (List<? extends NumberVector<?, ?>>) column;
+ final List<? extends NumberVector<?>> castColumn = (List<? extends NumberVector<?>>) column;
// Get the replacement type information
- final int dim = ((VectorFieldTypeInformation<?>) type).dimensionality();
- final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<IntegerVector>(IntegerVector.class, dim, IntegerVector.STATIC);
+ final int dim = ((VectorFieldTypeInformation<?>) type).getDimensionality();
+ final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<IntegerVector>(IntegerVector.STATIC, dim);
// Output vectors
int[][] posvecs = new int[len][dim];
@@ -78,7 +78,7 @@ public class RankTieNormalization implements ObjectFilter {
for(int i = 0; i < sorter.length; i++) {
sorter[i] = new DoubleIntPair(Double.NaN, -1);
}
- for(int d = 1; d <= dim; d++) {
+ for(int d = 0; d < dim; d++) {
// fill array
for(int i = 0; i < sorter.length; i++) {
sorter[i].first = castColumn.get(i).doubleValue(d);
@@ -90,12 +90,12 @@ public class RankTieNormalization implements ObjectFilter {
for(int sta = 0; sta < sorter.length;) {
// Compute ties
int end = sta + 1;
- while(end < sorter.length && sorter[sta].first == sorter[end].first) {
+ while(end < sorter.length && !(sorter[sta].first < sorter[end].first)) {
end++;
}
final int pos = (sta + end - 1);
for(int i = sta; i < end; i++) {
- posvecs[sorter[i].second][d - 1] = pos;
+ posvecs[sorter[i].second][d] = pos;
}
sta = end;
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
index 031cfb4c..5d203c6b 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
@@ -39,8 +39,10 @@ import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
* Restore will only undo the IDF part of the normalization!
*
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
-public class TFIDFNormalization<V extends SparseNumberVector<V, ?>> extends InverseDocumentFrequencyNormalization<V> {
+public class TFIDFNormalization<V extends SparseNumberVector<?>> extends InverseDocumentFrequencyNormalization<V> {
/**
* Constructor.
*/
@@ -62,6 +64,6 @@ public class TFIDFNormalization<V extends SparseNumberVector<V, ?>> extends Inve
for(int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) {
vals.put(i, (float) (featureVector.doubleValue(i) / sum * idf.get(i)));
}
- return featureVector.newNumberVector(vals, featureVector.getDimensionality());
+ return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java
index ca52f814..82302cd3 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java
@@ -1,5 +1,7 @@
/**
* <p>Data filtering, in particular for normalization and projection.</p>
+ *
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.*
*/
/*
This file is part of ELKI:
@@ -23,4 +25,4 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package de.lmu.ifi.dbs.elki.datasource.filter; \ No newline at end of file
+package de.lmu.ifi.dbs.elki.datasource.filter;
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
index afa21fa6..18537a8d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
@@ -23,19 +23,26 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.List;
+
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
-import de.lmu.ifi.dbs.elki.datasource.filter.AbstractConversionFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorConversionFilter;
+import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.EigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Apply principal component analysis to the data set.
@@ -46,22 +53,50 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
*
* @param <O> Vector type
*/
-public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<O, ?>> extends AbstractConversionFilter<O, O> {
+public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>> extends AbstractVectorConversionFilter<O, O> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(GlobalPrincipalComponentAnalysisTransform.class);
+
+ /**
+ * Filter to use for dimensionality reduction.
+ */
+ EigenPairFilter filter = null;
+
+ /**
+ * Actual dataset dimensionality.
+ */
int dim = -1;
+ /**
+ * Covariance matrix builder.
+ */
CovarianceMatrix covmat = null;
+ /**
+ * Final projection after analysis run.
+ */
double[][] proj = null;
+ /**
+ * Projection buffer.
+ */
double[] buf = null;
+ /**
+ * Vector for data set centering.
+ */
double[] mean = null;
/**
* Constructor.
+ *
+ * @param filter Filter to use for dimensionality reduction.
*/
- public GlobalPrincipalComponentAnalysisTransform() {
+ public GlobalPrincipalComponentAnalysisTransform(EigenPairFilter filter) {
super();
+ this.filter = filter;
}
@Override
@@ -69,7 +104,7 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<O,
if(!(in instanceof VectorFieldTypeInformation)) {
throw new AbortException("PCA can only applied to fixed dimensionality vectors");
}
- dim = ((VectorFieldTypeInformation<?>) in).dimensionality();
+ dim = ((VectorFieldTypeInformation<?>) in).getDimensionality();
covmat = new CovarianceMatrix(dim);
return true;
}
@@ -81,19 +116,38 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<O,
@Override
protected void prepareComplete() {
- mean = covmat.getMeanVector().getArrayRef();
+ mean = covmat.getMeanVector().getArrayRef();
PCAResult pcares = (new PCARunner<O>(null)).processCovarMatrix(covmat.destroyToSampleMatrix());
SortedEigenPairs eps = pcares.getEigenPairs();
covmat = null;
- proj = new double[dim][dim];
- for(int d = 0; d < dim; d++) {
- EigenPair ep = eps.getEigenPair(d);
- double[] ev = ep.getEigenvector().getArrayRef();
- double eval = Math.sqrt(ep.getEigenvalue());
- // Fill weighted and transposed:
- for(int i = 0; i < dim; i++) {
- proj[d][i] = ev[i] / eval;
+ if(filter == null) {
+ proj = new double[dim][dim];
+ for(int d = 0; d < dim; d++) {
+ EigenPair ep = eps.getEigenPair(d);
+ double[] ev = ep.getEigenvector().getArrayRef();
+ double eval = Math.sqrt(ep.getEigenvalue());
+ // Fill weighted and transposed:
+ for(int i = 0; i < dim; i++) {
+ proj[d][i] = ev[i] / eval;
+ }
+ }
+ }
+ else {
+ List<EigenPair> axes = filter.filter(eps).getStrongEigenPairs();
+ final int pdim = axes.size(); // Projection dimensionality
+ if (LOG.isVerbose()) {
+ LOG.verbose("Reducing dimensionality from "+dim+" to "+pdim+" via PCA.");
+ }
+ proj = new double[pdim][dim];
+ for(int d = 0; d < pdim; d++) {
+ EigenPair ep = axes.get(d);
+ double[] ev = ep.getEigenvector().getArrayRef();
+ double eval = Math.sqrt(ep.getEigenvalue());
+ // Fill weighted and transposed:
+ for(int i = 0; i < dim; i++) {
+ proj[d][i] = ev[i] / eval;
+ }
}
}
buf = new double[dim];
@@ -103,10 +157,10 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<O,
protected O filterSingleObject(O obj) {
// Shift by mean and copy
for(int i = 0; i < dim; i++) {
- buf[i] = obj.doubleValue(i + 1) - mean[i];
+ buf[i] = obj.doubleValue(i) - mean[i];
}
double[] p = VMath.times(proj, buf);
- return obj.newNumberVector(p);
+ return factory.newNumberVector(p);
}
@Override
@@ -116,7 +170,13 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<O,
@Override
protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<O> in) {
- return in;
+ initializeOutputType(in);
+ if(proj.length == dim) {
+ return in;
+ }
+ else {
+ return new VectorFieldTypeInformation<O>(factory, proj.length);
+ }
}
/**
@@ -126,10 +186,30 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<O,
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * To specify the eigenvectors to keep.
+ */
+ public static final OptionID FILTER_ID = new OptionID("globalpca.filter", "Filter to use for dimensionality reduction.");
+
+ /**
+ * Filter to use for dimensionality reduction.
+ */
+ EigenPairFilter filter = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ ObjectParameter<EigenPairFilter> filterP = new ObjectParameter<EigenPairFilter>(FILTER_ID, EigenPairFilter.class, true);
+ if(config.grab(filterP)) {
+ filter = filterP.instantiateClass(config);
+ }
+ }
+
@Override
protected Object makeInstance() {
- return new GlobalPrincipalComponentAnalysisTransform<O>();
+ return new GlobalPrincipalComponentAnalysisTransform<O>(filter);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
index 009296b1..82e7a1b6 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
@@ -1,136 +1,168 @@
-package de.lmu.ifi.dbs.elki.datasource.filter;
-
-/*
- This file is part of ELKI:
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.BitSet;
-import java.util.List;
-
-import de.lmu.ifi.dbs.elki.data.FeatureVector;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListGreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
-
-/**
- * <p>
- * A ProjectionParser projects the objects of its base parser onto a subspace
- * specified by a BitSet.
- * </p>
- *
- * @author Arthur Zimek
- * @author Erich Schubert
- * @param <V> the type of FeatureVector contained in both the original and
- * projected data.
- */
-public abstract class AbstractFeatureSelectionFilter<V extends FeatureVector<?, ?>> extends AbstractStreamConversionFilter<V, V> {
- /**
- * <p>
- * Selected attributes parameter.
- * </p>
- * <p>
- * Key: <code>-projectionfilter.selectedattributes</code>
- * </p>
- */
- public static final OptionID SELECTED_ATTRIBUTES_ID = OptionID.getOrCreateOptionID("projectionfilter.selectedattributes", "a comma separated array of integer values d_i, where 1 <= d_i <= the " + "dimensionality of the feature space " + "specifying the dimensions to be considered " + "for projection. If this parameter is not set, " + "no dimensions will be considered, i.e. the projection is a zero-dimensional feature space");
-
- /**
- * Keeps the selection of the subspace to project onto.
- */
- private BitSet selectedAttributes;
-
- /**
- * Constructor.
- *
- * @param selectedAttributes
- */
- public AbstractFeatureSelectionFilter(BitSet selectedAttributes) {
- super();
- this.selectedAttributes = selectedAttributes;
- }
-
- /**
- * <p>
- * Sets the bits set to true in the given BitSet as selected attributes in
- * {@link #SELECTED_ATTRIBUTES_ID}.
- * </p>
- *
- * The index in the BitSet is expected to be shifted to the left by one, i.e.,
- * index 0 in the BitSet relates to the first attribute.
- *
- * @param selectedAttributes the new selected attributes
- */
- public void setSelectedAttributes(BitSet selectedAttributes) {
- this.selectedAttributes.or(selectedAttributes);
- }
-
- /**
- * <p>
- * Provides a BitSet with the bits set to true corresponding to the selected
- * attributes in {@link #SELECTED_ATTRIBUTES_ID}.
- * </p>
- *
- * The index in the BitSet is shifted to the left by one, i.e., index 0 in the
- * BitSet relates to the first attribute.
- *
- * @return the selected attributes
- */
- public BitSet getSelectedAttributes() {
- return selectedAttributes;
- }
-
- /**
- * Get the resulting dimensionality.
- *
- * @return dimensionality
- */
- public int getDimensionality() {
- return selectedAttributes.cardinality();
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static abstract class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
- protected BitSet selectedAttributes = null;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID, new ListGreaterEqualConstraint<Integer>(1));
- if(config.grab(selectedAttributesP)) {
- selectedAttributes = new BitSet();
- List<Integer> dimensionList = selectedAttributesP.getValue();
- for(int d : dimensionList) {
- selectedAttributes.set(d - 1);
- }
- }
- }
- }
-} \ No newline at end of file
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.ListEachConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
+
+/**
+ * <p>
+ * Filter that projects each number vector of the input onto a selected subset
+ * of its attributes.
+ * </p>
+ *
+ * @author Arthur Zimek
+ *
+ * @apiviz.uses NumberVector
+ *
+ * @param <V> Vector type
+ */
+public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> extends AbstractVectorStreamConversionFilter<V, V> {
+ /**
+ * Keeps the selection of the subspace to project onto.
+ */
+ private BitSet selectedAttributes;
+
+ /**
+ * Constructor.
+ *
+ * @param selectedAttributes Selected attributes
+ */
+ public NumberVectorFeatureSelectionFilter(BitSet selectedAttributes) {
+ super();
+ this.selectedAttributes = selectedAttributes;
+ }
+
+ @Override
+ protected V filterSingleObject(V obj) {
+ return VectorUtil.project(obj, getSelectedAttributes(), factory);
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_FIELD;
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
+ initializeOutputType(in);
+ return new VectorFieldTypeInformation<V>(factory, getDimensionality());
+ }
+
+ /**
+ * <p>
+ * Sets the bits set to true in the given BitSet as selected attributes in
+ * {@link Parameterizer#SELECTED_ATTRIBUTES_ID}.
+ * </p>
+ *
+ * The index in the BitSet is expected to be zero-based, i.e., index 0 in the
+ * BitSet relates to the first attribute.
+ *
+ * @param selectedAttributes the new selected attributes
+ */
+ public void setSelectedAttributes(BitSet selectedAttributes) {
+ this.selectedAttributes.or(selectedAttributes);
+ }
+
+ /**
+ * <p>
+ * Provides a BitSet with the bits set to true corresponding to the selected
+ * attributes in {@link Parameterizer#SELECTED_ATTRIBUTES_ID}.
+ * </p>
+ *
+ * The index in the BitSet is zero-based, i.e., index 0 in the BitSet relates
+ * to the first attribute.
+ *
+ * @return the selected attributes
+ */
+ public BitSet getSelectedAttributes() {
+ return selectedAttributes;
+ }
+
+ /**
+ * Get the resulting dimensionality.
+ *
+ * @return dimensionality
+ */
+ public int getDimensionality() {
+ return selectedAttributes.cardinality();
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * <p>
+ * Selected attributes parameter.
+ * </p>
+ * <p>
+ * Key: <code>-projectionfilter.selectedattributes</code>
+ * </p>
+ */
+ public static final OptionID SELECTED_ATTRIBUTES_ID = new OptionID("projectionfilter.selectedattributes", "a comma separated array of integer values d_i, where 0 <= d_i < the " + "dimensionality of the feature space " + "specifying the dimensions to be considered " + "for projection. If this parameter is not set, " + "no dimensions will be considered, i.e. the projection is a zero-dimensional feature space");
+
+ /**
+ * Selected attributes.
+ */
+ protected BitSet selectedAttributes = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntListParameter selectedAttributesP = new IntListParameter(SELECTED_ATTRIBUTES_ID);
+ selectedAttributesP.addConstraint(new ListEachConstraint<Integer>(new GreaterEqualConstraint(0)));
+ if (config.grab(selectedAttributesP)) {
+ selectedAttributes = new BitSet();
+ List<Integer> dimensionList = selectedAttributesP.getValue();
+ for (int d : dimensionList) {
+ selectedAttributes.set(d);
+ }
+ }
+ }
+
+ @Override
+ protected NumberVectorFeatureSelectionFilter<DoubleVector> makeInstance() {
+ return new NumberVectorFeatureSelectionFilter<DoubleVector>(selectedAttributes);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
new file mode 100644
index 00000000..7d799a1e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
@@ -0,0 +1,174 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.Util;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Filter that projects each number vector of the input onto a randomly
+ * selected subset of its attributes.
+ *
+ * @author Arthur Zimek
+ *
+ * @apiviz.uses NumberVector
+ *
+ * @param <V> vector type
+ */
+public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>> extends AbstractVectorStreamConversionFilter<V, V> {
+ /**
+ * The selected attributes.
+ */
+ protected BitSet selectedAttributes = null;
+
+ /**
+ * Holds the desired cardinality of the subset of attributes selected for
+ * projection.
+ */
+ protected int k;
+
+ /**
+ * Holds a random generator.
+ */
+ protected RandomFactory rnd;
+
+ /**
+ * Constructor.
+ *
+ * @param dim Number of attributes to select (dimensionality of the projection)
+ * @param rnd Random generator
+ */
+ public NumberVectorRandomFeatureSelectionFilter(int dim, RandomFactory rnd) {
+ super();
+ this.k = dim;
+ this.rnd = rnd;
+ }
+
+ @Override
+ protected V filterSingleObject(V obj) {
+ return VectorUtil.project(obj, selectedAttributes, factory);
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_FIELD;
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
+ initializeRandomAttributes(in);
+ initializeOutputType(in);
+ return new VectorFieldTypeInformation<V>(factory, k);
+ }
+
+ /**
+ * Initialize random attributes.
+ *
+ * Invoke this from {@link #convertedType}!
+ *
+ * @param in Type information.
+ */
+ void initializeRandomAttributes(SimpleTypeInformation<V> in) {
+ int d = ((VectorFieldTypeInformation<V>) in).getDimensionality();
+ selectedAttributes = Util.randomBitSet(k, d, rnd.getRandom());
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter for the desired cardinality of the subset of attributes
+ * selected for projection.
+ *
+ * <p>
+ * Key: <code>-randomprojection.numberselected</code>
+ * </p>
+ * <p>
+ * Default: <code>1</code>
+ * </p>
+ * <p>
+ * Constraint: &ge;1
+ * </p>
+ */
+ public static final OptionID NUMBER_SELECTED_ATTRIBUTES_ID = new OptionID("randomprojection.numberselected", "number of selected attributes");
+
+ /**
+ * Optional parameter to specify a seed for random projection. If unused,
+ * system time is used as seed.
+ * <p>
+ * Key: {@code -randomprojection.seed}
+ * </p>
+ */
+ public static final OptionID SEED_ID = new OptionID("randomprojection.seed", "Seed for random selection of projection attributes.");
+
+ /**
+ * Number of attributes to select.
+ */
+ protected int k = 0;
+
+ /**
+ * Random generator.
+ */
+ protected RandomFactory rnd;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(NUMBER_SELECTED_ATTRIBUTES_ID, 1);
+ kP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(kP)) {
+ k = kP.getValue().intValue();
+ }
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
+ }
+ }
+
+ @Override
+ protected NumberVectorRandomFeatureSelectionFilter<DoubleVector> makeInstance() {
+ return new NumberVectorRandomFeatureSelectionFilter<DoubleVector>(k, rnd);
+ }
+ }
+}