summary refs log tree commit diff
path: root/src/de/lmu/ifi/dbs/elki/datasource/filter/transform
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter/transform')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java | 13
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java | 33
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java | 12
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/HistogramJitterFilter.java | 165
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java | 22
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java | 6
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java | 22
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java | 6
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java | 12
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/PerturbationFilter.java | 436
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java | 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java | 2
12 files changed, 672 insertions, 61 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
index 462db9eb..8c1ef6cb 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,13 +33,12 @@ import java.util.Map;
import de.lmu.ifi.dbs.elki.data.ClassLabel;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.NumberVector.Factory;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
-import de.lmu.ifi.dbs.elki.datasource.filter.ClassLabelFilter;
import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.typeconversions.ClassLabelFilter;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
@@ -60,7 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* @param <V> Vector type
*/
-public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberVector<?>> implements ObjectFilter {
+public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberVector> implements ObjectFilter {
/**
* The dimensionality to which the data should be reduced.
*/
@@ -114,7 +113,7 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
List<V> vectorcolumn = (List<V>) column;
final VectorFieldTypeInformation<?> vtype = (VectorFieldTypeInformation<?>) type;
@SuppressWarnings("unchecked")
- NumberVector.Factory<V, ?> factory = (NumberVector.Factory<V, ?>) vtype.getFactory();
+ NumberVector.Factory<V> factory = (NumberVector.Factory<V> ) vtype.getFactory();
int dim = vtype.getDimensionality();
if(tdim > dim) {
@@ -155,7 +154,7 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
* @param factory Vector factory
* @return output type restriction
*/
- protected SimpleTypeInformation<?> convertedType(SimpleTypeInformation<?> in, Factory<V, ?> factory) {
+ protected SimpleTypeInformation<?> convertedType(SimpleTypeInformation<?> in, NumberVector.Factory<V> factory) {
return new VectorFieldTypeInformation<>(factory, tdim);
}
@@ -206,7 +205,7 @@ public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberV
*
* @param <V> Vector type
*/
- public abstract static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public abstract static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* The number of dimensions to keep.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java
index d646b489..32024581 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,8 +30,9 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -54,6 +55,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.composedOf SingularValueDecomposition
+ *
* @param <O> Data type
*/
@Alias({ "mds" })
@@ -66,7 +69,7 @@ public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter
/**
* Distance function to use.
*/
- PrimitiveDoubleDistanceFunction<? super O> dist = null;
+ PrimitiveDistanceFunction<? super O> dist = null;
/**
* Target dimensionality
@@ -79,7 +82,7 @@ public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter
* @param tdim Target dimensionality.
* @param dist Distance function to use.
*/
- public ClassicMultidimensionalScalingTransform(int tdim, PrimitiveDoubleDistanceFunction<? super O> dist) {
+ public ClassicMultidimensionalScalingTransform(int tdim, PrimitiveDistanceFunction<? super O> dist) {
super();
this.tdim = tdim;
this.dist = dist;
@@ -105,14 +108,14 @@ public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter
// Get the replacement type information
@SuppressWarnings("unchecked")
final List<O> castColumn = (List<O>) column;
- NumberVector.Factory<? extends NumberVector<?>, ?> factory = null;
+ NumberVector.Factory<? extends NumberVector> factory = null;
{
if (type instanceof VectorFieldTypeInformation) {
final VectorFieldTypeInformation<?> ctype = (VectorFieldTypeInformation<?>) type;
// Note two-step cast, to make stricter compilers happy.
@SuppressWarnings("unchecked")
- final VectorFieldTypeInformation<? extends NumberVector<?>> vtype = (VectorFieldTypeInformation<? extends NumberVector<?>>) ctype;
- factory = (NumberVector.Factory<? extends NumberVector<?>, ?>) vtype.getFactory();
+ final VectorFieldTypeInformation<? extends NumberVector> vtype = (VectorFieldTypeInformation<? extends NumberVector>) ctype;
+ factory = FilterUtil.guessFactory(vtype);
} else {
factory = DoubleVector.FACTORY;
}
@@ -128,16 +131,12 @@ public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter
final O ox = castColumn.get(x);
for (int y = x + 1; y < size; y++) {
final O oy = castColumn.get(y);
- double distance = Math.abs(dist.doubleDistance(ox, oy));
+ double distance = Math.abs(dist.distance(ox, oy));
imat[x][y] = distance;
- if (dprog != null) {
- dprog.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(dprog);
}
}
- if (dprog != null) {
- dprog.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(dprog);
}
// Adjust distance matrix:
if (dist instanceof SquaredEuclideanDistanceFunction) {
@@ -230,7 +229,7 @@ public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* Desired dimensionality.
*/
@@ -249,7 +248,7 @@ public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter
/**
* Distance function to use.
*/
- PrimitiveDoubleDistanceFunction<? super O> dist = null;
+ PrimitiveDistanceFunction<? super O> dist = null;
@Override
protected void makeOptions(Parameterization config) {
@@ -260,7 +259,7 @@ public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter
tdim = dimP.intValue();
}
- ObjectParameter<PrimitiveDoubleDistanceFunction<? super O>> distP = new ObjectParameter<>(DISTANCE_ID, PrimitiveDoubleDistanceFunction.class, SquaredEuclideanDistanceFunction.class);
+ ObjectParameter<PrimitiveDistanceFunction<? super O>> distP = new ObjectParameter<>(DISTANCE_ID, PrimitiveDistanceFunction.class, SquaredEuclideanDistanceFunction.class);
if (config.grab(distP)) {
dist = distP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
index 3b4193ad..c6bd02a9 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -55,10 +55,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.composedOf PCARunner
+ * @apiviz.composedOf CovarianceMatrix
+ * @apiviz.composedOf EigenPairFilter
+ *
* @param <O> Vector type
*/
@Alias({ "whiten", "whitening", "pca" })
-public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>> extends AbstractVectorConversionFilter<O, O> {
+public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector> extends AbstractVectorConversionFilter<O, O> {
/**
* Class logger.
*/
@@ -122,7 +126,7 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>
@Override
protected void prepareComplete() {
mean = covmat.getMeanVector().getArrayRef();
- PCAResult pcares = (new PCARunner<O>(null)).processCovarMatrix(covmat.destroyToSampleMatrix());
+ PCAResult pcares = (new PCARunner(null)).processCovarMatrix(covmat.destroyToSampleMatrix());
SortedEigenPairs eps = pcares.getEigenPairs();
covmat = null;
@@ -190,7 +194,7 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* To specify the eigenvectors to keep.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/HistogramJitterFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/HistogramJitterFilter.java
new file mode 100644
index 00000000..8c34ce37
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/HistogramJitterFilter.java
@@ -0,0 +1,165 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Add jitter to histogram-like vectors, preserving the sum of each vector.
+ *
+ * For each vector, the total sum of all dimensions is computed.<br />
+ * Then a jitter vector is drawn from an exponential distribution, with each
+ * component scaled by <code>2 * jitter * sum / dimensionality</code>. It is
+ * added to the original vector rescaled by <code>1 - jitterSum / sum</code>,
+ * so the total sum is unchanged; it is expected that the error in most
+ * dimensions will be low, and higher in few. Zero-sum vectors are not jittered.
+ *
+ * This is designed to degrade the quality of a histogram, while preserving the
+ * total sum (e.g. to keep the normalization). The factor "jitter" controls the
+ * degradation; if the jitter drawn exceeds the sum, values may turn negative.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+@Description("Add uniform Jitter to a dataset, while preserving the total vector sum.")
+public class HistogramJitterFilter<V extends NumberVector> extends AbstractVectorStreamConversionFilter<V, V> {
+  /**
+   * Jitter amount, relative to the sum of each vector.
+   */
+  double jitter;
+
+  /**
+   * Exponential distribution from which the raw jitter values are drawn.
+   */
+  ExponentialDistribution rnd;
+
+  /**
+   * Constructor.
+   *
+   * @param jitter Relative amount of jitter to add
+   * @param rnd Random factory; its single-threaded generator feeds the distribution
+   */
+  public HistogramJitterFilter(double jitter, RandomFactory rnd) {
+    super();
+    this.jitter = jitter;
+    this.rnd = new ExponentialDistribution(1, rnd.getSingleThreadedRandom());
+  }
+
+  @Override
+  protected V filterSingleObject(V obj) {
+    final int dim = obj.getDimensionality();
+    // Total sum of the original vector; the output is normalized back to it.
+    double osum = 0;
+    for(int i = 0; i < dim; i++) {
+      osum += obj.doubleValue(i);
+    }
+    // Per-dimension jitter scale; each draw below is multiplied by this factor:
+    final double maxjitter = 2 * jitter / dim * osum;
+    // Generate the jitter vector and accumulate its sum.
+    double[] raw = new double[dim];
+    double jsum = 0; // Sum of jitter
+    for(int i = 0; i < raw.length; i++) {
+      raw[i] = rnd.nextRandom() * maxjitter;
+      jsum += raw[i];
+    }
+    final double mix = (osum != 0.) ? jsum / osum : 0.; // 0/0 would yield NaN.
+    // Shrink the original by (1 - mix), so that jitter + original sums to osum.
+    for(int i = 0; i < raw.length; i++) {
+      raw[i] = raw[i] + (1 - mix) * obj.doubleValue(i);
+    }
+    return factory.newNumberVector(raw);
+  }
+
+  @Override
+  protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+    return TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH;
+  }
+
+  @Override
+  protected SimpleTypeInformation<V> convertedType(SimpleTypeInformation<V> in) {
+    initializeOutputType(in);
+    return in;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * Option ID for the jitter strength.
+     */
+    public static final OptionID JITTER_ID = new OptionID("jitter.amount", "Jitter amount relative to data.");
+
+    /**
+     * Option ID for the jitter random seed.
+     */
+    public static final OptionID SEED_ID = new OptionID("jitter.seed", "Jitter random seed.");
+
+    /**
+     * Jitter amount; stays at 0.1 unless overwritten by the parsed option.
+     */
+    double jitter = 0.1;
+
+    /**
+     * Random factory obtained from the seed option.
+     */
+    RandomFactory rnd;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      DoubleParameter jitterP = new DoubleParameter(JITTER_ID) //
+      .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+      if(config.grab(jitterP)) {
+        jitter = jitterP.getValue().doubleValue();
+      }
+      RandomParameter rndP = new RandomParameter(SEED_ID);
+      if(config.grab(rndP)) {
+        rnd = rndP.getValue();
+      }
+    }
+
+    @Override
+    protected HistogramJitterFilter<DoubleVector> makeInstance() {
+      return new HistogramJitterFilter<>(jitter, rnd);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java
index 998c8931..9cb0b492 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,8 +25,10 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.AbstractStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
import de.lmu.ifi.dbs.elki.math.geodesy.EarthModel;
import de.lmu.ifi.dbs.elki.math.geodesy.SphericalVincentyEarthModel;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -39,13 +41,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.uses NumberVector
+ * @apiviz.composedOf EarthModel
+ *
* @param <V> Vector type.
*/
-public class LatLngToECEFFilter<V extends NumberVector<?>> extends AbstractStreamConversionFilter<V, V> {
+public class LatLngToECEFFilter<V extends NumberVector> extends AbstractStreamConversionFilter<V, V> {
/**
* Vector factory to use.
*/
- private NumberVector.Factory<V, ?> factory;
+ private NumberVector.Factory<V> factory;
/**
* Earth model to use.
@@ -69,14 +74,13 @@ public class LatLngToECEFFilter<V extends NumberVector<?>> extends AbstractStrea
@Override
protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
- return new VectorFieldTypeInformation<>(NumberVector.class, 2, 2);
+ return TypeUtil.NUMBER_VECTOR_FIELD_2D;
}
@Override
protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
- VectorFieldTypeInformation<V> vin = (VectorFieldTypeInformation<V>) in;
- factory = (NumberVector.Factory<V, ?>) vin.getFactory();
- return new VectorFieldTypeInformation<>(vin.getFactory(), 3, 3, in.getSerializer());
+ factory = FilterUtil.guessFactory(in);
+ return new VectorFieldTypeInformation<>(factory, 3, 3, in.getSerializer());
}
/**
@@ -88,7 +92,7 @@ public class LatLngToECEFFilter<V extends NumberVector<?>> extends AbstractStrea
*
* @param <V> Vector type
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* Earth model to use.
*/
@@ -98,7 +102,7 @@ public class LatLngToECEFFilter<V extends NumberVector<?>> extends AbstractStrea
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<EarthModel> modelP = new ObjectParameter<>(EarthModel.MODEL_ID, EarthModel.class, SphericalVincentyEarthModel.class);
- if (config.grab(modelP)) {
+ if(config.grab(modelP)) {
model = modelP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java
index 76546d5c..537bfb20 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -59,7 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
*/
@Alias("lda")
@Reference(authors = "R. A. Fisher", title = "The use of multiple measurements in taxonomic problems", booktitle = "Annals of eugenics 7.2 (1936)", url = "http://dx.doi.org/10.1111/j.1469-1809.1936.tb02137.x")
-public class LinearDiscriminantAnalysisFilter<V extends NumberVector<?>> extends AbstractSupervisedProjectionVectorFilter<V> {
+public class LinearDiscriminantAnalysisFilter<V extends NumberVector> extends AbstractSupervisedProjectionVectorFilter<V> {
/**
* Class logger.
*/
@@ -156,7 +156,7 @@ public class LinearDiscriminantAnalysisFilter<V extends NumberVector<?>> extends
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractSupervisedProjectionVectorFilter.Parameterizer<V> {
+ public static class Parameterizer<V extends NumberVector> extends AbstractSupervisedProjectionVectorFilter.Parameterizer<V> {
@Override
protected LinearDiscriminantAnalysisFilter<V> makeInstance() {
return new LinearDiscriminantAnalysisFilter<>(tdim);
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java
index ea0d4ef2..d5fba25d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,8 +25,10 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.AbstractStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
import de.lmu.ifi.dbs.elki.math.geodesy.EarthModel;
import de.lmu.ifi.dbs.elki.math.geodesy.SphericalVincentyEarthModel;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -39,13 +41,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.uses NumberVector
+ * @apiviz.composedOf EarthModel
+ *
* @param <V> Vector type.
*/
-public class LngLatToECEFFilter<V extends NumberVector<?>> extends AbstractStreamConversionFilter<V, V> {
+public class LngLatToECEFFilter<V extends NumberVector> extends AbstractStreamConversionFilter<V, V> {
/**
* Vector factory to use.
*/
- private NumberVector.Factory<V, ?> factory;
+ private NumberVector.Factory<V> factory;
/**
* Earth model to use.
@@ -69,14 +74,13 @@ public class LngLatToECEFFilter<V extends NumberVector<?>> extends AbstractStrea
@Override
protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
- return new VectorFieldTypeInformation<>(NumberVector.class, 2, 2);
+ return TypeUtil.NUMBER_VECTOR_FIELD_2D;
}
@Override
protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
- VectorFieldTypeInformation<V> vin = (VectorFieldTypeInformation<V>) in;
- factory = (NumberVector.Factory<V, ?>) vin.getFactory();
- return new VectorFieldTypeInformation<>(vin.getFactory(), 3, 3, in.getSerializer());
+ factory = FilterUtil.guessFactory(in);
+ return new VectorFieldTypeInformation<>(factory, 3, 3, in.getSerializer());
}
/**
@@ -88,7 +92,7 @@ public class LngLatToECEFFilter<V extends NumberVector<?>> extends AbstractStrea
*
* @param <V> Vector type
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* Earth model to use.
*/
@@ -98,7 +102,7 @@ public class LngLatToECEFFilter<V extends NumberVector<?>> extends AbstractStrea
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<EarthModel> modelP = new ObjectParameter<>(EarthModel.MODEL_ID, EarthModel.class, SphericalVincentyEarthModel.class);
- if (config.grab(modelP)) {
+ if(config.grab(modelP)) {
model = modelP.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
index e6d0d15d..115d77dd 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -51,7 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntListParameter;
*
* @param <V> Vector type
*/
-public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> extends AbstractVectorStreamConversionFilter<V, V> {
+public class NumberVectorFeatureSelectionFilter<V extends NumberVector> extends AbstractVectorStreamConversionFilter<V, V> {
/**
* Keeps the selection of the subspace to project onto.
*/
@@ -99,10 +99,8 @@ public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> exten
}
/**
- * <p>
* Provides a BitSet with the bits set to true corresponding to the selected
* attributes in {@link Parameterizer#SELECTED_ATTRIBUTES_ID}.
- * </p>
*
* The index in the BitSet is shifted to the left by one, i.e., index 0 in the
* BitSet relates to the first attribute.
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
index 4086270c..dfca33ec 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.Util;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -51,7 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
*
* @param <V> vector type
*/
-public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>> extends AbstractVectorStreamConversionFilter<V, V> {
+public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector> extends AbstractVectorStreamConversionFilter<V, V> {
/**
* The selected attributes.
*/
@@ -155,10 +155,10 @@ public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>>
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter kP = new IntParameter(NUMBER_SELECTED_ATTRIBUTES_ID, 1);
- kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ IntParameter kP = new IntParameter(NUMBER_SELECTED_ATTRIBUTES_ID, 1) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
- k = kP.getValue().intValue();
+ k = kP.intValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
if(config.grab(rndP)) {
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/PerturbationFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/PerturbationFilter.java
new file mode 100644
index 00000000..4e5fe9b3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/PerturbationFilter.java
@@ -0,0 +1,436 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorConversionFilter;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.AllOrNoneMustBeSetGlobalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.EqualSizeGlobalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+
+/**
+ * A filter to perturb the values by adding micro-noise.
+ *
+ * The added noise is generated, attribute-wise, by a Gaussian with mean=0 and a
+ * specified standard deviation or by a uniform distribution with a specified
+ * range. The standard deviation or the range can be scaled, attribute-wise, to
+ * a given percentage of the original standard deviation in the data
+ * distribution (assuming a Gaussian distribution there), or to a percentage of
+ * the extension in each attribute ({@code maximumValue - minimumValue}).
+ *
+ * This filter has a potentially wide use but has been implemented for the following publication:
+ *
+ * Reference:
+ * <p>
+ * A. Zimek, R. J. G. B. Campello, J. Sander:<br />
+ * Data Perturbation for Outlier Detection Ensembles.<br />
+ * In: Proc. 26th International Conference on Scientific and Statistical Database Management (SSDBM), Aalborg, Denmark, 2014.
+ * </p>
+ *
+ * @author Arthur Zimek
+ */
+@Title("Data Perturbation for Outlier Detection Ensembles")
+@Description("A filter to perturb a datasset on read by an additive noise component, implemented for use in an outlier ensemble (this reference).")
+@Reference(authors = "A. Zimek, R. J. G. B. Campello, J. Sander",//
+title = "Data Perturbation for Outlier Detection Ensembles", //
+booktitle = "Proc. 26th International Conference on Scientific and Statistical Database Management (SSDBM), Aalborg, Denmark, 2014", //
+url = "http://dx.doi.org/10.1145/2618243.2618257")
+public class PerturbationFilter<V extends NumberVector> extends AbstractVectorConversionFilter<V, V> {
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(PerturbationFilter.class);
+
+ /**
+ * Scaling reference options.
+ *
+ * @author Arthur Zimek
+ *
+ * @apiviz.exclude
+ */
+ public static enum ScalingReference {
+ // UNITCUBE: the percentage value itself is used as noise scale in every
+ // attribute; no pass over the data is needed for this mode.
+ // STDDEV: the noise scale is the sample standard deviation of the
+ // attribute, multiplied by the percentage.
+ // MINMAX: the noise scale is the extension (maximum - minimum) of the
+ // attribute, multiplied by the percentage; the bounds are either preset
+ // or measured on the data.
+ // For STDDEV and MINMAX, a scale of 0 or NaN is replaced by the plain
+ // percentage.
+ UNITCUBE, STDDEV, MINMAX
+ }
+
+ /**
+ * Nature of the noise distribution.
+ *
+ * @author Arthur Zimek
+ *
+ * @apiviz.exclude
+ */
+ public static enum NoiseDistribution {
+ // GAUSSIAN: noise is Random.nextGaussian() times the attribute's scale.
+ // UNIFORM: noise is Random.nextDouble() times the attribute's scale.
+ // NOTE(review): nextDouble() yields values in [0, 1), so uniform noise is
+ // never negative - confirm that this one-sided shift is intended.
+ GAUSSIAN, UNIFORM
+ }
+
+ /**
+ * Which reference to use for scaling the noise.
+ */
+ private ScalingReference scalingreference;
+
+ /**
+ * Nature of the noise distribution.
+ */
+ private NoiseDistribution noisedistribution;
+
+ /**
+ * Master random generator. It is only used to draw the seeds of the
+ * per-attribute generators in {@link #randomPerAttribute}.
+ */
+ private final Random RANDOM;
+
+ /**
+ * Relative noise scale. It is multiplied with the per-attribute reference
+ * (sample standard deviation or extension), and it is used directly as the
+ * noise scale for {@link ScalingReference#UNITCUBE} or when the scaled
+ * reference is 0 or NaN.
+ */
+ private double percentage;
+
+ /**
+ * Per-attribute statistics collected while scanning the data. Allocated on
+ * the first vector seen and reset to {@code null} when the preparation is
+ * complete.
+ */
+ private MeanVarianceMinMax[] mvs = null;
+
+ /**
+ * Noise scale in each dimension, i.e. the reference value already
+ * multiplied by {@link #percentage}. Empty until it has been initialized.
+ */
+ private double[] scalingreferencevalues = new double[0];
+
+ /**
+ * The random objects to generate noise distributions independently for each
+ * attribute. Each one is seeded from {@link #RANDOM}.
+ */
+ private Random[] randomPerAttribute = null;
+
+ /**
+ * Preset maximum in each dimension, as passed to the constructor. Only read
+ * when scaling by {@link ScalingReference#MINMAX}.
+ */
+ private double[] maxima;
+
+ /**
+ * Preset minimum in each dimension, as passed to the constructor. Only read
+ * when scaling by {@link ScalingReference#MINMAX}.
+ */
+ private double[] minima;
+
+ /**
+ * Dimensionality of the data: taken from the preset minima, from the first
+ * vector seen during preparation, or from the first vector filtered. 0 while
+ * unknown.
+ */
+ private int dimensionality = 0;
+
+  /**
+   * Constructor.
+   *
+   * @param seed Seed value, may be {@code null} for a random seed.
+   * @param percentage Relative amount of jitter to add
+   * @param scalingreference Scaling reference
+   * @param minima Preset minimum values. May be {@code null} or empty, in
+   *        which case no preset minima are used.
+   * @param maxima Preset maximum values. May be {@code null} or empty, in
+   *        which case no preset maxima are used.
+   * @param noisedistribution Nature of the noise distribution.
+   */
+  public PerturbationFilter(Long seed, double percentage, ScalingReference scalingreference, double[] minima, double[] maxima, NoiseDistribution noisedistribution) {
+    super();
+    this.percentage = percentage;
+    this.scalingreference = scalingreference;
+    // null is documented as legal, but prepareStart() and prepareComplete()
+    // read minima.length / maxima.length. Normalize to the empty array, which
+    // those methods already interpret as "no preset bounds".
+    this.minima = (minima != null) ? minima : new double[0];
+    this.maxima = (maxima != null) ? maxima : new double[0];
+    this.noisedistribution = noisedistribution;
+    this.RANDOM = (seed == null) ? new Random() : new Random(seed);
+  }
+
+  /**
+   * Decide whether a preparation pass over the data is needed.
+   *
+   * With {@link ScalingReference#MINMAX} and preset bounds, the noise scales
+   * and per-attribute random generators are set up right here.
+   *
+   * @param in Input type information (not used)
+   * @return {@code true} when the data must be scanned first
+   */
+  @Override
+  protected boolean prepareStart(SimpleTypeInformation<V> in) {
+    // Unit cube scaling is initialized lazily in filterSingleObject().
+    if(scalingreference == ScalingReference.UNITCUBE) {
+      return false;
+    }
+    final boolean presetBounds = scalingreference == ScalingReference.MINMAX && minima.length != 0 && maxima.length != 0;
+    if(!presetBounds) {
+      // Scan the data only as long as no scaling values are available.
+      return scalingreferencevalues.length == 0;
+    }
+    dimensionality = minima.length;
+    scalingreferencevalues = new double[dimensionality];
+    randomPerAttribute = new Random[dimensionality];
+    for(int i = 0; i < dimensionality; i++) {
+      final double extent = (maxima[i] - minima[i]) * percentage;
+      // Degenerate extension (0 or NaN): use the plain percentage instead.
+      scalingreferencevalues[i] = (extent == 0 || Double.isNaN(extent)) ? percentage : extent;
+      randomPerAttribute[i] = new Random(RANDOM.nextLong());
+    }
+    return false;
+  }
+
+  /**
+   * Feed one vector into the per-attribute statistics.
+   *
+   * @param featureVector Vector observed during the preparation pass
+   */
+  @Override
+  protected void prepareProcessInstance(V featureVector) {
+    final int dim = featureVector.getDimensionality();
+    // The dimensionality is only known once the first vector arrives.
+    if(mvs == null) {
+      dimensionality = dim;
+      mvs = MeanVarianceMinMax.newArray(dim);
+    }
+    for(int i = 0; i < dim; i++) {
+      mvs[i].put(featureVector.doubleValue(i));
+    }
+  }
+
+  /**
+   * Derive the per-attribute noise scales from the collected statistics and
+   * create one random generator per attribute. The statistics are released
+   * afterwards.
+   */
+  @Override
+  protected void prepareComplete() {
+    final StringBuilder msg = LOG.isDebuggingFine() ? new StringBuilder() : null;
+    scalingreferencevalues = new double[dimensionality];
+    randomPerAttribute = new Random[dimensionality];
+    final boolean byStddev = (scalingreference == ScalingReference.STDDEV);
+    // Measured extension is only used when no bounds were preset.
+    final boolean byExtent = !byStddev && scalingreference == ScalingReference.MINMAX && minima.length == 0 && maxima.length == 0;
+    if(byStddev || byExtent) {
+      if(msg != null) {
+        msg.append(byStddev ? "Standard deviation per attribute: " : "extension per attribute: ");
+      }
+      for(int i = 0; i < dimensionality; i++) {
+        final double reference = byStddev ? mvs[i].getSampleStddev() : (mvs[i].getMax() - mvs[i].getMin());
+        final double scaled = reference * percentage;
+        // Degenerate reference (0 or NaN): use the plain percentage instead.
+        scalingreferencevalues[i] = (scaled == 0 || Double.isNaN(scaled)) ? percentage : scaled;
+        randomPerAttribute[i] = new Random(RANDOM.nextLong());
+        if(msg != null) {
+          msg.append(" ").append(i).append(": ").append(scalingreferencevalues[i] / percentage);
+        }
+      }
+    }
+    mvs = null;
+    if(msg != null) {
+      LOG.debugFine(msg.toString());
+    }
+  }
+
+ /**
+ * Input type restriction of this filter.
+ *
+ * @return {@link TypeUtil#NUMBER_VECTOR_FIELD}
+ */
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return TypeUtil.NUMBER_VECTOR_FIELD;
+ }
+
+  /**
+   * Add noise to every attribute of a single vector.
+   *
+   * @param featureVector Vector to perturb
+   * @return New vector holding the perturbed values
+   * @throws IllegalArgumentException when the vector dimensionality does not
+   *         match the number of noise scales
+   */
+  @Override
+  protected V filterSingleObject(V featureVector) {
+    final int dim = featureVector.getDimensionality();
+    // Unit cube scaling needs no data pass; initialize on the first vector.
+    if(scalingreference == ScalingReference.UNITCUBE && dimensionality == 0) {
+      dimensionality = dim;
+      scalingreferencevalues = new double[dim];
+      randomPerAttribute = new Random[dim];
+      for(int i = 0; i < dim; i++) {
+        scalingreferencevalues[i] = percentage;
+        randomPerAttribute[i] = new Random(RANDOM.nextLong());
+      }
+    }
+    if(scalingreferencevalues.length != dim) {
+      throw new IllegalArgumentException("FeatureVectors and given Minima/Maxima differ in length.");
+    }
+    final double[] perturbed = new double[dim];
+    for(int i = 0; i < dim; i++) {
+      final double scale = scalingreferencevalues[i];
+      if(this.noisedistribution.equals(NoiseDistribution.GAUSSIAN)) {
+        perturbed[i] = featureVector.doubleValue(i) + randomPerAttribute[i].nextGaussian() * scale;
+      }
+      else if(this.noisedistribution.equals(NoiseDistribution.UNIFORM)) {
+        perturbed[i] = featureVector.doubleValue(i) + randomPerAttribute[i].nextDouble() * scale;
+      }
+    }
+    return factory.newNumberVector(perturbed);
+  }
+
+ /**
+ * The output type is the input type, returned unchanged.
+ *
+ * @param in Input type information
+ * @return {@code in}
+ */
+ @Override
+ protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
+ // NOTE(review): presumably this sets up the vector factory used by
+ // filterSingleObject() - confirm against the parent class.
+ initializeOutputType(in);
+ return in;
+ }
+
+ /**
+ * Get the class logger.
+ *
+ * @return the static logger of this class
+ */
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Arthur Zimek
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ /**
+ * Parameter for minimum.
+ */
+ public static final OptionID MINIMA_ID = new OptionID("perturbationfilter.min", "Only used, if " + ScalingReference.MINMAX + " is set as scaling reference: a comma separated concatenation of the minimum values in each dimension assumed as a reference. If no value is specified, the minimum value of the attribute range in this dimension will be taken.");
+
+ /**
+ * Parameter for maximum.
+ */
+ public static final OptionID MAXIMA_ID = new OptionID("perturbationfilter.max", "Only used, if " + ScalingReference.MINMAX + " is set as scaling reference: a comma separated concatenation of the maximum values in each dimension assumed as a reference. If no value is specified, the maximum value of the attribute range in this dimension will be taken.");
+
+ /**
+ * Stores the maximum in each dimension.
+ */
+ private double[] maxima = new double[0];
+
+ /**
+ * Stores the minimum in each dimension.
+ */
+ private double[] minima = new double[0];
+
+ /**
+ * Optional parameter to specify a seed for random Gaussian noise
+ * generation. If unused, system time is used as seed.
+ * <p>
+ * Key: {@code -perturbationfilter.seed}
+ * </p>
+ */
+ public static final OptionID SEED_ID = new OptionID("perturbationfilter.seed", "Seed for random noise generation.");
+
+ /**
+ * Seed for the random noise generation. If null, the filter falls back to
+ * an unseeded {@link Random}.
+ */
+ protected Long seed = null;
+
+ /**
+ * Optional parameter to specify a percentage of the standard deviation of
+ * the random Gaussian noise generation, given the standard deviation of the
+ * corresponding attribute in the original data distribution (assuming a
+ * Gaussian there).
+ *
+ * <p>
+ * Key: {@code -perturbationfilter.percentage}
+ * </p>
+ * <p>
+ * Default: <code>0.01</code>
+ * </p>
+ * <p>
+ * Constraint: 0 &lt; percentage &leq;1
+ * </p>
+ */
+ public static final OptionID PERCENTAGE_ID = new OptionID("perturbationfilter.percentage", "Percentage of the standard deviation of the random Gaussian noise generation per attribute, given the standard deviation of the corresponding attribute in the original data distribution (assuming a Gaussian distribution there).");
+
+ /**
+ * Parameter for selecting scaling reference.
+ * <p>
+ * Key: {@code -perturbationfilter.scalingreference}
+ * </p>
+ * <p>
+ * Default: <code>ScalingReference.UNITCUBE</code>
+ * </p>
+ */
+ public static final OptionID SCALINGREFERENCE_ID = new OptionID("perturbationfilter.scalingreference", "The reference for scaling the Gaussian noise. Default is " + ScalingReference.UNITCUBE + ", parameter " + PERCENTAGE_ID.getName() + " will then directly define the standard deviation of all noise Gaussians. For options " + ScalingReference.STDDEV + " and " + ScalingReference.MINMAX + ", the percentage of the attributewise standard deviation or extension, repectively, will define the attributewise standard deviation of the noise Gaussians.");
+
+ /**
+ * Parameter for selecting the noise distribution.
+ *
+ * <p>
+ * Key: {@code -perturbationfilter.noisedistribution}
+ * </p>
+ * <p>
+ * Default: <code>NoiseDistribution.UNIFORM</code>
+ * </p>
+ *
+ */
+ public static final OptionID NOISEDISTRIBUTION_ID = new OptionID("perturbationfilter.noisedistribution", "The nature of the noise distribution, default is " + NoiseDistribution.UNIFORM);
+
+ /**
+ * Percentage of the variance of the random Gaussian noise generation or of
+ * the range of the uniform distribution, given the variance of the
+ * corresponding attribute in the data.
+ */
+ protected double percentage;
+
+ /**
+ * The option which reference to use for scaling the noise.
+ */
+ protected ScalingReference scalingreference;
+
+ /**
+ * The option which nature of noise distribution to choose.
+ */
+ protected NoiseDistribution noisedistribution;
+
+ /**
+ * Read the filter options: scaling reference, noise distribution,
+ * percentage, seed, and the optional preset minima and maxima.
+ *
+ * @param config Parameterization to read the options from
+ */
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ // Scaling reference, defaulting to UNITCUBE.
+ EnumParameter<ScalingReference> scalingReferenceP = new EnumParameter<>(SCALINGREFERENCE_ID, ScalingReference.class, ScalingReference.UNITCUBE);
+ if(config.grab(scalingReferenceP)) {
+ scalingreference = scalingReferenceP.getValue();
+ }
+ // Noise distribution, defaulting to UNIFORM.
+ EnumParameter<NoiseDistribution> noisedistributionP = new EnumParameter<>(NOISEDISTRIBUTION_ID, NoiseDistribution.class, NoiseDistribution.UNIFORM);
+ if(config.grab(noisedistributionP)) {
+ noisedistribution = noisedistributionP.getValue();
+ }
+ // Percentage, default 0.01, constrained to 0 < percentage <= 1.
+ DoubleParameter percentageP = new DoubleParameter(PERCENTAGE_ID, .01);
+ percentageP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ percentageP.addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
+ if(config.grab(percentageP)) {
+ percentage = percentageP.getValue();
+ }
+ // Optional seed; the field stays null when the option is not given.
+ LongParameter seedP = new LongParameter(SEED_ID);
+ seedP.setOptional(true);
+ if(config.grab(seedP)) {
+ seed = seedP.getValue();
+ }
+ // Optional preset bounds; the fields stay empty arrays when not given.
+ DoubleListParameter minimaP = new DoubleListParameter(MINIMA_ID);
+ minimaP.setOptional(true);
+ if(config.grab(minimaP)) {
+ minima = ArrayLikeUtil.toPrimitiveDoubleArray(minimaP.getValue());
+ }
+ DoubleListParameter maximaP = new DoubleListParameter(MAXIMA_ID);
+ maximaP.setOptional(true);
+ if(config.grab(maximaP)) {
+ maxima = ArrayLikeUtil.toPrimitiveDoubleArray(maximaP.getValue());
+ }
+
+ // Going by the constraint names: minima and maxima must be given together
+ // and must have the same number of entries.
+ config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(minimaP, maximaP));
+ config.checkConstraint(new EqualSizeGlobalConstraint(minimaP, maximaP));
+ }
+
+ /**
+ * Create the filter from the option values stored in this parameterizer.
+ *
+ * @return new filter instance
+ */
+ @Override
+ protected PerturbationFilter<V> makeInstance() {
+ return new PerturbationFilter<>(seed, percentage, scalingreference, minima, maxima, noisedistribution);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java
index af3f4c6e..e58ea3b0 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -37,6 +37,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
*
+ * @apiviz.composedOf Projection
+ *
* @param <I> Input type
* @param <O> Output type
*/
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java
index 7082f103..3a81b989 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team