summary refs log tree commit diff
path: root/src/de/lmu/ifi/dbs/elki/datasource/filter
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/datasource/filter')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java 25
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java 5
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java 8
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java 213
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java 6
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java 185
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java 5
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java 17
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java 14
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java 10
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java 10
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java 294
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java 15
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMADNormalization.java 202
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java 56
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java 24
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java 13
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java 6
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/NonNumericFeaturesException.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/Normalization.java 13
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java 6
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java 13
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java 2
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java 230
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java 274
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java 50
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java 111
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java 165
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java 111
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java 6
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java 4
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java 113
-rw-r--r-- src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java 2
47 files changed, 2107 insertions, 139 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java
index 5948cd83..1cb68b30 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractConversionFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,6 +27,8 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
/**
@@ -71,10 +73,17 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter {
// When necessary, perform an initialization scan
if(prepareStart(castType)) {
+ FiniteProgress pprog = getLogger().isVerbose() ? new FiniteProgress("Preparing normalization.", objects.dataLength(), getLogger()) : null;
for(Object o : column) {
@SuppressWarnings("unchecked")
final I obj = (I) o;
prepareProcessInstance(obj);
+ if (pprog != null) {
+ pprog.incrementProcessed(getLogger());
+ }
+ }
+ if (pprog != null) {
+ pprog.ensureCompleted(getLogger());
}
prepareComplete();
}
@@ -84,17 +93,31 @@ public abstract class AbstractConversionFilter<I, O> implements ObjectFilter {
bundle.appendColumn(convertedType(castType), castColumn);
// Normalization scan
+ FiniteProgress nprog = getLogger().isVerbose() ? new FiniteProgress("Data normalization.", objects.dataLength(), getLogger()) : null;
for(int i = 0; i < objects.dataLength(); i++) {
@SuppressWarnings("unchecked")
final I obj = (I) column.get(i);
final O normalizedObj = filterSingleObject(obj);
castColumn.set(i, normalizedObj);
+ if (nprog != null) {
+ nprog.incrementProcessed(getLogger());
+ }
+ }
+ if (nprog != null) {
+ nprog.ensureCompleted(getLogger());
}
}
return bundle;
}
/**
+ * Class logger.
+ *
+ * @return Logger
+ */
+ abstract protected Logging getLogger();
+
+ /**
* Normalize a single instance.
*
* You can implement this as UnsupportedOperationException if you override
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java
index 9b628f2d..5b48a8c0 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamConversionFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
*/
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
/**
@@ -105,7 +106,7 @@ public abstract class AbstractStreamConversionFilter<I, O> extends AbstractStrea
*
* @return Type restriction
*/
- protected abstract SimpleTypeInformation<? super I> getInputTypeRestriction();
+ protected abstract TypeInformation getInputTypeRestriction();
/**
* Get the output type from the input type after conversion.
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java
index 368be1a2..6a210db3 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractStreamFilter.java
@@ -8,7 +8,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.StreamFromBundle;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java
index 66d10967..b9305aa6 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorConversionFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java
index 695a54e0..6a15c41c 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/AbstractVectorStreamConversionFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
index 21f05739..2109761a 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ByLabelFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
index 4a349d3d..e8dc69c3 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -104,8 +104,8 @@ public class ClassLabelFilter implements ObjectFilter {
done = true;
// We split the label column into two parts
- List<ClassLabel> clscol = new ArrayList<ClassLabel>(objects.dataLength());
- List<LabelList> lblcol = new ArrayList<LabelList>(objects.dataLength());
+ List<ClassLabel> clscol = new ArrayList<>(objects.dataLength());
+ List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
// Split the column
for (Object obj : objects.getColumn(i)) {
@@ -160,7 +160,7 @@ public class ClassLabelFilter implements ObjectFilter {
// parameter class label index
final IntParameter classLabelIndexParam = new IntParameter(CLASS_LABEL_INDEX_ID);
classLabelIndexParam.addConstraint(new GreaterEqualConstraint(0));
- final ObjectParameter<ClassLabel.Factory<?>> classlabelClassParam = new ObjectParameter<ClassLabel.Factory<?>>(CLASS_LABEL_CLASS_ID, ClassLabel.Factory.class, SimpleClassLabel.Factory.class);
+ final ObjectParameter<ClassLabel.Factory<?>> classlabelClassParam = new ObjectParameter<>(CLASS_LABEL_CLASS_ID, ClassLabel.Factory.class, SimpleClassLabel.Factory.class);
config.grab(classLabelIndexParam);
config.grab(classlabelClassParam);
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
new file mode 100644
index 00000000..97624ac8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ClassLabelFromPatternFilter.java
@@ -0,0 +1,213 @@
+package de.lmu.ifi.dbs.elki.datasource.filter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.data.LabelList;
+import de.lmu.ifi.dbs.elki.data.SimpleClassLabel;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.StringParameter;
+
+/**
+ * Streaming filter to derive a class label (e.g. outlier vs. inlier) by matching a regular expression against the label columns.
+ *
+ * @author Erich Schubert
+ */
+public class ClassLabelFromPatternFilter extends AbstractStreamFilter {
+ /**
+ * Current meta data; reset when the source reports a meta change and rebuilt lazily on the next access.
+ */
+ BundleMeta meta = null;
+
+ /**
+ * Bitset of label columns (bit indexes refer to the columns of the source stream, not of this filter's output).
+ */
+ BitSet labelcols = new BitSet();
+
+ /**
+ * Label to return for positive matches.
+ */
+ SimpleClassLabel positive;
+
+ /**
+ * Label to return for negative matches.
+ */
+ SimpleClassLabel negative;
+
+ /**
+ * Matching pattern.
+ */
+ Pattern pattern;
+
+ /**
+ * Constructor.
+ *
+ * @param pattern Pattern for matching
+ * @param positive Positive label
+ * @param negative Negative label
+ */
+ public ClassLabelFromPatternFilter(Pattern pattern, String positive, String negative) {
+ super();
+ this.pattern = pattern;
+ this.positive = new SimpleClassLabel(positive);
+ this.negative = new SimpleClassLabel(negative);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param pattern Pattern for matching
+ * @param positive Positive label
+ * @param negative Negative label
+ */
+ public ClassLabelFromPatternFilter(Pattern pattern, SimpleClassLabel positive, SimpleClassLabel negative) {
+ super();
+ this.pattern = pattern;
+ this.positive = positive;
+ this.negative = negative;
+ }
+
+ @Override
+ public BundleMeta getMeta() {
+ if (meta == null) {
+ // Rebuild metadata.
+ BundleMeta origmeta = source.getMeta();
+ meta = new BundleMeta(origmeta.size() + 1);
+ meta.add(TypeUtil.SIMPLE_CLASSLABEL);
+ labelcols.clear();
+ for (int i = 0; i < origmeta.size(); i++) {
+ final SimpleTypeInformation<?> orig = origmeta.get(i);
+ if (TypeUtil.GUESSED_LABEL.isAssignableFromType(orig)) {
+ labelcols.set(i);
+ }
+ meta.add(orig);
+ }
+ }
+ return meta;
+ }
+
+ @Override
+ public Object data(int rnum) {
+ if (rnum > 0) {
+ return source.data(rnum - 1);
+ }
+ if (meta == null) {
+ getMeta(); // Trigger build
+ }
+ for (int i = labelcols.nextSetBit(0); i >= 0; i = labelcols.nextSetBit(i + 1)) {
+ Object o = source.data(i);
+ if (o == null) {
+ continue;
+ }
+ if (o instanceof LabelList) {
+ for (String l : (LabelList) o) {
+ if (pattern.matcher(l).find()) {
+ return positive;
+ }
+ }
+ continue;
+ }
+ if (pattern.matcher(o.toString()).find()) {
+ return positive;
+ }
+ }
+ return negative;
+ }
+
+ @Override
+ public Event nextEvent() {
+ final Event ev = source.nextEvent();
+ if (Event.META_CHANGED.equals(ev)) {
+ meta = null;
+ }
+ return ev;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Pattern for recognizing positive objects.
+ */
+ public static final OptionID PATTERN_ID = new OptionID("class.pattern", "Regular expression to identify positive objects.");
+
+ /**
+ * Class label to assign to positive instances.
+ */
+ public static final OptionID POSITIVE_ID = new OptionID("class.positive", "Class label to use for positive instances.");
+
+ /**
+ * Class label to assign to negative instances.
+ */
+ public static final OptionID NEGATIVE_ID = new OptionID("class.negative", "Class label to use for negative instances.");
+
+ /**
+ * Matching pattern.
+ */
+ Pattern pattern;
+
+ /**
+ * Names for positive and negative classes.
+ */
+ String positive, negative;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ PatternParameter patternP = new PatternParameter(PATTERN_ID);
+ if (config.grab(patternP)) {
+ pattern = patternP.getValue();
+ }
+
+ StringParameter positiveP = new StringParameter(POSITIVE_ID, "positive");
+ if (config.grab(positiveP)) {
+ positive = positiveP.getValue();
+ }
+
+ StringParameter negativeP = new StringParameter(NEGATIVE_ID, "negative");
+ if (config.grab(negativeP)) {
+ negative = negativeP.getValue();
+ }
+ }
+
+ @Override
+ protected ClassLabelFromPatternFilter makeInstance() {
+ return new ClassLabelFromPatternFilter(pattern, positive, negative);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
index 2753534a..926ebe99 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ExternalIDFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -87,8 +87,8 @@ public class ExternalIDFilter implements ObjectFilter {
done = true;
// We split the label column into two parts
- List<ExternalID> eidcol = new ArrayList<ExternalID>(objects.dataLength());
- List<LabelList> lblcol = new ArrayList<LabelList>(objects.dataLength());
+ List<ExternalID> eidcol = new ArrayList<>(objects.dataLength());
+ List<LabelList> lblcol = new ArrayList<>(objects.dataLength());
// Split the column
for (Object obj : objects.getColumn(i)) {
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java
index 7b794066..9ef9d34f 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/FilterUtil.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
index 2e5071a4..7f09b905 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/FixedDBIDsFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
index 6723a12a..37f8f8d9 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/HistogramJitterFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -158,7 +158,7 @@ public class HistogramJitterFilter<V extends NumberVector<?>> extends AbstractVe
@Override
protected HistogramJitterFilter<DoubleVector> makeInstance() {
- return new HistogramJitterFilter<DoubleVector>(jitter, rnd);
+ return new HistogramJitterFilter<>(jitter, rnd);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java
new file mode 100644
index 00000000..769f3009
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/NaNFilter.java
@@ -0,0 +1,185 @@
+package de.lmu.ifi.dbs.elki.datasource.filter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.BitSet;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * A filter to drop all records that contain NaN values (or a null vector) in one of the checked vector columns.
+ *
+ * Note: currently, only dense vector columns are supported.
+ *
+ * TODO: add support for sparse vectors.
+ *
+ * @author Erich Schubert
+ */
+public class NaNFilter extends AbstractStreamFilter {
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(NaNFilter.class);
+
+ /**
+ * Columns to check.
+ */
+ private BitSet densecols = null;
+
+ /**
+ * Constructor.
+ */
+ public NaNFilter() {
+ super();
+ }
+
+ @Override
+ public BundleMeta getMeta() {
+ return source.getMeta();
+ }
+
+ @Override
+ public Object data(int rnum) {
+ return source.data(rnum);
+ }
+
+ @Override
+ public Event nextEvent() {
+ while (true) {
+ Event ev = source.nextEvent();
+ switch(ev) {
+ case END_OF_STREAM:
+ return ev;
+ case META_CHANGED:
+ updateMeta(source.getMeta());
+ return ev;
+ case NEXT_OBJECT:
+ if (densecols == null) {
+ updateMeta(source.getMeta());
+ }
+ boolean good = true;
+ for (int j = densecols.nextSetBit(0); j >= 0; j = densecols.nextSetBit(j + 1)) {
+ NumberVector<?> v = (NumberVector<?>) source.data(j);
+ if (v == null) {
+ good = false;
+ break;
+ }
+ for (int i = 0; i < v.getDimensionality(); i++) {
+ if (Double.isNaN(v.doubleValue(i))) {
+ good = false;
+ break;
+ }
+ }
+ }
+ if (good) {
+ return ev;
+ }
+ continue;
+ }
+ }
+ }
+
+ /**
+ * Process an updated meta record.
+ *
+ * @param meta Meta record
+ */
+ private void updateMeta(BundleMeta meta) {
+ int cols = meta.size();
+ if (densecols == null) {
+ densecols = new BitSet();
+ } else {
+ densecols.clear();
+ }
+ for (int i = 0; i < cols; i++) {
+ if (TypeUtil.SPARSE_VECTOR_VARIABLE_LENGTH.isAssignableFromType(meta.get(i))) {
+ throw new AbortException("Filtering sparse vectors is not yet supported by this filter. Please contribute.");
+ }
+ // TODO: only check for double and float?
+ if (TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(meta.get(i))) {
+ densecols.set(i);
+ continue;
+ }
+ if (TypeUtil.DOUBLE_VECTOR_FIELD.isAssignableFromType(meta.get(i))) {
+ densecols.set(i);
+ continue;
+ }
+ }
+ }
+
+ @Override
+ public MultipleObjectsBundle filter(final MultipleObjectsBundle objects) {
+ if (LOG.isDebuggingFinest()) {
+ LOG.debugFinest("Removing records with NaN values.");
+ }
+
+ updateMeta(objects.meta());
+ MultipleObjectsBundle bundle = new MultipleObjectsBundle();
+ for (int j = 0; j < objects.metaLength(); j++) {
+ bundle.appendColumn(objects.meta(j), new ArrayList<>());
+ }
+ for (int i = 0; i < objects.dataLength(); i++) {
+ final Object[] row = objects.getRow(i);
+ boolean good = true;
+ for (int j = densecols.nextSetBit(0); j >= 0; j = densecols.nextSetBit(j + 1)) {
+ NumberVector<?> v = (NumberVector<?>) row[j];
+ if (v == null) {
+ good = false;
+ break;
+ }
+ for (int d = 0; d < v.getDimensionality(); d++) {
+ if (Double.isNaN(v.doubleValue(d))) {
+ good = false;
+ break;
+ }
+ }
+ }
+ if (good) {
+ bundle.appendSimple(row);
+ }
+ }
+ return bundle;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected Object makeInstance() {
+ return new NaNFilter();
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java
index bfc6ad5c..b3f0af53 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoMissingValuesFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -97,7 +97,7 @@ public class NoMissingValuesFilter extends AbstractStreamFilter {
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
for(int j = 0; j < objects.metaLength(); j++) {
- bundle.appendColumn(objects.meta(j), new ArrayList<Object>());
+ bundle.appendColumn(objects.meta(j), new ArrayList<>());
}
for(int i = 0; i < objects.dataLength(); i++) {
boolean good = true;
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java
index 264f58fd..ce758763 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/NoOpFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java
index b3670e9b..5073cea8 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ObjectFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
*/
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
-import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned;
/**
* Object filters as part of the input step.
@@ -35,7 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned;
*
* @apiviz.uses MultipleObjectsBundle oneway - - «filters»
*/
-public interface ObjectFilter extends InspectionUtilFrequentlyScanned {
+public interface ObjectFilter {
/**
* Filter a set of object packages.
*
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
index 0fbec083..5c8d07d0 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/RandomSamplingStreamFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
index 01a6da10..b8bf968b 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/ShuffleObjectsFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -96,7 +96,7 @@ public class ShuffleObjectsFilter implements ObjectFilter {
for (int j = 0; j < objects.metaLength(); j++) {
// Reorder column accordingly
List<?> in = objects.getColumn(j);
- List<Object> data = new ArrayList<Object>(size);
+ List<Object> data = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
data.add(in.get(offsets[i]));
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java
index 308a54b1..d35d9cde 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SortByLabelFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -89,7 +89,7 @@ public class SortByLabelFilter implements ObjectFilter {
for (int j = 0; j < objects.metaLength(); j++) {
// Reorder column accordingly
List<?> in = objects.getColumn(j);
- List<Object> data = new ArrayList<Object>(size);
+ List<Object> data = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
data.add(in.get(offsets[i]));
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java
index d3ef418d..97960907 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SparseVectorFieldFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,6 +27,7 @@ import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
/**
* Class that turns sparse float vectors into a proper vector field, by setting
@@ -38,6 +39,11 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
*/
public class SparseVectorFieldFilter<V extends SparseNumberVector<?>> extends AbstractConversionFilter<V, V> {
/**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(SparseVectorFieldFilter.class);
+
+ /**
* Maximum dimension.
*/
int maxdim = -1;
@@ -74,6 +80,11 @@ public class SparseVectorFieldFilter<V extends SparseNumberVector<?>> extends Ab
@Override
protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
SparseNumberVector.Factory<V, ?> factory = (SparseNumberVector.Factory<V, ?>) FilterUtil.guessFactory(in);
- return new VectorFieldTypeInformation<V>(factory, maxdim);
+ return new VectorFieldTypeInformation<>(factory, maxdim);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
index 898eeff7..8146bd5b 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/SplitNumberVectorFilter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -87,10 +87,10 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
Factory<V, ?> factory = FilterUtil.guessFactory(vtype);
// Get the replacement type informations
- VectorFieldTypeInformation<V> type1 = new VectorFieldTypeInformation<V>(factory, dims.length);
- VectorFieldTypeInformation<V> type2 = new VectorFieldTypeInformation<V>(factory, vtype.getDimensionality() - dims.length);
- final List<V> col1 = new ArrayList<V>(column.size());
- final List<V> col2 = new ArrayList<V>(column.size());
+ VectorFieldTypeInformation<V> type1 = new VectorFieldTypeInformation<>(factory, dims.length);
+ VectorFieldTypeInformation<V> type2 = new VectorFieldTypeInformation<>(factory, vtype.getDimensionality() - dims.length);
+ final List<V> col1 = new ArrayList<>(column.size());
+ final List<V> col2 = new ArrayList<>(column.size());
bundle.appendColumn(type1, col1);
bundle.appendColumn(type2, col2);
@@ -145,7 +145,7 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
for (int i = 1; i < dims.length; i++) {
m = Math.max(dims[i], m);
}
- return new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, m, Integer.MAX_VALUE);
+ return new VectorFieldTypeInformation<>(NumberVector.class, m, Integer.MAX_VALUE);
}
/**
@@ -182,7 +182,7 @@ public class SplitNumberVectorFilter<V extends NumberVector<?>> implements Objec
@Override
protected SplitNumberVectorFilter<V> makeInstance() {
- return new SplitNumberVectorFilter<V>(dims);
+ return new SplitNumberVectorFilter<>(dims);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java
index 5d121659..798cd05d 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/StreamFilter.java
@@ -6,7 +6,7 @@ import de.lmu.ifi.dbs.elki.datasource.bundle.BundleStreamSource;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
index 2dcf09f8..0b4d7ae0 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorConversionFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
@@ -51,11 +50,6 @@ public abstract class AbstractNormalization<O extends NumberVector<?>> extends A
}
@Override
- public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) {
- return super.filter(objects);
- }
-
- @Override
public LinearEquationSystem transform(LinearEquationSystem linearEquationSystem) {
// FIXME: implement.
throw new UnsupportedOperationException("Not yet implemented!");
@@ -65,4 +59,4 @@ public abstract class AbstractNormalization<O extends NumberVector<?>> extends A
public String toString() {
return getClass().getName();
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
index a1e2c55e..54fc7794 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AbstractStreamNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.datasource.filter.AbstractVectorStreamConversionFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
@@ -49,11 +48,6 @@ public abstract class AbstractStreamNormalization<O extends NumberVector<?>> ext
initializeOutputType(in);
return in;
}
-
- @Override
- public MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) {
- return super.filter(objects);
- }
@Override
public LinearEquationSystem transform(LinearEquationSystem linearEquationSystem) {
@@ -67,4 +61,4 @@ public abstract class AbstractStreamNormalization<O extends NumberVector<?>> ext
result.append("normalization class: ").append(getClass().getName());
return result.toString();
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
new file mode 100644
index 00000000..8fd46336
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseCDFNormalization.java
@@ -0,0 +1,294 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta.BestFitEstimator;
+import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
+
+/**
+ * Class to perform a normalization on real vectors by estimating the
+ * distribution of values along each dimension independently, then rescaling
+ * objects to the cumulative distribution function (CDF) value at the original
+ * coordinate. Undoing the normalization is not implemented yet.
+ *
+ * This process is for example also discussed in section 3.4 of
+ * <p>
+ * Effects of Feature Normalization on Image Retrieval <br/>
+ * S. Aksoy, R. M. Haralick
+ * </p>
+ * but they do not detail how to obtain an appropriate function `F`.
+ *
+ * @author Erich Schubert
+ * @param <V> vector type
+ *
+ * @apiviz.uses NumberVector
+ * @apiviz.uses DistributionEstimator
+ */
+// TODO: extract superclass AbstractAttributeWiseNormalization
+public class AttributeWiseCDFNormalization<V extends NumberVector<?>> implements Normalization<V> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(AttributeWiseCDFNormalization.class);
+
+ /**
+ * Distribution estimators to try in each dimension. If the list contains
+ * exactly one estimator, its result is used directly; otherwise the fits of
+ * all estimators are compared and the best one is kept.
+ */
+ private List<DistributionEstimator<?>> estimators;
+
+ /**
+ * Distributions chosen by the filter, one per dimension. The list is
+ * recreated for every vector column that is processed, so it describes the
+ * last processed column only.
+ */
+ private List<Distribution> dists;
+
+ /**
+ * Number vector factory, guessed from the type of the processed column.
+ */
+ protected NumberVector.Factory<V, ?> factory;
+
+  /**
+   * Creates the normalization for the given candidate estimators.
+   *
+   * @param estimators Distribution estimators to try; the list is stored as
+   *        given, without copying
+   */
+  public AttributeWiseCDFNormalization(List<DistributionEstimator<?>> estimators) {
+    this.estimators = estimators;
+  }
+
+ @Override
+ public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
+ if (objects.dataLength() == 0) {
+ return objects;
+ }
+ for (int r = 0; r < objects.metaLength(); r++) {
+ SimpleTypeInformation<?> type = (SimpleTypeInformation<?>) objects.meta(r);
+ final List<?> column = (List<?>) objects.getColumn(r);
+ if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
+ continue;
+ }
+ @SuppressWarnings("unchecked")
+ final List<V> castColumn = (List<V>) column;
+ // Get the replacement type information
+ @SuppressWarnings("unchecked")
+ final VectorFieldTypeInformation<V> castType = (VectorFieldTypeInformation<V>) type;
+ factory = FilterUtil.guessFactory(castType);
+
+ // Scan to find the best
+ final int dim = castType.getDimensionality();
+ dists = new ArrayList<>(dim);
+ // Scratch space for testing:
+ double[] test = new double[castColumn.size()];
+
+ // We iterate over dimensions, this kind of filter needs fast random
+ // access.
+ Adapter<V> adapter = new Adapter<>();
+ for (int d = 0; d < dim; d++) {
+ adapter.dim = d;
+ if (estimators.size() == 1) {
+ dists.add(estimators.get(0).estimate(castColumn, adapter));
+ } else {
+ Distribution best = null;
+ double bestq = Double.POSITIVE_INFINITY;
+ trials: for (DistributionEstimator<?> est : estimators) {
+ try {
+ Distribution dist = est.estimate(castColumn, adapter);
+ for (int i = 0; i < test.length; i++) {
+ test[i] = dist.cdf(castColumn.get(i).doubleValue(d));
+ if (Double.isNaN(test[i])) {
+ LOG.warning("Got NaN after fitting " + est.toString() + ": " + dist.toString());
+ continue trials;
+ }
+ if (Double.isInfinite(test[i])) {
+ LOG.warning("Got infinite value after fitting " + est.toString() + ": " + dist.toString());
+ continue trials;
+ }
+ }
+ Arrays.sort(test);
+ double q = KolmogorovSmirnovTest.simpleTest(test);
+ if (LOG.isVeryVerbose()) {
+ LOG.veryverbose("Estimator " + est.toString() + " (" + dist.toString() + ") has maximum deviation " + q + " for dimension " + d);
+ }
+ if (best == null || q < bestq) {
+ best = dist;
+ bestq = q;
+ }
+ } catch (ArithmeticException e) {
+ if (LOG.isVeryVerbose()) {
+ LOG.veryverbose("Fitting distribution " + est + " failed: " + e.getMessage());
+ }
+ continue;
+ }
+ }
+ if (LOG.isVerbose()) {
+ LOG.verbose("Best fit for dimension " + d + ": " + best.toString());
+ }
+ dists.add(best);
+ }
+ }
+
+ // Normalization scan
+ double[] buf = new double[dim];
+ for (int i = 0; i < objects.dataLength(); i++) {
+ final V obj = castColumn.get(i);
+ for (int d = 0; d < dim; d++) {
+ buf[d] = dists.get(d).cdf(obj.doubleValue(d));
+ }
+ castColumn.set(i, factory.newNumberVector(buf));
+ }
+ }
+ return objects;
+ }
+
+ /**
+ * Restoring the original vector is not implemented yet for this
+ * normalization.
+ *
+ * @param featureVector Normalized vector (unused)
+ * @return never returns normally
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public V restore(V featureVector) throws NonNumericFeaturesException {
+ throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ /**
+ * Transforming a linear equation system is not implemented yet for this
+ * normalization.
+ *
+ * @param linearEquationSystem Equation system (unused)
+ * @return never returns normally
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public LinearEquationSystem transform(LinearEquationSystem linearEquationSystem) {
+ throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+  /**
+   * Describes this normalization: the class name on the first line, followed
+   * by the comma separated simple class names of the configured estimators.
+   *
+   * @return Description string
+   */
+  @Override
+  public String toString() {
+    final StringBuilder buf = new StringBuilder();
+    buf.append("normalization class: ").append(getClass().getName()).append('\n');
+    buf.append("normalization distributions: ");
+    // Empty before the first entry, a comma before every later one.
+    String separator = "";
+    for (DistributionEstimator<?> est : estimators) {
+      buf.append(separator).append(est.getClass().getSimpleName());
+      separator = ",";
+    }
+    return buf.toString();
+  }
+
+ /**
+ * Array adapter that presents a single dimension of a list of vectors as a
+ * number array, so that one attribute can be handed to a distribution
+ * estimator without copying.
+ *
+ * The dimension is selected through the mutable field {@link #dim}, which
+ * the user of the adapter sets before passing it on.
+ *
+ * @param <V> vector type
+ */
+ private static class Adapter<V extends NumberVector<?>> implements NumberArrayAdapter<Double, List<V>> {
+ /**
+ * Dimension to process.
+ */
+
+ int dim;
+
+ @Override
+ public int size(List<V> array) {
+ // One value per vector in the list.
+ return array.size();
+ }
+
+ @Override
+ public Double get(List<V> array, int off) throws IndexOutOfBoundsException {
+ // Boxed variant of getDouble.
+ return getDouble(array, off);
+ }
+
+ @Override
+ public double getDouble(List<V> array, int off) throws IndexOutOfBoundsException {
+ // Value of the selected dimension of the vector at position off.
+ return array.get(off).doubleValue(dim);
+ }
+
+ @Override
+ public float getFloat(List<V> array, int off) throws IndexOutOfBoundsException {
+ return array.get(off).floatValue(dim);
+ }
+
+ @Override
+ public int getInteger(List<V> array, int off) throws IndexOutOfBoundsException {
+ return array.get(off).intValue(dim);
+ }
+
+ @Override
+ public short getShort(List<V> array, int off) throws IndexOutOfBoundsException {
+ return array.get(off).shortValue(dim);
+ }
+
+ @Override
+ public long getLong(List<V> array, int off) throws IndexOutOfBoundsException {
+ return array.get(off).longValue(dim);
+ }
+
+ @Override
+ public byte getByte(List<V> array, int off) throws IndexOutOfBoundsException {
+ return array.get(off).byteValue(dim);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> vector type
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * Parameter for distribution estimators.
+ */
+ public static final OptionID DISTRIBUTIONS_ID = new OptionID("normalize.distributions", "A list of the distribution estimators to try.");
+
+ /**
+ * Stores the distribution estimators. Only assigned when the option could
+ * be read from the configuration.
+ */
+ private List<DistributionEstimator<?>> estimators;
+
+ /**
+ * Declares the estimator list option, with {@link BestFitEstimator} as
+ * the only default entry, and instantiates the selected classes.
+ *
+ * @param config Parameterization to read the option from
+ */
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectListParameter<DistributionEstimator<?>> estP = new ObjectListParameter<>(DISTRIBUTIONS_ID, DistributionEstimator.class);
+ // Default: a one-element list holding BestFitEstimator.
+ List<Class<? extends DistributionEstimator<?>>> def = new ArrayList<>(1);
+ def.add(BestFitEstimator.class);
+ estP.setDefaultValue(def);
+ if (config.grab(estP)) {
+ estimators = estP.instantiateClasses(config);
+ }
+ }
+
+ /**
+ * Builds the normalization from the estimators read in makeOptions.
+ *
+ * @return New normalization instance
+ */
+ @Override
+ protected AttributeWiseCDFNormalization<V> makeInstance() {
+ return new AttributeWiseCDFNormalization<>(estimators);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
index f5e24bca..9a263171 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseErfNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,6 +26,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
/**
@@ -40,6 +41,11 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
*/
public class AttributeWiseErfNormalization<O extends NumberVector<?>> extends AbstractNormalization<O> {
/**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(AttributeWiseErfNormalization.class);
+
+ /**
* Constructor.
*/
public AttributeWiseErfNormalization() {
@@ -54,7 +60,7 @@ public class AttributeWiseErfNormalization<O extends NumberVector<?>> extends Ab
@Override
protected O filterSingleObject(O obj) {
double[] val = new double[obj.getDimensionality()];
- for(int i = 0; i < val.length; i++) {
+ for (int i = 0; i < val.length; i++) {
val[i] = NormalDistribution.erf(obj.doubleValue(i));
}
return factory.newNumberVector(val);
@@ -64,4 +70,9 @@ public class AttributeWiseErfNormalization<O extends NumberVector<?>> extends Ab
protected SimpleTypeInformation<? super O> getInputTypeRestriction() {
return TypeUtil.NUMBER_VECTOR_FIELD;
}
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMADNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMADNormalization.java
new file mode 100644
index 00000000..8c4f15e1
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMADNormalization.java
@@ -0,0 +1,202 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.datasource.filter.FilterUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+
+/**
+ * Median Absolute Deviation is used for scaling the data set as follows:
+ *
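+ * First, the median and the median absolute deviation (MAD) are computed in
+ * each axis. Then, each value is projected to
+ * (x - median(X)) / (c * MAD(X)), where c is the constant
+ * NormalDistribution.ONEBYPHIINV075 that rescales the MAD to an estimate of
+ * the standard deviation.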
+ *
+ * This is similar to z-standardization of data sets, except that it is more
+ * robust towards outliers, and only slightly more expensive to compute.
+ *
+ * @author Erich Schubert
+ * @param <V> vector type
+ *
+ * @apiviz.uses NumberVector
+ */
+// TODO: extract superclass AbstractAttributeWiseNormalization
+public class AttributeWiseMADNormalization<V extends NumberVector<?>> implements Normalization<V> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(AttributeWiseMADNormalization.class);
+
+ /**
+ * Number vector factory, guessed from the type of the processed column.
+ */
+ protected NumberVector.Factory<V, ?> factory;
+
+ /**
+ * Stores the median in each dimension. Empty until the filter has
+ * processed a vector column.
+ */
+ private double[] median = new double[0];
+
+ /**
+ * Stores the scale used in each dimension: the median absolute deviation
+ * multiplied by NormalDistribution.ONEBYPHIINV075, i.e. rescaled to a
+ * standard deviation estimate. Empty until the filter has processed a
+ * vector column.
+ */
+ private double[] madsigma = new double[0];
+
+  /**
+   * Creates the normalization. Medians and scales are determined later, when
+   * the filter is applied to data.
+   */
+  public AttributeWiseMADNormalization() {
+  }
+
+  /**
+   * Normalizes every number vector field column of the bundle in place.
+   *
+   * For each dimension, the median and the median absolute deviation (MAD)
+   * are computed; the MAD is rescaled to a standard deviation estimate. Every
+   * coordinate is then replaced by its centered and scaled value. Columns of
+   * other types are left untouched.
+   *
+   * If the MAD of a dimension is not positive (e.g. more than half of the
+   * values are identical), dividing by it would yield NaN or infinite
+   * results. In this case the smallest positive absolute deviation is used
+   * instead, or 1 if the attribute has no positive deviation at all.
+   *
+   * Note that {@link #factory}, {@link #median} and {@link #madsigma} are
+   * overwritten for each processed column, so afterwards they describe the
+   * last one only.
+   *
+   * @param objects Bundle to normalize
+   * @return the same bundle instance, with its vector columns rewritten
+   */
+  @Override
+  public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
+    if (objects.dataLength() == 0) {
+      return objects;
+    }
+    for (int r = 0; r < objects.metaLength(); r++) {
+      SimpleTypeInformation<?> type = (SimpleTypeInformation<?>) objects.meta(r);
+      final List<?> column = (List<?>) objects.getColumn(r);
+      if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
+        continue;
+      }
+      @SuppressWarnings("unchecked")
+      final List<V> castColumn = (List<V>) column;
+      // Get the replacement type information
+      @SuppressWarnings("unchecked")
+      final VectorFieldTypeInformation<V> castType = (VectorFieldTypeInformation<V>) type;
+      factory = FilterUtil.guessFactory(castType);
+
+      // Scan to compute median and scale of each dimension
+      final int dim = castType.getDimensionality();
+      median = new double[dim];
+      madsigma = new double[dim];
+      // Scratch space, holds one attribute of all objects at a time:
+      double[] test = new double[castColumn.size()];
+
+      FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Analyzing data.", dim, LOG) : null;
+      // We iterate over dimensions, this kind of filter needs fast random
+      // access.
+      for (int d = 0; d < dim; d++) {
+        for (int i = 0; i < test.length; i++) {
+          test[i] = castColumn.get(i).doubleValue(d);
+        }
+        final double med = QuickSelect.median(test);
+        median[d] = med;
+        for (int i = 0; i < test.length; i++) {
+          test[i] = Math.abs(test[i] - med);
+        }
+        double mad = QuickSelect.median(test);
+        if (!(mad > 0.)) {
+          // Too many ties with the median (or NaN): avoid a zero divisor.
+          mad = smallestPositive(test, 1.);
+        }
+        // Rescale the true MAD for the best standard deviation estimate:
+        madsigma[d] = mad * NormalDistribution.ONEBYPHIINV075;
+        if (dprog != null) {
+          dprog.incrementProcessed(LOG);
+        }
+      }
+      if (dprog != null) {
+        dprog.ensureCompleted(LOG);
+      }
+
+      FiniteProgress nprog = LOG.isVerbose() ? new FiniteProgress("Data normalization.", objects.dataLength(), LOG) : null;
+      // Normalization scan
+      double[] buf = new double[dim];
+      for (int i = 0; i < objects.dataLength(); i++) {
+        final V obj = castColumn.get(i);
+        for (int d = 0; d < dim; d++) {
+          buf[d] = normalize(d, obj.doubleValue(d));
+        }
+        castColumn.set(i, factory.newNumberVector(buf));
+        if (nprog != null) {
+          nprog.incrementProcessed(LOG);
+        }
+      }
+      if (nprog != null) {
+        nprog.ensureCompleted(LOG);
+      }
+    }
+    return objects;
+  }
+
+  /**
+   * Finds the smallest strictly positive, finite entry of an array.
+   *
+   * @param values Values to scan
+   * @param fallback Result if no such entry exists
+   * @return Smallest positive finite value, or the fallback
+   */
+  private static double smallestPositive(double[] values, double fallback) {
+    double min = Double.POSITIVE_INFINITY;
+    for (double v : values) {
+      if (v > 0. && v < min) {
+        min = v;
+      }
+    }
+    return (min < Double.POSITIVE_INFINITY) ? min : fallback;
+  }
+
+  /**
+   * Maps a normalized vector back to the original scale, using the median
+   * and scale stored for each dimension.
+   *
+   * @param featureVector Normalized vector
+   * @return New vector with the restored coordinates
+   * @throws NonNumericFeaturesException if the dimensionality of the vector
+   *         differs from the number of stored medians
+   */
+  @Override
+  public V restore(V featureVector) throws NonNumericFeaturesException {
+    final int dim = featureVector.getDimensionality();
+    // Reject vectors that do not match the analyzed data first.
+    if (dim != median.length) {
+      throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + dim + " former dimensionality: " + median.length);
+    }
+    final double[] restored = new double[dim];
+    for (int d = 0; d < dim; d++) {
+      restored[d] = restore(d, featureVector.doubleValue(d));
+    }
+    return factory.newNumberVector(restored);
+  }
+
+ /**
+ * Transforming a linear equation system is not implemented yet for this
+ * normalization.
+ *
+ * @param linearEquationSystem Equation system (unused)
+ * @return never returns normally
+ * @throws UnsupportedOperationException always
+ */
+ @Override
+ public LinearEquationSystem transform(LinearEquationSystem linearEquationSystem) throws NonNumericFeaturesException {
+ throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ /**
+ * Normalize a single dimension.
+ *
+ * @param d Dimension
+ * @param val Value
+ * @return Normalized value
+ */
+ private double normalize(int d, double val) {
+ return (val - median[d]) / madsigma[d];
+ }
+
+ /**
+ * Restore a single dimension: multiplies by the stored scale of the
+ * dimension, then adds its median.
+ *
+ * @param d Dimension
+ * @param val Normalized value
+ * @return Restored value
+ */
+ private double restore(int d, double val) {
+ return (val * madsigma[d]) + median[d];
+ }
+
+  /**
+   * Describes this normalization on three lines: the class name, the medians
+   * and the scales of all dimensions.
+   *
+   * @return Description string
+   */
+  @Override
+  public String toString() {
+    return new StringBuilder() //
+        .append("normalization class: ").append(getClass().getName()).append('\n') //
+        .append("normalization median: ").append(FormatUtil.format(median)).append('\n') //
+        .append("normalization MAD sigma: ").append(FormatUtil.format(madsigma)) //
+        .toString();
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
index 62c0bf12..31f72660 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseMinMaxNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,6 +28,7 @@ import java.util.ArrayList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
@@ -52,6 +53,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter;
// TODO: extract superclass AbstractAttributeWiseNormalization
public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends AbstractNormalization<V> {
/**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(AttributeWiseMinMaxNormalization.class);
+
+ /**
* Parameter for minimum.
*/
public static final OptionID MINIMA_ID = new OptionID("normalize.min", "a comma separated concatenation of the minimum values in each dimension that are mapped to 0. If no value is specified, the minimum value of the attribute range in this dimension will be taken.");
@@ -91,24 +97,24 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
protected void prepareProcessInstance(V featureVector) {
// First object? Then initialize.
- if(minima.length == 0 || maxima.length == 0) {
+ if (minima.length == 0 || maxima.length == 0) {
int dimensionality = featureVector.getDimensionality();
minima = new double[dimensionality];
maxima = new double[dimensionality];
- for(int i = 0; i < dimensionality; i++) {
+ for (int i = 0; i < dimensionality; i++) {
maxima[i] = -Double.MAX_VALUE;
minima[i] = Double.MAX_VALUE;
}
}
- if(minima.length != featureVector.getDimensionality()) {
+ if (minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ for (int d = 0; d < featureVector.getDimensionality(); d++) {
final double val = featureVector.doubleValue(d);
- if(val > maxima[d]) {
+ if (val > maxima[d]) {
maxima[d] = val;
}
- if(val < minima[d]) {
+ if (val < minima[d]) {
minima[d] = val;
}
}
@@ -117,10 +123,10 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
protected V filterSingleObject(V featureVector) {
double[] values = new double[featureVector.getDimensionality()];
- if(minima.length != featureVector.getDimensionality()) {
+ if (minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors and given Minima/Maxima differ in length.");
}
- for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ for (int d = 0; d < featureVector.getDimensionality(); d++) {
values[d] = (featureVector.doubleValue(d) - minima[d]) / factor(d);
}
return factory.newNumberVector(values);
@@ -128,14 +134,13 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
public V restore(V featureVector) throws NonNumericFeaturesException {
- if(featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
+ if (featureVector.getDimensionality() == maxima.length && featureVector.getDimensionality() == minima.length) {
double[] values = new double[featureVector.getDimensionality()];
- for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ for (int d = 0; d < featureVector.getDimensionality(); d++) {
values[d] = (featureVector.doubleValue(d) * (factor(d)) + minima[d]);
}
return factory.newNumberVector(values);
- }
- else {
+ } else {
throw new NonNumericFeaturesException("Attributes cannot be resized: current dimensionality: " + featureVector.getDimensionality() + " former dimensionality: " + maxima.length);
}
}
@@ -161,10 +166,10 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
int[] row = linearEquationSystem.getRowPermutations();
int[] col = linearEquationSystem.getColumnPermutations();
- for(int i = 0; i < coeff.length; i++) {
- for(int r = 0; r < coeff.length; r++) {
+ for (int i = 0; i < coeff.length; i++) {
+ for (int r = 0; r < coeff.length; r++) {
double sum = 0.0;
- for(int c = 0; c < coeff[0].length; c++) {
+ for (int c = 0; c < coeff[0].length; c++) {
sum += minima[c] * coeff[row[r]][col[c]] / factor(c);
coeff[row[r]][col[c]] = coeff[row[r]][col[c]] / factor(c);
}
@@ -186,12 +191,17 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
result.append("normalization maxima: ").append(FormatUtil.format(maxima));
return result.toString();
}
-
+
@Override
protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
return TypeUtil.NUMBER_VECTOR_FIELD;
}
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
/**
* Parameterization class.
*
@@ -214,20 +224,20 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleListParameter minimaP = new DoubleListParameter(MINIMA_ID, true);
- if(config.grab(minimaP)) {
+ if (config.grab(minimaP)) {
minima = ArrayLikeUtil.toPrimitiveDoubleArray(minimaP.getValue());
}
DoubleListParameter maximaP = new DoubleListParameter(MAXIMA_ID, true);
- if(config.grab(maximaP)) {
+ if (config.grab(maximaP)) {
maxima = ArrayLikeUtil.toPrimitiveDoubleArray(maximaP.getValue());
}
- ArrayList<Parameter<?>> global_1 = new ArrayList<Parameter<?>>();
+ ArrayList<Parameter<?>> global_1 = new ArrayList<>();
global_1.add(minimaP);
global_1.add(maximaP);
config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
- ArrayList<ListParameter<?>> global = new ArrayList<ListParameter<?>>();
+ ArrayList<ListParameter<?>> global = new ArrayList<>();
global.add(minimaP);
global.add(maximaP);
config.checkConstraint(new EqualSizeGlobalConstraint(global));
@@ -235,7 +245,7 @@ public class AttributeWiseMinMaxNormalization<V extends NumberVector<?>> extends
@Override
protected AttributeWiseMinMaxNormalization<V> makeInstance() {
- return new AttributeWiseMinMaxNormalization<V>(minima, maxima);
+ return new AttributeWiseMinMaxNormalization<>(minima, maxima);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
index 0671231d..072d1a68 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/AttributeWiseVarianceNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -96,15 +96,22 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
this.stddev = stddev;
}
+ /**
+ * Constructor.
+ */
+ public AttributeWiseVarianceNormalization() {
+ super();
+ }
+
@Override
protected boolean prepareStart(SimpleTypeInformation<V> in) {
- return (mean.length == 0 || stddev.length == 0);
+ return (mean == null || stddev == null || mean.length == 0 || stddev.length == 0);
}
@Override
protected void prepareProcessInstance(V featureVector) {
// First object? Then init. (We didn't have a dimensionality before!)
- if(mvs == null) {
+ if(mvs == null || mvs.length == 0) {
int dimensionality = featureVector.getDimensionality();
mvs = MeanVariance.newArray(dimensionality);
}
@@ -231,6 +238,11 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
return result.toString();
}
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
/**
* Parameterization class.
*
@@ -268,12 +280,12 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
}
}
- ArrayList<Parameter<?>> global_1 = new ArrayList<Parameter<?>>();
+ ArrayList<Parameter<?>> global_1 = new ArrayList<>();
global_1.add(meanP);
global_1.add(stddevP);
config.checkConstraint(new AllOrNoneMustBeSetGlobalConstraint(global_1));
- ArrayList<ListParameter<?>> global = new ArrayList<ListParameter<?>>();
+ ArrayList<ListParameter<?>> global = new ArrayList<>();
global.add(meanP);
global.add(stddevP);
config.checkConstraint(new EqualSizeGlobalConstraint(global));
@@ -281,7 +293,7 @@ public class AttributeWiseVarianceNormalization<V extends NumberVector<?>> exten
@Override
protected AttributeWiseVarianceNormalization<V> makeInstance() {
- return new AttributeWiseVarianceNormalization<V>(mean, stddev);
+ return new AttributeWiseVarianceNormalization<>(mean, stddev);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
index 24f3a850..94bcb32f 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/InverseDocumentFrequencyNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,6 +32,7 @@ import java.util.BitSet;
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
/**
* Normalization for text frequency vectors, using the inverse document
@@ -45,6 +46,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
*/
public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<?>> extends AbstractNormalization<V> {
/**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(InverseDocumentFrequencyNormalization.class);
+
+ /**
* The IDF storage.
*/
TIntDoubleMap idf = new TIntDoubleHashMap();
@@ -116,4 +122,9 @@ public class InverseDocumentFrequencyNormalization<V extends SparseNumberVector<
protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
return TypeUtil.SPARSE_VECTOR_FIELD;
}
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
index 457cc6eb..a12dea3b 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/LengthNormalization.java
@@ -27,7 +27,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DoubleNorm;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -101,7 +101,7 @@ public class LengthNormalization<V extends NumberVector<?>> extends AbstractStre
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<DoubleNorm<? super V>> normP = new ObjectParameter<DoubleNorm<? super V>>(NORM_ID, DoubleNorm.class, EuclideanDistanceFunction.class);
+ ObjectParameter<DoubleNorm<? super V>> normP = new ObjectParameter<>(NORM_ID, DoubleNorm.class, EuclideanDistanceFunction.class);
if(config.grab(normP)) {
norm = normP.instantiateClass(config);
}
@@ -109,7 +109,7 @@ public class LengthNormalization<V extends NumberVector<?>> extends AbstractStre
@Override
protected LengthNormalization<V> makeInstance() {
- return new LengthNormalization<V>(norm);
+ return new LengthNormalization<>(norm);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/NonNumericFeaturesException.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/NonNumericFeaturesException.java
index 9f26482a..0abaac95 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/NonNumericFeaturesException.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/NonNumericFeaturesException.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/Normalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/Normalization.java
index 96f6bdc1..3c3e7bdf 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/Normalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/Normalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,7 +23,6 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.LinearEquationSystem;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
@@ -44,16 +43,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
*/
public interface Normalization<O> extends ObjectFilter, Parameterizable {
/**
- * Performs a normalization on a database object bundle.
- *
- * @param objects the database objects package
- * @return modified object bundle
- * @throws NonNumericFeaturesException if feature vectors differ in length or values are not
- * suitable to normalization
- */
- MultipleObjectsBundle normalizeObjects(MultipleObjectsBundle objects) throws NonNumericFeaturesException;
-
- /**
* Transforms a feature vector to the original attribute ranges.
*
* @param featureVector a feature vector to be transformed into original space
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
index 519a3743..bb9c2aec 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/RankTieNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -68,7 +68,7 @@ public class RankTieNormalization implements ObjectFilter {
final List<? extends NumberVector<?>> castColumn = (List<? extends NumberVector<?>>) column;
// Get the replacement type information
final int dim = ((VectorFieldTypeInformation<?>) type).getDimensionality();
- final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<IntegerVector>(IntegerVector.STATIC, dim);
+ final VectorFieldTypeInformation<IntegerVector> outType = new VectorFieldTypeInformation<>(IntegerVector.STATIC, dim);
// Output vectors
int[][] posvecs = new int[len][dim];
@@ -102,7 +102,7 @@ public class RankTieNormalization implements ObjectFilter {
}
// Prepare output data
- final List<IntegerVector> outColumn = new ArrayList<IntegerVector>(len);
+ final List<IntegerVector> outColumn = new ArrayList<>(len);
for(int i = 0; i < len; i++) {
outColumn.add(new IntegerVector(posvecs[i]));
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
index 5d203c6b..5110d6fe 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/TFIDFNormalization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.normalization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,6 +28,7 @@ import gnu.trove.map.hash.TIntDoubleHashMap;
import java.util.BitSet;
import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
+import de.lmu.ifi.dbs.elki.logging.Logging;
/**
* Perform full TF-IDF Normalization as commonly used in text mining.
@@ -44,6 +45,11 @@ import de.lmu.ifi.dbs.elki.data.SparseNumberVector;
*/
public class TFIDFNormalization<V extends SparseNumberVector<?>> extends InverseDocumentFrequencyNormalization<V> {
/**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(TFIDFNormalization.class);
+
+ /**
* Constructor.
*/
public TFIDFNormalization() {
@@ -66,4 +72,9 @@ public class TFIDFNormalization<V extends SparseNumberVector<?>> extends Inverse
}
return ((SparseNumberVector.Factory<V, ?>) factory).newNumberVector(vals, featureVector.getDimensionality());
}
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java
index c0c10a7c..15d689d7 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/normalization/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java
index 82302cd3..87684499 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
new file mode 100644
index 00000000..742eb977
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/AbstractSupervisedProjectionVectorFilter.java
@@ -0,0 +1,230 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.list.TIntList;
+import gnu.trove.list.array.TIntArrayList;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector.Factory;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.datasource.filter.ClassLabelFilter;
+import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Base class for supervised projection methods.
+ *
+ * TODO: re-add sampling.
+ *
+ * @author Angela Peng
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+public abstract class AbstractSupervisedProjectionVectorFilter<V extends NumberVector<?>> implements ObjectFilter {
+ /**
+ * Target dimensionality to reduce the data to (capped at the input dimensionality).
+ */
+ protected int tdim;
+
+ /**
+ * Constructor.
+ *
+ * @param projdimension Projection dimensionality
+ */
+ public AbstractSupervisedProjectionVectorFilter(int projdimension) {
+ super();
+ this.tdim = projdimension;
+ }
+
+ @Override
+ public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
+ final int dataLength = objects.dataLength();
+ if (dataLength == 0) {
+ return objects;
+ }
+
+ List<? extends ClassLabel> classcolumn = null;
+ // First of all, identify a class label column.
+ for (int r = 0; r < objects.metaLength(); r++) {
+ SimpleTypeInformation<?> type = objects.meta(r);
+ List<?> column = objects.getColumn(r);
+ if (TypeUtil.CLASSLABEL.isAssignableFromType(type)) {
+ @SuppressWarnings("unchecked")
+ final List<? extends ClassLabel> castcolumn = (List<? extends ClassLabel>) column;
+ classcolumn = castcolumn;
+ break;
+ }
+ }
+ if (classcolumn == null) {
+ getLogger().warning("No class label column found (try " + ClassLabelFilter.class.getSimpleName() + ") -- cannot run " + this.getClass().getSimpleName());
+ return objects;
+ }
+
+ boolean somesuccess = false;
+ MultipleObjectsBundle bundle = new MultipleObjectsBundle();
+ // Secondly, look for columns to train the projection on.
+ for (int r = 0; r < objects.metaLength(); r++) {
+ SimpleTypeInformation<?> type = objects.meta(r);
+ List<?> column = objects.getColumn(r);
+ if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(type)) {
+ bundle.appendColumn(type, column);
+ continue;
+ }
+ @SuppressWarnings("unchecked")
+ List<V> vectorcolumn = (List<V>) column;
+ final VectorFieldTypeInformation<?> vtype = (VectorFieldTypeInformation<?>) type;
+ @SuppressWarnings("unchecked")
+ NumberVector.Factory<V, ?> factory = (NumberVector.Factory<V, ?>) vtype.getFactory();
+ int dim = vtype.getDimensionality();
+
+ if (tdim > dim) {
+ if (getLogger().isVerbose()) {
+ getLogger().verbose("Setting projection dimension to original dimension: projection dimension: " + tdim + " larger than original dimension: " + dim);
+ }
+ tdim = dim;
+ }
+
+ try {
+ Matrix proj = computeProjectionMatrix(vectorcolumn, classcolumn, dim);
+ for (int i = 0; i < dataLength; i++) {
+ final Vector pv = proj.times(vectorcolumn.get(i).getColumnVector());
+ V filteredObj = factory.newNumberVector(pv, ArrayLikeUtil.VECTORADAPTER);
+ vectorcolumn.set(i, filteredObj);
+ }
+ bundle.appendColumn(convertedType(type, factory), column);
+ somesuccess = true;
+ } catch (Exception e) {
+ getLogger().error("Projection failed -- continuing with unprojected data!", e);
+ bundle.appendColumn(type, column);
+ continue;
+ }
+ }
+
+ if (!somesuccess) {
+ getLogger().warning("No vector field of fixed dimensionality found.");
+ return objects;
+ }
+ return bundle;
+ }
+
+ /**
+ * Get the output type from the input type after conversion.
+ *
+ * @param in Input type information
+ * @param factory Vector factory
+ * @return Output type information: a vector field of the target dimensionality
+ */
+ protected SimpleTypeInformation<?> convertedType(SimpleTypeInformation<?> in, Factory<V, ?> factory) {
+ return new VectorFieldTypeInformation<>(factory, tdim);
+ }
+
+ /**
+ * Class logger.
+ *
+ * @return Logger
+ */
+ protected abstract Logging getLogger();
+
+ /**
+ * Compute the projection matrix.
+ *
+ * @param vectorcolumn Vectors
+ * @param classcolumn Class information
+ * @param dim Dimensionality of the input vectors
+ * @return Projection matrix
+ */
+ protected abstract Matrix computeProjectionMatrix(List<V> vectorcolumn, List<? extends ClassLabel> classcolumn, int dim);
+
+ /**
+ * Partition the row indexes by class label.
+ *
+ * @param classcolumn Class label column
+ * @return Map from each distinct label to the list of row indexes carrying it
+ */
+ protected <O> Map<O, TIntList> partition(List<? extends O> classcolumn) {
+ Map<O, TIntList> classes = new HashMap<>();
+ Iterator<? extends O> iter = classcolumn.iterator();
+ for (int i = 0; iter.hasNext(); i++) {
+ O lbl = iter.next();
+ TIntList ids = classes.get(lbl);
+ if (ids == null) {
+ ids = new TIntArrayList();
+ classes.put(lbl, ids);
+ }
+ ids.add(i);
+ }
+ return classes;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ */
+ public abstract static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * The number of dimensions to keep.
+ */
+ public static final OptionID P_ID = new OptionID("projection.dim", "Projection dimensionality");
+
+ /**
+ * Target dimensionality.
+ */
+ protected int tdim;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter dimP = new IntParameter(P_ID, 2);
+ dimP.addConstraint(new GreaterConstraint(0));
+
+ if (config.grab(dimP)) {
+ tdim = dimP.getValue();
+ }
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java
new file mode 100644
index 00000000..d646b489
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ClassicMultidimensionalScalingTransform.java
@@ -0,0 +1,274 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.datasource.filter.ObjectFilter;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Rescale the data set using multidimensional scaling, MDS.
+ *
+ * Note: the current implementation is rather expensive, both memory- and
+ * runtime-wise. Do not use it for large data sets!
+ *
+ * TODO: a contributed block Lanczos algorithm would be beneficial, to speed up MDS.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Data type
+ */
+@Alias({ "mds" })
+public class ClassicMultidimensionalScalingTransform<O> implements ObjectFilter {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(ClassicMultidimensionalScalingTransform.class);
+
+ /**
+ * Distance function to use.
+ */
+ PrimitiveDoubleDistanceFunction<? super O> dist = null;
+
+ /**
+ * Target dimensionality
+ */
+ int tdim;
+
+ /**
+ * Constructor.
+ *
+ * @param tdim Target dimensionality.
+ * @param dist Distance function to use.
+ */
+ public ClassicMultidimensionalScalingTransform(int tdim, PrimitiveDoubleDistanceFunction<? super O> dist) {
+ super();
+ this.tdim = tdim;
+ this.dist = dist;
+ }
+
+ @Override
+ public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
+ final int size = objects.dataLength();
+ if (size == 0) {
+ return objects;
+ }
+ MultipleObjectsBundle bundle = new MultipleObjectsBundle();
+
+ for (int r = 0; r < objects.metaLength(); r++) {
+ @SuppressWarnings("unchecked")
+ SimpleTypeInformation<Object> type = (SimpleTypeInformation<Object>) objects.meta(r);
+ @SuppressWarnings("unchecked")
+ final List<Object> column = (List<Object>) objects.getColumn(r);
+ if (!dist.getInputTypeRestriction().isAssignableFromType(type)) {
+ bundle.appendColumn(type, column);
+ continue;
+ }
+ // Get the replacement type information
+ @SuppressWarnings("unchecked")
+ final List<O> castColumn = (List<O>) column;
+ NumberVector.Factory<? extends NumberVector<?>, ?> factory = null;
+ {
+ if (type instanceof VectorFieldTypeInformation) {
+ final VectorFieldTypeInformation<?> ctype = (VectorFieldTypeInformation<?>) type;
+ // Note two-step cast, to make stricter compilers happy.
+ @SuppressWarnings("unchecked")
+ final VectorFieldTypeInformation<? extends NumberVector<?>> vtype = (VectorFieldTypeInformation<? extends NumberVector<?>>) ctype;
+ factory = (NumberVector.Factory<? extends NumberVector<?>, ?>) vtype.getFactory();
+ } else {
+ factory = DoubleVector.FACTORY;
+ }
+ bundle.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), castColumn);
+ }
+
+ // Compute distance matrix.
+ Matrix mat = new Matrix(size, size);
+ double[][] imat = mat.getArrayRef();
+ {
+ FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Computing distance matrix.", (size * (size - 1)) >>> 1, LOG) : null;
+ for (int x = 0; x < size; x++) {
+ final O ox = castColumn.get(x);
+ for (int y = x + 1; y < size; y++) {
+ final O oy = castColumn.get(y);
+ double distance = Math.abs(dist.doubleDistance(ox, oy));
+ imat[x][y] = distance;
+ if (dprog != null) {
+ dprog.incrementProcessed(LOG);
+ }
+ }
+ }
+ if (dprog != null) {
+ dprog.ensureCompleted(LOG);
+ }
+ }
+ // Adjust distance matrix:
+ if (dist instanceof SquaredEuclideanDistanceFunction) {
+ // Don't square squared euclidean twice.
+ for (int x = 0; x < size; x++) {
+ for (int y = x + 1; y < size; y++) {
+ imat[x][y] *= -.5;
+ }
+ }
+ } else {
+ for (int x = 0; x < size; x++) {
+ for (int y = x + 1; y < size; y++) {
+ imat[x][y] *= -.5 * imat[x][y];
+ }
+ }
+ }
+ doubleCenterSymmetric(imat);
+ // Find eigenvectors.
+ {
+ // TODO: implement Block-Lanczos algorithm for partial SVD.
+ SingularValueDecomposition svd = new SingularValueDecomposition(mat);
+ Matrix u = svd.getU();
+ double[] lambda = svd.getSingularValues();
+ for (int i = 0; i < tdim; i++) {
+ lambda[i] = Math.sqrt(Math.abs(lambda[i]));
+ }
+
+ double[] buf = new double[tdim];
+ double[][] uraw = u.getArrayRef();
+ for (int i = 0; i < size; i++) {
+ double[] raw = uraw[i];
+ for (int x = 0; x < buf.length; x++) {
+ buf[x] = lambda[x] * raw[x];
+ }
+ column.set(i, factory.newNumberVector(buf));
+ }
+ }
+ }
+ return bundle;
+ }
+
+ /**
+ * Double-center the given matrix (only upper triangle is used).
+ *
+ * For improved numerical precision, we perform incremental updates to the
+ * mean values, instead of computing a large sum and then performing division.
+ *
+ * @param m Matrix to double-center (modified in place; the diagonal is treated as 0).
+ */
+ public static void doubleCenterSymmetric(double[][] m) {
+ final int size = m.length;
+ // Storage for mean values - initially all 0.
+ double means[] = new double[size];
+ for (int x = 0; x < m.length; x++) {
+ final double[] rowx = m[x];
+ // We already added "x" values in previous iterations.
+ // Fake-add 0: mean + (0 - mean) / (x + 1)
+ double rmean = means[x] - means[x] / (x + 1);
+ for (int y = x + 1; y < rowx.length; y++) {
+ final double nv = rowx[y];
+ final double dx = nv - rmean, dy = nv - means[y];
+ // Mean of row x: this value is entry y (0-based), i.e. the (y + 1)th value.
+ rmean += dx / (y + 1);
+ // Mean of column y: this value is entry x (0-based), i.e. the (x + 1)th value.
+ means[y] += dy / (x + 1);
+ }
+ means[x] = rmean;
+ }
+ // Compute total mean by averaging column means.
+ double mean = means[0];
+ for (int x = 1; x < size; x++) {
+ double dm = means[x] - mean;
+ mean += dm / (x + 1);
+ }
+ // Row and column center; also make symmetric.
+ for (int x = 0; x < size; x++) {
+ m[x][x] = -2. * means[x] + mean;
+ for (int y = x + 1; y < size; y++) {
+ final double nv = m[x][y] - means[x] - means[y] + mean;
+ m[x][y] = nv;
+ m[y][x] = nv;
+ }
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * Desired dimensionality.
+ */
+ public static final OptionID DIM_ID = new OptionID("mds.dim", "Output dimensionality.");
+
+ /**
+ * Distance function parameter.
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("mds.distance", "Distance function to use.");
+
+ /**
+ * Target dimensionality.
+ */
+ int tdim;
+
+ /**
+ * Distance function to use.
+ */
+ PrimitiveDoubleDistanceFunction<? super O> dist = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ IntParameter dimP = new IntParameter(DIM_ID);
+ if (config.grab(dimP)) {
+ tdim = dimP.intValue();
+ }
+
+ ObjectParameter<PrimitiveDoubleDistanceFunction<? super O>> distP = new ObjectParameter<>(DISTANCE_ID, PrimitiveDoubleDistanceFunction.class, SquaredEuclideanDistanceFunction.class);
+ if (config.grab(distP)) {
+ dist = distP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected ClassicMultidimensionalScalingTransform<O> makeInstance() {
+ return new ClassicMultidimensionalScalingTransform<>(tdim, dist);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
index 18537a8d..3b4193ad 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/GlobalPrincipalComponentAnalysisTransform.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.datasource.filter.transform;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.EigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -47,12 +48,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Apply principal component analysis to the data set.
*
- * TODO: add dimensionality reduction
+ * This process is also known as "Whitening transformation".
+ *
+ * If you want to also reduce dimensionality, set the
+ * {@link Parameterizer#FILTER_ID} parameter!
*
* @author Erich Schubert
*
* @param <O> Vector type
*/
+@Alias({ "whiten", "whitening", "pca" })
public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>> extends AbstractVectorConversionFilter<O, O> {
/**
* Class logger.
@@ -101,7 +106,7 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>
@Override
protected boolean prepareStart(SimpleTypeInformation<O> in) {
- if(!(in instanceof VectorFieldTypeInformation)) {
+ if (!(in instanceof VectorFieldTypeInformation)) {
throw new AbortException("PCA can only applied to fixed dimensionality vectors");
}
dim = ((VectorFieldTypeInformation<?>) in).getDimensionality();
@@ -121,31 +126,30 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>
SortedEigenPairs eps = pcares.getEigenPairs();
covmat = null;
- if(filter == null) {
+ if (filter == null) {
proj = new double[dim][dim];
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
EigenPair ep = eps.getEigenPair(d);
double[] ev = ep.getEigenvector().getArrayRef();
double eval = Math.sqrt(ep.getEigenvalue());
// Fill weighted and transposed:
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
proj[d][i] = ev[i] / eval;
}
}
- }
- else {
+ } else {
List<EigenPair> axes = filter.filter(eps).getStrongEigenPairs();
final int pdim = axes.size(); // Projection dimensionality
if (LOG.isVerbose()) {
- LOG.verbose("Reducing dimensionality from "+dim+" to "+pdim+" via PCA.");
+ LOG.verbose("Reducing dimensionality from " + dim + " to " + pdim + " via PCA.");
}
proj = new double[pdim][dim];
- for(int d = 0; d < pdim; d++) {
+ for (int d = 0; d < pdim; d++) {
EigenPair ep = axes.get(d);
double[] ev = ep.getEigenvector().getArrayRef();
double eval = Math.sqrt(ep.getEigenvalue());
// Fill weighted and transposed:
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
proj[d][i] = ev[i] / eval;
}
}
@@ -156,7 +160,7 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>
@Override
protected O filterSingleObject(O obj) {
// Shift by mean and copy
- for(int i = 0; i < dim; i++) {
+ for (int i = 0; i < dim; i++) {
buf[i] = obj.doubleValue(i) - mean[i];
}
double[] p = VMath.times(proj, buf);
@@ -171,12 +175,12 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>
@Override
protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<O> in) {
initializeOutputType(in);
- if(proj.length == dim) {
- return in;
- }
- else {
- return new VectorFieldTypeInformation<O>(factory, proj.length);
- }
+ return new VectorFieldTypeInformation<>(factory, proj.length);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
}
/**
@@ -201,15 +205,15 @@ public class GlobalPrincipalComponentAnalysisTransform<O extends NumberVector<?>
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<EigenPairFilter> filterP = new ObjectParameter<EigenPairFilter>(FILTER_ID, EigenPairFilter.class, true);
- if(config.grab(filterP)) {
+ ObjectParameter<EigenPairFilter> filterP = new ObjectParameter<>(FILTER_ID, EigenPairFilter.class, true);
+ if (config.grab(filterP)) {
filter = filterP.instantiateClass(config);
}
}
@Override
- protected Object makeInstance() {
- return new GlobalPrincipalComponentAnalysisTransform<O>(filter);
+ protected GlobalPrincipalComponentAnalysisTransform<O> makeInstance() {
+ return new GlobalPrincipalComponentAnalysisTransform<>(filter);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java
new file mode 100644
index 00000000..998c8931
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LatLngToECEFFilter.java
@@ -0,0 +1,111 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.math.geodesy.EarthModel;
+import de.lmu.ifi.dbs.elki.math.geodesy.SphericalVincentyEarthModel;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Project a 2D data set (latitude, longitude) to a 3D coordinate system (X, Y,
+ * Z), such that Euclidean distance is line-of-sight.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type.
+ */
+public class LatLngToECEFFilter<V extends NumberVector<?>> extends AbstractStreamConversionFilter<V, V> {
+ /**
+ * Vector factory to use.
+ */
+ private NumberVector.Factory<V, ?> factory;
+
+ /**
+ * Earth model to use.
+ */
+ private EarthModel model;
+
+ /**
+ * Constructor.
+ *
+ * @param model Earth model
+ */
+ public LatLngToECEFFilter(EarthModel model) {
+ super();
+ this.model = model;
+ }
+
+ @Override
+ protected V filterSingleObject(V obj) {
+ return factory.newNumberVector(model.latLngDegToECEF(obj.doubleValue(0), obj.doubleValue(1)));
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return new VectorFieldTypeInformation<>(NumberVector.class, 2, 2);
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
+ VectorFieldTypeInformation<V> vin = (VectorFieldTypeInformation<V>) in;
+ factory = (NumberVector.Factory<V, ?>) vin.getFactory();
+ return new VectorFieldTypeInformation<>(vin.getFactory(), 3, 3, in.getSerializer());
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * Earth model to use.
+ */
+ private EarthModel model;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<EarthModel> modelP = new ObjectParameter<>(EarthModel.MODEL_ID, EarthModel.class, SphericalVincentyEarthModel.class);
+ if (config.grab(modelP)) {
+ model = modelP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected LatLngToECEFFilter<V> makeInstance() {
+ return new LatLngToECEFFilter<>(model);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java
new file mode 100644
index 00000000..76546d5c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LinearDiscriminantAnalysisFilter.java
@@ -0,0 +1,165 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.iterator.TIntIterator;
+import gnu.trove.list.TIntList;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Linear Discriminant Analysis (LDA) / Fisher's linear discriminant.
+ *
+ * Reference:
+ * <p>
+ * R. A. Fisher<br />
+ * The use of multiple measurements in taxonomic problems<br />
+ * Annals of Eugenics 7.2 (1936): 179-188.
+ * </p>
+ *
+ * @author Angela Peng
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+@Alias("lda")
+@Reference(authors = "R. A. Fisher", title = "The use of multiple measurements in taxonomic problems", booktitle = "Annals of eugenics 7.2 (1936)", url = "http://dx.doi.org/10.1111/j.1469-1809.1936.tb02137.x")
+public class LinearDiscriminantAnalysisFilter<V extends NumberVector<?>> extends AbstractSupervisedProjectionVectorFilter<V> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(LinearDiscriminantAnalysisFilter.class);
+
+ /**
+ * Constructor.
+ *
+ * @param projdimension Projection dimensionality.
+ */
+ public LinearDiscriminantAnalysisFilter(int projdimension) {
+ super(projdimension);
+ }
+
+ @Override
+ protected Matrix computeProjectionMatrix(List<V> vectorcolumn, List<? extends ClassLabel> classcolumn, int dim) {
+ Map<ClassLabel, TIntList> classes = partition(classcolumn);
+ // Fix indexing of classes:
+ List<ClassLabel> keys = new ArrayList<>(classes.keySet());
+ // Compute centroids:
+ List<Centroid> centroids = computeCentroids(dim, vectorcolumn, keys, classes);
+
+ final Matrix sigmaB, sigmaI;
+ // Between classes covariance:
+ {
+ CovarianceMatrix covmake = new CovarianceMatrix(dim);
+ for (Centroid c : centroids) {
+ covmake.put(c);
+ }
+ sigmaB = covmake.destroyToSampleMatrix();
+ }
+ {
+ // (Average) within class variance:
+ CovarianceMatrix covmake = new CovarianceMatrix(dim);
+ int numc = keys.size();
+ for (int i = 0; i < numc; i++) {
+ Centroid c = centroids.get(i);
+ // TODO: different weighting strategies? Sampling?
+ // Note: GNU Trove iterator, not ELKI style!
+ for (TIntIterator it = classes.get(keys.get(i)).iterator(); it.hasNext();) {
+ Vector delta = vectorcolumn.get(it.next()).getColumnVector().minusEquals(c);
+ covmake.put(delta);
+ }
+ }
+ sigmaI = covmake.destroyToSampleMatrix();
+ if (sigmaI.det() == 0) {
+ sigmaI.cheatToAvoidSingularity(1e-10);
+ }
+ }
+
+ Matrix sol = sigmaI.inverse().times(sigmaB);
+ EigenvalueDecomposition decomp = new EigenvalueDecomposition(sol);
+ SortedEigenPairs sorted = new SortedEigenPairs(decomp, false);
+ return sorted.eigenVectors(tdim).transpose();
+ }
+
+ /**
+ * Compute the centroid for each class.
+ *
+ * @param dim Dimensionality
+ * @param vectorcolumn Vector column
+ * @param keys Key index
+ * @param classes Classes
+ * @return Centroids for each class.
+ */
+ protected List<Centroid> computeCentroids(int dim, List<V> vectorcolumn, List<ClassLabel> keys, Map<ClassLabel, TIntList> classes) {
+ final int numc = keys.size();
+ List<Centroid> centroids = new ArrayList<>(numc);
+ for (int i = 0; i < numc; i++) {
+ Centroid c = new Centroid(dim);
+ // Note: GNU Trove iterator, not ELKI style!
+ for (TIntIterator it = classes.get(keys.get(i)).iterator(); it.hasNext();) {
+ c.put(vectorcolumn.get(it.next()));
+ }
+ centroids.add(c);
+ }
+ return centroids;
+ }
+
+ /**
+ * Class logger.
+ *
+ * @return Logger
+ */
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Angela Peng
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractSupervisedProjectionVectorFilter.Parameterizer<V> {
+ @Override
+ protected LinearDiscriminantAnalysisFilter<V> makeInstance() {
+ return new LinearDiscriminantAnalysisFilter<>(tdim);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java
new file mode 100644
index 00000000..ea0d4ef2
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/LngLatToECEFFilter.java
@@ -0,0 +1,111 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.math.geodesy.EarthModel;
+import de.lmu.ifi.dbs.elki.math.geodesy.SphericalVincentyEarthModel;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Project a 2D data set (longitude, latitude) to a 3D coordinate system (X, Y,
+ * Z), such that Euclidean distance is line-of-sight.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type.
+ */
+public class LngLatToECEFFilter<V extends NumberVector<?>> extends AbstractStreamConversionFilter<V, V> {
+ /**
+ * Vector factory to use.
+ */
+ private NumberVector.Factory<V, ?> factory;
+
+ /**
+ * Earth model to use.
+ */
+ private EarthModel model;
+
+ /**
+ * Constructor.
+ *
+ * @param model Earth model
+ */
+ public LngLatToECEFFilter(EarthModel model) {
+ super();
+ this.model = model;
+ }
+
+ @Override
+ protected V filterSingleObject(V obj) {
+ return factory.newNumberVector(model.latLngDegToECEF(obj.doubleValue(1), obj.doubleValue(0)));
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> getInputTypeRestriction() {
+ return new VectorFieldTypeInformation<>(NumberVector.class, 2, 2);
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
+ VectorFieldTypeInformation<V> vin = (VectorFieldTypeInformation<V>) in;
+ factory = (NumberVector.Factory<V, ?>) vin.getFactory();
+ return new VectorFieldTypeInformation<>(vin.getFactory(), 3, 3, in.getSerializer());
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ */
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ /**
+ * Earth model to use.
+ */
+ private EarthModel model;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<EarthModel> modelP = new ObjectParameter<>(EarthModel.MODEL_ID, EarthModel.class, SphericalVincentyEarthModel.class);
+ if (config.grab(modelP)) {
+ model = modelP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected LngLatToECEFFilter<V> makeInstance() {
+ return new LngLatToECEFFilter<>(model);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
index 82e7a1b6..720c88df 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorFeatureSelectionFilter.java
@@ -81,7 +81,7 @@ public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> exten
@Override
protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
initializeOutputType(in);
- return new VectorFieldTypeInformation<V>(factory, getDimensionality());
+ return new VectorFieldTypeInformation<>(factory, getDimensionality());
}
/**
@@ -139,7 +139,7 @@ public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> exten
* Key: <code>-projectionfilter.selectedattributes</code>
* </p>
*/
- public static final OptionID SELECTED_ATTRIBUTES_ID = new OptionID("projectionfilter.selectedattributes", "a comma separated array of integer values d_i, where 0 <= d_i < the " + "dimensionality of the feature space " + "specifying the dimensions to be considered " + "for projection. If this parameter is not set, " + "no dimensions will be considered, i.e. the projection is a zero-dimensional feature space");
+ public static final OptionID SELECTED_ATTRIBUTES_ID = new OptionID("projectionfilter.selectedattributes", "a comma separated array of integer values d_i, where 0 <= d_i < the dimensionality of the feature space specifying the dimensions to be considered for projection. If this parameter is not set, no dimensions will be considered, i.e. the projection is a zero-dimensional feature space");
/**
* Selected attributes.
@@ -162,7 +162,7 @@ public class NumberVectorFeatureSelectionFilter<V extends NumberVector<?>> exten
@Override
protected NumberVectorFeatureSelectionFilter<DoubleVector> makeInstance() {
- return new NumberVectorFeatureSelectionFilter<DoubleVector>(selectedAttributes);
+ return new NumberVectorFeatureSelectionFilter<>(selectedAttributes);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
index 7d799a1e..9b1ddbff 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/NumberVectorRandomFeatureSelectionFilter.java
@@ -94,7 +94,7 @@ public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>>
protected SimpleTypeInformation<? super V> convertedType(SimpleTypeInformation<V> in) {
initializeRandomAttributes(in);
initializeOutputType(in);
- return new VectorFieldTypeInformation<V>(factory, k);
+ return new VectorFieldTypeInformation<>(factory, k);
}
/**
@@ -168,7 +168,7 @@ public class NumberVectorRandomFeatureSelectionFilter<V extends NumberVector<?>>
@Override
protected NumberVectorRandomFeatureSelectionFilter<DoubleVector> makeInstance() {
- return new NumberVectorRandomFeatureSelectionFilter<DoubleVector>(k, rnd);
+ return new NumberVectorRandomFeatureSelectionFilter<>(k, rnd);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java
new file mode 100644
index 00000000..af3f4c6e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/ProjectionFilter.java
@@ -0,0 +1,113 @@
+package de.lmu.ifi.dbs.elki.datasource.filter.transform;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.projection.Projection;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.datasource.filter.AbstractStreamConversionFilter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Apply a projection to the data.
+ *
+ * @author Erich Schubert
+ *
+ * @param <I> Input type
+ * @param <O> Output type
+ */
+public class ProjectionFilter<I, O> extends AbstractStreamConversionFilter<I, O> {
+ /**
+ * Projection to apply.
+ */
+ Projection<I, O> projection;
+
+ /**
+ * Constructor.
+ *
+ * @param projection Projection
+ */
+ public ProjectionFilter(Projection<I, O> projection) {
+ super();
+ this.projection = projection;
+ }
+
+ @Override
+ protected O filterSingleObject(I obj) {
+ return projection.project(obj);
+ }
+
+ @Override
+ protected TypeInformation getInputTypeRestriction() {
+ return projection.getInputDataTypeInformation();
+ }
+
+ @Override
+ protected SimpleTypeInformation<? super O> convertedType(SimpleTypeInformation<I> in) {
+ projection.initialize(in);
+ return projection.getOutputDataTypeInformation();
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <I> Input type
+ * @param <O> Output type
+ */
+ public static class Parameterizer<I, O> extends AbstractParameterizer {
+ /**
+ * Parameter to specify the projection to use
+ * <p>
+ * Key: {@code -projection}
+ * </p>
+ */
+ public static final OptionID PROJ_ID = new OptionID("projection", "Projection to use.");
+
+ /**
+ * Projection to apply.
+ */
+ Projection<I, O> projection;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<Projection<I, O>> projP = new ObjectParameter<>(PROJ_ID, Projection.class);
+ if (config.grab(projP)) {
+ projection = projP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected ProjectionFilter<I, O> makeInstance() {
+ return new ProjectionFilter<>(projection);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java
index 9f8d3262..7082f103 100644
--- a/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/datasource/filter/transform/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team