summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java232
1 files changed, 232 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java
new file mode 100644
index 00000000..297abac0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java
@@ -0,0 +1,232 @@
+package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.Collection;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * PCA runner that will do dimensionality reduction. PCA is computed as with the
+ * regular runner, but afterwards, an {@link EigenPairFilter} is applied.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.uses PCAFilteredResult oneway - - «create»
+ * @apiviz.composedOf EigenPairFilter
+ *
+ * @param <V> Vector class to use
+ */
+public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends PCARunner<V> {
+ /**
+ * Parameter to specify the filter for determination of the strong and weak
+ * eigenvectors, must be a subclass of {@link EigenPairFilter}.
+ * <p/>
+ * Default value: {@link PercentageEigenPairFilter}
+ * </p>
+ * <p/>
+ * Key: {@code -pca.filter}
+ * </p>
+ */
+ public static final OptionID PCA_EIGENPAIR_FILTER = OptionID.getOrCreateOptionID("pca.filter", "Filter class to determine the strong and weak eigenvectors.");
+
+ /**
+ * Parameter to specify a constant big value to reset high eigenvalues, must
+ * be a double greater than 0.
+ * <p>
+ * Default value: {@code 1.0}
+ * </p>
+ * <p>
+ * Key: {@code -pca.big}
+ * </p>
+ */
+ public static final OptionID BIG_ID = OptionID.getOrCreateOptionID("pca.big", "A constant big value to reset high eigenvalues.");
+
+ /**
+ * Parameter to specify a constant small value to reset low eigenvalues, must
+ * be a double greater than 0.
+ * <p>
+ * Default value: {@code 0.0}
+ * </p>
+ * <p>
+ * Key: {@code -pca.small}
+ * </p>
+ */
+ public static final OptionID SMALL_ID = OptionID.getOrCreateOptionID("pca.small", "A constant small value to reset low eigenvalues.");
+
+ /**
+ * Holds the instance of the EigenPairFilter specified by
+ * {@link #PCA_EIGENPAIR_FILTER}.
+ */
+ private EigenPairFilter eigenPairFilter;
+
+ /**
+ * Holds the value of {@link #BIG_ID}.
+ */
+ private double big;
+
+ /**
+ * Holds the value of {@link #SMALL_ID}.
+ */
+ private double small;
+
+ /**
+ * Constructor.
+ *
+ * @param covarianceMatrixBuilder
+ * @param eigenPairFilter
+ * @param big
+ * @param small
+ */
+ public PCAFilteredRunner(CovarianceMatrixBuilder<V> covarianceMatrixBuilder, EigenPairFilter eigenPairFilter, double big, double small) {
+ super(covarianceMatrixBuilder);
+ this.eigenPairFilter = eigenPairFilter;
+ this.big = big;
+ this.small = small;
+ }
+
+ /**
+ * Run PCA on a collection of database IDs
+ *
+ * @param ids a collection of ids
+ * @param database the database used
+ * @return PCA result
+ */
+ @Override
+ public PCAFilteredResult processIds(DBIDs ids, Relation<? extends V> database) {
+ return processCovarMatrix(covarianceMatrixBuilder.processIds(ids, database));
+ }
+
+ /**
+ * Run PCA on a QueryResult Collection
+ *
+ * @param results a collection of QueryResults
+ * @param database the database used
+ * @return PCA result
+ */
+ @Override
+ public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) {
+ return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database));
+ }
+
+ /**
+ * Process an existing Covariance Matrix
+ *
+ * @param covarMatrix the matrix used for performing PCA
+ */
+ @Override
+ public PCAFilteredResult processCovarMatrix(Matrix covarMatrix) {
+ // TODO: add support for a different implementation to do EVD?
+ EigenvalueDecomposition evd = covarMatrix.eig();
+ return processEVD(evd);
+ }
+
+ /**
+ * Process an existing eigenvalue decomposition
+ *
+ * @param evd eigenvalue decomposition to use
+ */
+ @Override
+ public PCAFilteredResult processEVD(EigenvalueDecomposition evd) {
+ SortedEigenPairs eigenPairs = new SortedEigenPairs(evd, false);
+ FilteredEigenPairs filteredEigenPairs = eigenPairFilter.filter(eigenPairs);
+ return new PCAFilteredResult(eigenPairs, filteredEigenPairs, big, small);
+ }
+
+ /**
+ * Retrieve the {@link EigenPairFilter} to be used. For derived PCA Runners
+ *
+ * @return eigenpair filter configured.
+ */
+ protected EigenPairFilter getEigenPairFilter() {
+ return eigenPairFilter;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends PCARunner.Parameterizer<V> {
+ /**
+ * Holds the instance of the EigenPairFilter specified by
+ * {@link #PCA_EIGENPAIR_FILTER}.
+ */
+ protected EigenPairFilter eigenPairFilter;
+
+ /**
+ * Holds the value of {@link #BIG_ID}.
+ */
+ protected double big;
+
+ /**
+ * Holds the value of {@link #SMALL_ID}.
+ */
+ protected double small;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<EigenPairFilter> EIGENPAIR_FILTER_PARAM = new ObjectParameter<EigenPairFilter>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class);
+ if(config.grab(EIGENPAIR_FILTER_PARAM)) {
+ eigenPairFilter = EIGENPAIR_FILTER_PARAM.instantiateClass(config);
+ }
+
+ DoubleParameter BIG_PARAM = new DoubleParameter(BIG_ID, new GreaterConstraint(0), 1.0);
+ if(config.grab(BIG_PARAM)) {
+ big = BIG_PARAM.getValue();
+
+ }
+
+ DoubleParameter SMALL_PARAM = new DoubleParameter(SMALL_ID, new GreaterEqualConstraint(0), 0.0);
+ if(config.grab(SMALL_PARAM)) {
+ small = SMALL_PARAM.getValue();
+ }
+
+ // global constraint small <--> big
+ config.checkConstraint(new LessGlobalConstraint<Double>(SMALL_PARAM, BIG_PARAM));
+ }
+
+ @Override
+ protected PCAFilteredRunner<V> makeInstance() {
+ return new PCAFilteredRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small);
+ }
+ }
+} \ No newline at end of file