diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java | 232 |
1 files changed, 232 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java new file mode 100644 index 00000000..297abac0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java @@ -0,0 +1,232 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessGlobalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * PCA runner that will do dimensionality reduction. PCA is computed as with the + * regular runner, but afterwards, an {@link EigenPairFilter} is applied. + * + * @author Erich Schubert + * + * @apiviz.landmark + * @apiviz.uses PCAFilteredResult oneway - - «create» + * @apiviz.composedOf EigenPairFilter + * + * @param <V> Vector class to use + */ +public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends PCARunner<V> { + /** + * Parameter to specify the filter for determination of the strong and weak + * eigenvectors, must be a subclass of {@link EigenPairFilter}. + * <p/> + * Default value: {@link PercentageEigenPairFilter} + * </p> + * <p/> + * Key: {@code -pca.filter} + * </p> + */ + public static final OptionID PCA_EIGENPAIR_FILTER = OptionID.getOrCreateOptionID("pca.filter", "Filter class to determine the strong and weak eigenvectors."); + + /** + * Parameter to specify a constant big value to reset high eigenvalues, must + * be a double greater than 0. + * <p> + * Default value: {@code 1.0} + * </p> + * <p> + * Key: {@code -pca.big} + * </p> + */ + public static final OptionID BIG_ID = OptionID.getOrCreateOptionID("pca.big", "A constant big value to reset high eigenvalues."); + + /** + * Parameter to specify a constant small value to reset low eigenvalues, must + * be a double greater than 0. + * <p> + * Default value: {@code 0.0} + * </p> + * <p> + * Key: {@code -pca.small} + * </p> + */ + public static final OptionID SMALL_ID = OptionID.getOrCreateOptionID("pca.small", "A constant small value to reset low eigenvalues."); + + /** + * Holds the instance of the EigenPairFilter specified by + * {@link #PCA_EIGENPAIR_FILTER}. + */ + private EigenPairFilter eigenPairFilter; + + /** + * Holds the value of {@link #BIG_ID}. + */ + private double big; + + /** + * Holds the value of {@link #SMALL_ID}. + */ + private double small; + + /** + * Constructor. + * + * @param covarianceMatrixBuilder + * @param eigenPairFilter + * @param big + * @param small + */ + public PCAFilteredRunner(CovarianceMatrixBuilder<V> covarianceMatrixBuilder, EigenPairFilter eigenPairFilter, double big, double small) { + super(covarianceMatrixBuilder); + this.eigenPairFilter = eigenPairFilter; + this.big = big; + this.small = small; + } + + /** + * Run PCA on a collection of database IDs + * + * @param ids a collection of ids + * @param database the database used + * @return PCA result + */ + @Override + public PCAFilteredResult processIds(DBIDs ids, Relation<? extends V> database) { + return processCovarMatrix(covarianceMatrixBuilder.processIds(ids, database)); + } + + /** + * Run PCA on a QueryResult Collection + * + * @param results a collection of QueryResults + * @param database the database used + * @return PCA result + */ + @Override + public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) { + return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); + } + + /** + * Process an existing Covariance Matrix + * + * @param covarMatrix the matrix used for performing PCA + */ + @Override + public PCAFilteredResult processCovarMatrix(Matrix covarMatrix) { + // TODO: add support for a different implementation to do EVD? + EigenvalueDecomposition evd = covarMatrix.eig(); + return processEVD(evd); + } + + /** + * Process an existing eigenvalue decomposition + * + * @param evd eigenvalue decomposition to use + */ + @Override + public PCAFilteredResult processEVD(EigenvalueDecomposition evd) { + SortedEigenPairs eigenPairs = new SortedEigenPairs(evd, false); + FilteredEigenPairs filteredEigenPairs = eigenPairFilter.filter(eigenPairs); + return new PCAFilteredResult(eigenPairs, filteredEigenPairs, big, small); + } + + /** + * Retrieve the {@link EigenPairFilter} to be used. For derived PCA Runners + * + * @return eigenpair filter configured. + */ + protected EigenPairFilter getEigenPairFilter() { + return eigenPairFilter; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends PCARunner.Parameterizer<V> { + /** + * Holds the instance of the EigenPairFilter specified by + * {@link #PCA_EIGENPAIR_FILTER}. + */ + protected EigenPairFilter eigenPairFilter; + + /** + * Holds the value of {@link #BIG_ID}. + */ + protected double big; + + /** + * Holds the value of {@link #SMALL_ID}. + */ + protected double small; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<EigenPairFilter> EIGENPAIR_FILTER_PARAM = new ObjectParameter<EigenPairFilter>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class); + if(config.grab(EIGENPAIR_FILTER_PARAM)) { + eigenPairFilter = EIGENPAIR_FILTER_PARAM.instantiateClass(config); + } + + DoubleParameter BIG_PARAM = new DoubleParameter(BIG_ID, new GreaterConstraint(0), 1.0); + if(config.grab(BIG_PARAM)) { + big = BIG_PARAM.getValue(); + + } + + DoubleParameter SMALL_PARAM = new DoubleParameter(SMALL_ID, new GreaterEqualConstraint(0), 0.0); + if(config.grab(SMALL_PARAM)) { + small = SMALL_PARAM.getValue(); + } + + // global constraint small <--> big + config.checkConstraint(new LessGlobalConstraint<Double>(SMALL_PARAM, BIG_PARAM)); + } + + @Override + protected PCAFilteredRunner<V> makeInstance() { + return new PCAFilteredRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small); + } + } +}
\ No newline at end of file |