summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java 291
1 files changed, 0 insertions, 291 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java
deleted file mode 100644
index b7a22b32..00000000
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/PCABasedCorrelationDistanceFunction.java
+++ /dev/null
@@ -1,291 +0,0 @@
-package de.lmu.ifi.dbs.elki.distance.distancefunction.correlation;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractIndexBasedDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.FilteredLocalPCABasedDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.PCACorrelationDistance;
-import de.lmu.ifi.dbs.elki.index.IndexFactory;
-import de.lmu.ifi.dbs.elki.index.preprocessed.localpca.FilteredLocalPCAIndex;
-import de.lmu.ifi.dbs.elki.index.preprocessed.localpca.KNNQueryFilteredPCAIndex;
-import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
-import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
-
-/**
- * Provides the correlation distance for real valued vectors.
- *
- * @author Elke Achtert
- *
- * @apiviz.has Instance
- */
-public class PCABasedCorrelationDistanceFunction extends AbstractIndexBasedDistanceFunction<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>, PCACorrelationDistance> implements FilteredLocalPCABasedDistanceFunction<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>, PCACorrelationDistance> {
- /**
-  * Parameter to specify the threshold of a distance between a vector q and a
-  * given space that indicates that q adds a new dimension to the space, must
-  * be a double equal to or greater than 0.
-  * <p>
-  * Default value: {@code 0.25}
-  * </p>
-  * <p>
-  * Key: {@code -pcabasedcorrelationdf.delta}
-  * </p>
-  */
- public static final OptionID DELTA_ID = new OptionID("pcabasedcorrelationdf.delta", "Threshold of a distance between a vector q and a given space that indicates that q adds a new dimension to the space.");
-
- /**
-  * Holds the value of {@link #DELTA_ID}. Assigned exactly once in the
-  * constructor, hence final.
-  */
- private final double delta;
-
- /**
-  * Constructor.
-  *
-  * @param indexFactory index factory, passed on to the super class
-  * @param delta Delta parameter, the threshold described at {@link #DELTA_ID}
-  */
- public PCABasedCorrelationDistanceFunction(IndexFactory<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>> indexFactory, double delta) {
-   super(indexFactory);
-   this.delta = delta;
- }
-
- /**
-  * Returns the distance factory used by this distance function.
-  *
-  * @return {@code PCACorrelationDistance.FACTORY}
-  */
- @Override
- public PCACorrelationDistance getDistanceFactory() {
-   return PCACorrelationDistance.FACTORY;
- }
-
- @Override
- public <T extends NumberVector<?>> Instance<T> instantiate(Relation<T> database) {
- // We can't really avoid these warnings, due to a limitation in Java
- // Generics (AFAICT)
- @SuppressWarnings("unchecked")
- FilteredLocalPCAIndex<T> indexinst = (FilteredLocalPCAIndex<T>) indexFactory.instantiate((Relation<NumberVector<?>>) database);
- return new Instance<>(database, indexinst, delta, this);
- }
-
- /**
-  * Two distance functions are equal if they are of exactly the same class
-  * and use the same delta value. The index factory is not part of the
-  * comparison.
-  *
-  * @param obj object to compare with
-  * @return {@code true} if {@code obj} has the same class and delta
-  */
- @Override
- public boolean equals(Object obj) {
-   if(this == obj) {
-     return true;
-   }
-   if(obj == null || !this.getClass().equals(obj.getClass())) {
-     return false;
-   }
-   PCABasedCorrelationDistanceFunction other = (PCABasedCorrelationDistanceFunction) obj;
-   // Compare the bit patterns rather than using ==, so that equals() stays
-   // reflexive for NaN and is consistent with hashCode().
-   return Double.doubleToLongBits(this.delta) == Double.doubleToLongBits(other.delta);
- }
-
- /**
-  * Hash code consistent with {@link #equals(Object)}: derived from the class
-  * and the bit pattern of delta.
-  *
-  * @return hash code
-  */
- @Override
- public int hashCode() {
-   final long bits = Double.doubleToLongBits(delta);
-   return getClass().hashCode() ^ (int) (bits ^ (bits >>> 32));
- }
-
- /**
- * The actual instance bound to a particular database.
- *
- * @author Erich Schubert
- */
- public static class Instance<V extends NumberVector<?>> extends AbstractIndexBasedDistanceFunction.Instance<V, FilteredLocalPCAIndex<V>, PCACorrelationDistance, PCABasedCorrelationDistanceFunction> implements FilteredLocalPCABasedDistanceFunction.Instance<V, FilteredLocalPCAIndex<V>, PCACorrelationDistance> {
- /**
-  * Threshold above which a strong eigenvector of one PCA is treated as
-  * adding a new dimension to the subspace of the other PCA.
-  */
- final double delta;
-
- /**
-  * Constructor.
-  *
-  * @param database Relation this instance is bound to
-  * @param index Local PCA index to obtain the projections from
-  * @param delta Delta threshold
-  * @param distanceFunction Distance function this instance belongs to
-  */
- public Instance(Relation<V> database, FilteredLocalPCAIndex<V> index, double delta, PCABasedCorrelationDistanceFunction distanceFunction) {
-   super(database, index, distanceFunction);
-   this.delta = delta;
- }
-
- /**
-  * Computes the PCA correlation distance of two objects: the correlation
-  * distance of their local PCAs combined with the Euclidean distance of
-  * their vectors.
-  *
-  * @param id1 first object
-  * @param id2 second object
-  * @return distance holding the correlation value and the Euclidean value
-  */
- @Override
- public PCACorrelationDistance distance(DBIDRef id1, DBIDRef id2) {
-   final PCAFilteredResult firstPca = index.getLocalProjection(id1);
-   final PCAFilteredResult secondPca = index.getLocalProjection(id2);
-   final V first = relation.get(id1);
-   final V second = relation.get(id2);
-
-   final int correlationValue = correlationDistance(firstPca, secondPca, first.getDimensionality());
-   final double euclideanValue = euclideanDistance(first, second);
-   return new PCACorrelationDistance(correlationValue, euclideanValue);
- }
-
- /**
-  * Computes the correlation distance between the two subspaces defined by
-  * the specified PCAs.
-  * <p>
-  * The subspace of each PCA is extended by those strong eigenvectors of the
-  * other PCA whose distance to it exceeds delta. The result is the larger of
-  * the two resulting correlation dimensions.
-  * </p>
-  *
-  * @param pca1 first PCA
-  * @param pca2 second PCA
-  * @param dimensionality the dimensionality of the data space
-  * @return the correlation distance between the two subspaces defined by the
-  *         specified PCAs
-  */
- public int correlationDistance(PCAFilteredResult pca1, PCAFilteredResult pca2, int dimensionality) {
-   // TODO: only in one direction?
-   // Work on copies: adjust() modifies the eigenvector and selection
-   // matrices in place.
-   // pca of rv1
-   Matrix v1 = pca1.getEigenvectors().copy();
-   Matrix v1_strong = pca1.adapatedStrongEigenvectors().copy();
-   Matrix e1_czech = pca1.selectionMatrixOfStrongEigenvectors().copy();
-
-   // pca of rv2
-   Matrix v2 = pca2.getEigenvectors().copy();
-   Matrix v2_strong = pca2.adapatedStrongEigenvectors().copy();
-   Matrix e2_czech = pca2.selectionMatrixOfStrongEigenvectors().copy();
-
-   // extend the space of rv1 by the strong eigenvectors of rv2 ...
-   int lambda1 = extendSubspace(v1, e1_czech, pca1.dissimilarityMatrix(), v2_strong, pca1.getCorrelationDimension(), dimensionality);
-   // ... and the space of rv2 by the strong eigenvectors of rv1
-   int lambda2 = extendSubspace(v2, e2_czech, pca2.dissimilarityMatrix(), v1_strong, pca2.getCorrelationDimension(), dimensionality);
-
-   // TODO: take delta into account here as well, along the lines of:
-   // Matrix m_1_czech = pca1.dissimilarityMatrix();
-   // double dist_1 = normalizedDistance(dv1, dv2, m1_czech);
-   // Matrix m_2_czech = pca2.dissimilarityMatrix();
-   // double dist_2 = normalizedDistance(dv1, dv2, m2_czech);
-   // if (dist_1 > delta || dist_2 > delta) {
-   // correlationDistance++;
-   // }
-
-   return Math.max(lambda1, lambda2);
- }
-
- /**
-  * Extends the subspace described by <code>v</code> and <code>e_czech</code>
-  * by every candidate column whose distance to the current subspace is
-  * larger than delta, as long as the subspace has fewer than
-  * <code>dimensionality</code> dimensions.
-  *
-  * @param v eigenvector matrix of the subspace, modified in place
-  * @param e_czech selection matrix of the subspace, modified in place
-  * @param m_czech dissimilarity matrix of the subspace before extension
-  * @param candidates matrix whose columns are the candidate vectors
-  * @param lambda current correlation dimension of the subspace
-  * @param dimensionality the dimensionality of the data space
-  * @return the correlation dimension after all candidates were processed
-  */
- private int extendSubspace(Matrix v, Matrix e_czech, Matrix m_czech, Matrix candidates, int lambda, int dimensionality) {
-   final int numCandidates = candidates.getColumnDimensionality();
-   // Once the subspace spans the full data space nothing can be added, so
-   // the remaining candidates need not be inspected.
-   for(int i = 0; i < numCandidates && lambda < dimensionality; i++) {
-     Vector candidate = candidates.getCol(i);
-     // distance of the candidate to the current subspace
-     double dist = Math.sqrt(candidate.transposeTimes(candidate) - candidate.transposeTimesTimes(m_czech, candidate));
-
-     // if the candidate spans up a new dimension, insert it at column
-     // lambda, recompute the dissimilarity matrix and increase lambda
-     if(dist > delta) {
-       adjust(v, e_czech, candidate, lambda++);
-       m_czech = v.times(e_czech).timesTranspose(v);
-     }
-   }
-   return lambda;
- }
-
- /**
- * Inserts the specified vector into the given orthonormal matrix
- * <code>v</code> at column <code>corrDim</code>. After insertion the matrix
- * <code>v</code> is orthonormalized and column <code>corrDim</code> of
- * matrix <code>e_czech</code> is set to the <code>corrDim</code>-th unit
- * vector.
- *
- * @param v the orthonormal matrix of the eigenvectors
- * @param e_czech the selection matrix of the strong eigenvectors
- * @param vector the vector to be inserted
- * @param corrDim the column at which the vector should be inserted
- */
- private void adjust(Matrix v, Matrix e_czech, Vector vector, int corrDim) {
- int dim = v.getRowDimensionality();
-
- // set e_czech[corrDim][corrDim] := 1
- e_czech.set(corrDim, corrDim, 1);
-
- // normalize v
- Vector v_i = vector.copy();
- Vector sum = new Vector(dim);
- for(int k = 0; k < corrDim; k++) {
- Vector v_k = v.getCol(k);
- sum.plusTimesEquals(v_k, v_i.transposeTimes(v_k));
- }
- v_i.minusEquals(sum);
- v_i.normalize();
- v.setCol(corrDim, v_i);
- }
-
- /**
- * Computes the Euclidean distance between the given two vectors.
- *
- * @param dv1 first FeatureVector
- * @param dv2 second FeatureVector
- * @return the Euclidean distance between the given two vectors
- */
- private double euclideanDistance(V dv1, V dv2) {
- if(dv1.getDimensionality() != dv2.getDimensionality()) {
- throw new IllegalArgumentException("Different dimensionality of FeatureVectors\n first argument: " + dv1.toString() + "\n second argument: " + dv2.toString());
- }
-
- double sqrDist = 0;
- for(int i = 0; i < dv1.getDimensionality(); i++) {
- double manhattanI = dv1.doubleValue(i) - dv2.doubleValue(i);
- sqrDist += manhattanI * manhattanI;
- }
- return Math.sqrt(sqrDist);
- }
- }
-
- /**
-  * Parameterization class: configures the index factory and reads the delta
-  * option ({@link #DELTA_ID}) before building the distance function.
-  *
-  * @author Erich Schubert
-  *
-  * @apiviz.exclude
-  */
- public static class Parameterizer extends AbstractIndexBasedDistanceFunction.Parameterizer<FilteredLocalPCAIndex.Factory<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>>> {
-   /**
-    * Delta value; overwritten in {@link #makeOptions} when the option is
-    * grabbed successfully.
-    */
-   protected double delta = 0.0;
-
-   @Override
-   protected void makeOptions(Parameterization config) {
-     super.makeOptions(config);
-     configIndexFactory(config, FilteredLocalPCAIndex.Factory.class, KNNQueryFilteredPCAIndex.Factory.class);
-
-     // delta: non-negative double, defaulting to 0.25
-     final DoubleParameter deltaParam = new DoubleParameter(DELTA_ID, 0.25);
-     deltaParam.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
-     final boolean grabbed = config.grab(deltaParam);
-     if(grabbed) {
-       delta = deltaParam.doubleValue();
-     }
-   }
-
-   @Override
-   protected PCABasedCorrelationDistanceFunction makeInstance() {
-     return new PCABasedCorrelationDistanceFunction(factory, delta);
-   }
- }
-}