summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java289
1 files changed, 0 insertions, 289 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java
deleted file mode 100644
index b8c18304..00000000
--- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/correlation/ERiCDistanceFunction.java
+++ /dev/null
@@ -1,289 +0,0 @@
-package de.lmu.ifi.dbs.elki.distance.distancefunction.correlation;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractIndexBasedDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.FilteredLocalPCABasedDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.WeightedDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.BitDistance;
-import de.lmu.ifi.dbs.elki.index.IndexFactory;
-import de.lmu.ifi.dbs.elki.index.preprocessed.localpca.FilteredLocalPCAIndex;
-import de.lmu.ifi.dbs.elki.index.preprocessed.localpca.KNNQueryFilteredPCAIndex;
-import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
-import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
-
-/**
- * Provides a distance function for building the hierarchy in the ERiC
- * algorithm.
- *
- * @author Elke Achtert
- *
- * @apiviz.has Instance
- */
-public class ERiCDistanceFunction extends AbstractIndexBasedDistanceFunction<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>, BitDistance> implements FilteredLocalPCABasedDistanceFunction<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>, BitDistance> {
- /**
- * Parameter to specify the threshold for approximate linear dependency: the
- * strong eigenvectors of q are approximately linear dependent from the strong
- * eigenvectors p if the following condition holds for all strong eigenvectors
- * q_i of q (lambda_q < lambda_p): q_i' * M^check_p * q_i <= delta^2, must be
- * a double equal to or greater than 0.
- * <p>
- * Default value: {@code 0.1}
- * </p>
- * <p>
- * Key: {@code -ericdf.delta}
- * </p>
- */
- public static final OptionID DELTA_ID = new OptionID("ericdf.delta", "Threshold for approximate linear dependency: " + "the strong eigenvectors of q are approximately linear dependent " + "from the strong eigenvectors p if the following condition " + "holds for all stroneg eigenvectors q_i of q (lambda_q < lambda_p): " + "q_i' * M^check_p * q_i <= delta^2.");
-
- /**
- * Parameter to specify the threshold for the maximum distance between two
- * approximately linear dependent subspaces of two objects p and q (lambda_q <
- * lambda_p) before considering them as parallel, must be a double equal to or
- * greater than 0.
- * <p>
- * Default value: {@code 0.1}
- * </p>
- * <p>
- * Key: {@code -ericdf.tau}
- * </p>
- */
- public static final OptionID TAU_ID = new OptionID("ericdf.tau", "Threshold for the maximum distance between two approximately linear " + "dependent subspaces of two objects p and q " + "(lambda_q < lambda_p) before considering them as parallel.");
-
- /**
- * Holds the value of {@link #DELTA_ID}.
- */
- private double delta;
-
- /**
- * Holds the value of {@link #TAU_ID}.
- */
- private double tau;
-
- /**
- * Constructor.
- *
- * @param indexFactory Index factory.
- * @param delta Delta parameter
- * @param tau Tau parameter
- */
- public ERiCDistanceFunction(IndexFactory<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>> indexFactory, double delta, double tau) {
- super(indexFactory);
- this.delta = delta;
- this.tau = tau;
- }
-
- @Override
- public BitDistance getDistanceFactory() {
- return BitDistance.FACTORY;
- }
-
- @Override
- public <T extends NumberVector<?>> Instance<T> instantiate(Relation<T> database) {
- // We can't really avoid these warnings, due to a limitation in Java
- // Generics (AFAICT)
- @SuppressWarnings("unchecked")
- FilteredLocalPCAIndex<T> indexinst = (FilteredLocalPCAIndex<T>) indexFactory.instantiate((Relation<NumberVector<?>>) database);
- return new Instance<>(database, indexinst, this, delta, tau);
- }
-
- /**
- * Returns true, if the strong eigenvectors of the two specified pcas span up
- * the same space. Note, that the first pca must have equal ore more strong
- * eigenvectors than the second pca.
- *
- * @param pca1 first PCA
- * @param pca2 second PCA
- * @return true, if the strong eigenvectors of the two specified pcas span up
- * the same space
- */
- private boolean approximatelyLinearDependent(PCAFilteredResult pca1, PCAFilteredResult pca2) {
- Matrix m1_czech = pca1.dissimilarityMatrix();
- Matrix v2_strong = pca2.adapatedStrongEigenvectors();
- for(int i = 0; i < v2_strong.getColumnDimensionality(); i++) {
- Vector v2_i = v2_strong.getCol(i);
- // check, if distance of v2_i to the space of pca_1 > delta
- // (i.e., if v2_i spans up a new dimension)
- double dist = Math.sqrt(v2_i.transposeTimes(v2_i) - v2_i.transposeTimesTimes(m1_czech, v2_i));
-
- // if so, return false
- if(dist > delta) {
- return false;
- }
- }
-
- return true;
- }
-
- /**
- * Computes the distance between two given DatabaseObjects according to this
- * distance function. Note, that the first pca must have equal or more strong
- * eigenvectors than the second pca.
- *
- * @param v1 first DatabaseObject
- * @param v2 second DatabaseObject
- * @param pca1 first PCA
- * @param pca2 second PCA
- * @return the distance between two given DatabaseObjects according to this
- * distance function
- */
- public BitDistance distance(NumberVector<?> v1, NumberVector<?> v2, PCAFilteredResult pca1, PCAFilteredResult pca2) {
- if(pca1.getCorrelationDimension() < pca2.getCorrelationDimension()) {
- throw new IllegalStateException("pca1.getCorrelationDimension() < pca2.getCorrelationDimension(): " + pca1.getCorrelationDimension() + " < " + pca2.getCorrelationDimension());
- }
-
- boolean approximatelyLinearDependent;
- if(pca1.getCorrelationDimension() == pca2.getCorrelationDimension()) {
- approximatelyLinearDependent = approximatelyLinearDependent(pca1, pca2) && approximatelyLinearDependent(pca2, pca1);
- }
- else {
- approximatelyLinearDependent = approximatelyLinearDependent(pca1, pca2);
- }
-
- if(!approximatelyLinearDependent) {
- return new BitDistance(true);
- }
-
- else {
- double affineDistance;
-
- if(pca1.getCorrelationDimension() == pca2.getCorrelationDimension()) {
- WeightedDistanceFunction df1 = new WeightedDistanceFunction(pca1.similarityMatrix());
- WeightedDistanceFunction df2 = new WeightedDistanceFunction(pca2.similarityMatrix());
- affineDistance = Math.max(df1.distance(v1, v2).doubleValue(), df2.distance(v1, v2).doubleValue());
- }
- else {
- WeightedDistanceFunction df1 = new WeightedDistanceFunction(pca1.similarityMatrix());
- affineDistance = df1.distance(v1, v2).doubleValue();
- }
-
- if(affineDistance > tau) {
- return new BitDistance(true);
- }
-
- return new BitDistance(false);
- }
- }
-
- @Override
- public boolean equals(Object obj) {
- if(obj == null) {
- return false;
- }
- if(!this.getClass().equals(obj.getClass())) {
- return false;
- }
- ERiCDistanceFunction other = (ERiCDistanceFunction) obj;
- return (this.delta == other.delta) && (this.tau == other.tau);
- }
-
- /**
- * The actual instance bound to a particular database.
- *
- * @author Erich Schubert
- */
- public static class Instance<V extends NumberVector<?>> extends AbstractIndexBasedDistanceFunction.Instance<V, FilteredLocalPCAIndex<V>, BitDistance, ERiCDistanceFunction> implements FilteredLocalPCABasedDistanceFunction.Instance<V, FilteredLocalPCAIndex<V>, BitDistance> {
- /**
- * Holds the value of {@link #DELTA_ID}.
- */
- private final double delta;
-
- /**
- * Holds the value of {@link #TAU_ID}.
- */
- private final double tau;
-
- /**
- * Constructor.
- *
- * @param database Database
- * @param index Index
- * @param parent Parent distance
- * @param delta Delta parameter
- * @param tau Tau parameter
- */
- public Instance(Relation<V> database, FilteredLocalPCAIndex<V> index, ERiCDistanceFunction parent, double delta, double tau) {
- super(database, index, parent);
- this.delta = delta;
- this.tau = tau;
- }
-
- /**
- * Note, that the pca of o1 must have equal ore more strong eigenvectors
- * than the pca of o2.
- */
- @Override
- public BitDistance distance(DBIDRef id1, DBIDRef id2) {
- PCAFilteredResult pca1 = index.getLocalProjection(id1);
- PCAFilteredResult pca2 = index.getLocalProjection(id2);
- V v1 = relation.get(id1);
- V v2 = relation.get(id2);
- return parent.distance(v1, v2, pca1, pca2);
- }
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer extends AbstractIndexBasedDistanceFunction.Parameterizer<IndexFactory<NumberVector<?>, FilteredLocalPCAIndex<NumberVector<?>>>> {
- double delta = 0.0;
-
- double tau = 0.0;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- configIndexFactory(config, FilteredLocalPCAIndex.Factory.class, KNNQueryFilteredPCAIndex.Factory.class);
-
- final DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 0.1);
- deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
- if(config.grab(deltaP)) {
- delta = deltaP.doubleValue();
- }
-
- final DoubleParameter tauP = new DoubleParameter(TAU_ID, 0.1);
- tauP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
- if(config.grab(tauP)) {
- tau = tauP.doubleValue();
- }
- }
-
- @Override
- protected ERiCDistanceFunction makeInstance() {
- return new ERiCDistanceFunction(factory, delta, tau);
- }
- }
-}