Import Debian changes 0.6.5~20141030-1

elki (0.6.5~20141030-1) unstable; urgency=medium * New upstream beta release * Urgency medium: 0.6.0 suffers from a performance issue with duplicates. * Repackaged tarball from .jar to .tar.bz2 * Add dependency on libsvm3-java * Enable line numbers for debugging (ant debuglevel)
author: Erich Schubert <erich@debian.org> 2014-10-31 03:43:51 +0100
committer: Andrej Shadura <andrewsh@debian.org> 2019-03-09 22:30:40 +0000
commit: 596d8876dca5627dd76e8c23bf40a24cc305eeed (patch)
tree: d269ddb46561469f6b1fff67b19e0cd2b4608f5b /src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
parent: ee31d687b1a0e2f2f1e6e71375c7cc3b094919b8 (diff)
parent: 337087b668d3a54f3afee3a9adb597a32e9f7e94 (diff)
1 files changed, 254 insertions, 139 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
index d535e136..87f7d519 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2013
+ Copyright (C) 2014
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -29,50 +29,51 @@ import java.util.List;
 import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
 import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
 import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.CorePredicate;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.ERiCNeighborPredicate;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.GeneralizedDBSCAN;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan.MinPtsCorePredicate;
 import de.lmu.ifi.dbs.elki.data.Cluster;
 import de.lmu.ifi.dbs.elki.data.Clustering;
 import de.lmu.ifi.dbs.elki.data.NumberVector;
 import de.lmu.ifi.dbs.elki.data.model.CorrelationModel;
-import de.lmu.ifi.dbs.elki.data.model.DimensionModel;
 import de.lmu.ifi.dbs.elki.data.model.Model;
 import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
 import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
 import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
 import de.lmu.ifi.dbs.elki.database.relation.Relation;
 import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.ProxyDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.correlation.ERiCDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.BitDistance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.IntegerDistance;
 import de.lmu.ifi.dbs.elki.logging.Logging;
 import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.FirstNEigenPairFilter;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
-import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder;
 import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
 import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;
 import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
 import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
 import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
 
 /**
  * Performs correlation clustering on the data partitioned according to local
  * correlation dimensionality and builds a hierarchy of correlation clusters
  * that allows multiple inheritance from the clustering result.
+ * 
+ * Reference:
  * <p>
- * Reference: E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, and A. Zimek: On
- * Exploring Complex Relationships of Correlation Clusters. <br>
+ * E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, and A. Zimek:<br />
+ * On Exploring Complex Relationships of Correlation Clusters.<br />
  * In Proc. 19th International Conference on Scientific and Statistical Database
  * Management (SSDBM 2007), Banff, Canada, 2007.
  * </p>
@@ -81,36 +82,39 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
  * 
  * @apiviz.composedOf COPAC
  * @apiviz.composedOf DBSCAN
- * @apiviz.composedOf ERiCDistanceFunction
  * @apiviz.composedOf FirstNEigenPairFilter
  * @apiviz.composedOf PCAFilteredRunner
+ * @apiviz.composedOf ERiCNeighborPredicate
  * @apiviz.has CorrelationModel
  * 
  * @param <V> the type of NumberVector handled by this Algorithm
  */
-// TODO: Re-use PCARunner objects somehow?
 @Title("ERiC: Exploring Relationships among Correlation Clusters")
-@Description("Performs the DBSCAN algorithm on the data using a special distance function taking into account correlations among attributes and builds " + "a hierarchy that allows multiple inheritance from the correlation clustering result.")
-@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, and A. Zimek", title = "On Exploring Complex Relationships of Correlation Clusters", booktitle = "Proc. 19th International Conference on Scientific and Statistical Database Management (SSDBM 2007), Banff, Canada, 2007", url = "http://dx.doi.org/10.1109/SSDBM.2007.21")
-public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<CorrelationModel<V>>> implements ClusteringAlgorithm<Clustering<CorrelationModel<V>>> {
+@Description("Performs the DBSCAN algorithm on the data using a special distance function taking into account correlations among attributes and builds " //
+    + "a hierarchy that allows multiple inheritance from the correlation clustering result.")
+@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, and A. Zimek",//
+title = "On Exploring Complex Relationships of Correlation Clusters", //
+booktitle = "Proc. 19th International Conference on Scientific and Statistical Database Management (SSDBM 2007), Banff, Canada, 2007", //
+url = "http://dx.doi.org/10.1109/SSDBM.2007.21")
+public class ERiC<V extends NumberVector> extends AbstractAlgorithm<Clustering<CorrelationModel<V>>> implements ClusteringAlgorithm<Clustering<CorrelationModel<V>>> {
   /**
    * The logger for this class.
    */
   private static final Logging LOG = Logging.getLogger(ERiC.class);
 
   /**
-   * The COPAC clustering algorithm.
+   * ERiC Settings.
    */
-  private COPAC<V, IntegerDistance> copacAlgorithm;
+  private ERiC.Settings settings;
 
   /**
    * Constructor.
    * 
-   * @param copacAlgorithm COPAC to use
+   * @param settings ERiC clustering settings
    */
-  public ERiC(COPAC<V, IntegerDistance> copacAlgorithm) {
+  public ERiC(ERiC.Settings settings) {
     super();
-    this.copacAlgorithm = copacAlgorithm;
+    this.settings = settings;
   }
 
   /**
@@ -119,30 +123,27 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
    * @param relation Relation to process
    * @return Clustering result
    */
-  public Clustering<CorrelationModel<V>> run(Relation<V> relation) {
+  public Clustering<CorrelationModel<V>> run(Database database, Relation<V> relation) {
     final int dimensionality = RelationUtil.dimensionality(relation);
 
     StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
 
-    // run COPAC
-    if (stepprog != null) {
-      stepprog.beginStep(1, "Preprocessing local correlation dimensionalities and partitioning data", LOG);
-    }
-    Clustering<Model> copacResult = copacAlgorithm.run(relation);
-
-    DistanceQuery<V, IntegerDistance> query = copacAlgorithm.getPartitionDistanceQuery();
+    // Run Generalized DBSCAN
+    LOG.beginStep(stepprog, 1, "Preprocessing local correlation dimensionalities and partitioning data");
+    // FIXME: how to ensure we are running on the same relation?
+    ERiCNeighborPredicate<V>.Instance npred = new ERiCNeighborPredicate<V>(settings).instantiate(database, relation);
+    CorePredicate.Instance<DBIDs> cpred = new MinPtsCorePredicate(settings.minpts).instantiate(database, TypeUtil.DBIDS);
+    Clustering<Model> copacResult = new GeneralizedDBSCAN.Instance<>(npred, cpred, false).run();
 
     // extract correlation clusters
-    if (stepprog != null) {
-      stepprog.beginStep(2, "Extract correlation clusters", LOG);
-    }
-    List<List<Cluster<CorrelationModel<V>>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality);
-    if (LOG.isDebugging()) {
+    LOG.beginStep(stepprog, 2, "Extract correlation clusters");
+    List<List<Cluster<CorrelationModel<V>>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality, npred);
+    if(LOG.isDebugging()) {
       StringBuilder msg = new StringBuilder("Step 2: Extract correlation clusters...");
-      for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
+      for(int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
         List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
         msg.append("\n\ncorrDim ").append(corrDim);
-        for (Cluster<CorrelationModel<V>> cluster : correlationClusters) {
+        for(Cluster<CorrelationModel<V>> cluster : correlationClusters) {
           msg.append("\n  cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
           // .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
           // msg.append("\n  basis " +
@@ -152,42 +153,38 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
       }
       LOG.debugFine(msg.toString());
     }
-    if (LOG.isVerbose()) {
+    if(LOG.isVerbose()) {
       int clusters = 0;
-      for (List<Cluster<CorrelationModel<V>>> correlationClusters : clusterMap) {
+      for(List<Cluster<CorrelationModel<V>>> correlationClusters : clusterMap) {
         clusters += correlationClusters.size();
       }
       LOG.verbose(clusters + " clusters extracted.");
     }
 
     // build hierarchy
-    if (stepprog != null) {
-      stepprog.beginStep(3, "Building hierarchy", LOG);
-    }
+    LOG.beginStep(stepprog, 3, "Building hierarchy");
     Clustering<CorrelationModel<V>> clustering = new Clustering<>("ERiC clustering", "eric-clustering");
-    buildHierarchy(clustering, clusterMap, query);
-    if (LOG.isDebugging()) {
+    buildHierarchy(clustering, clusterMap, npred);
+    if(LOG.isDebugging()) {
       StringBuilder msg = new StringBuilder("Step 3: Build hierarchy");
-      for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
+      for(int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
         List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
-        for (Cluster<CorrelationModel<V>> cluster : correlationClusters) {
+        for(Cluster<CorrelationModel<V>> cluster : correlationClusters) {
           msg.append("\n  cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
           // .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
-          for (Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterParents(cluster); iter.valid(); iter.advance()) {
+          for(Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterParents(cluster); iter.valid(); iter.advance()) {
             msg.append("\n   parent ").append(iter.get());
           }
-          for (Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(cluster); iter.valid(); iter.advance()) {
+          for(Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(cluster); iter.valid(); iter.advance()) {
             msg.append("\n   child ").append(iter.get());
           }
         }
       }
       LOG.debugFine(msg.toString());
     }
-    if (stepprog != null) {
-      stepprog.setCompleted(LOG);
-    }
+    LOG.setCompleted(stepprog);
 
-    for (Cluster<CorrelationModel<V>> rc : clusterMap.get(clusterMap.size() - 1)) {
+    for(Cluster<CorrelationModel<V>> rc : clusterMap.get(clusterMap.size() - 1)) {
       clustering.addToplevelCluster(rc);
     }
     return clustering;
@@ -199,16 +196,17 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
    * correlation dimension. Each cluster is defined by the basis vectors
    * defining the subspace in which the cluster appears.
    * 
-   * @param copacResult
+   * @param dbscanResult
    * 
    * @param database the database containing the objects
    * @param dimensionality the dimensionality of the feature space
+   * @param npred ERiC predicate
    * @return a list of clusters for each dimensionality
    */
-  private List<List<Cluster<CorrelationModel<V>>>> extractCorrelationClusters(Clustering<Model> copacResult, Relation<V> database, int dimensionality) {
+  private List<List<Cluster<CorrelationModel<V>>>> extractCorrelationClusters(Clustering<Model> dbscanResult, Relation<V> database, int dimensionality, ERiCNeighborPredicate<V>.Instance npred) {
     // result
     List<List<Cluster<CorrelationModel<V>>>> clusterMap = new ArrayList<>();
-    for (int i = 0; i <= dimensionality; i++) {
+    for(int i = 0; i <= dimensionality; i++) {
       clusterMap.add(new ArrayList<Cluster<CorrelationModel<V>>>());
     }
 
@@ -216,47 +214,38 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
     Cluster<Model> noise = null;
 
     // iterate over correlation dimensions
-    for (Cluster<Model> clus : copacResult.getAllClusters()) {
+    for(Cluster<Model> clus : dbscanResult.getAllClusters()) {
       DBIDs group = clus.getIDs();
-      if (clus.getModel() != null && clus.getModel() instanceof DimensionModel) {
-        int correlationDimension = ((DimensionModel) clus.getModel()).getDimension();
+      int dim = clus.isNoise() ? dimensionality : npred.dimensionality(clus.getIDs().iter());
 
-        ListParameterization parameters = pcaParameters(correlationDimension);
-        Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
-        PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
-        parameters.failOnErrors();
+      if(dim < dimensionality) {
+        PCAFilteredRunner pca = new PCAFilteredRunner(new StandardCovarianceMatrixBuilder(), new FirstNEigenPairFilter(dim), 1., 0.);
 
         // get cluster list for this dimension.
-        List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(correlationDimension);
+        List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(dim);
         PCAFilteredResult pcares = pca.processIds(group, database);
 
         V centroid = Centroid.make(database, group).toVector(database);
-        Cluster<CorrelationModel<V>> correlationCluster = new Cluster<>("[" + correlationDimension + "_" + correlationClusters.size() + "]", group, new CorrelationModel<>(pcares, centroid));
+        Cluster<CorrelationModel<V>> correlationCluster = new Cluster<>("[" + dim + "_" + correlationClusters.size() + "]", group, new CorrelationModel<>(pcares, centroid));
         correlationClusters.add(correlationCluster);
       }
       // partition containing noise
-      else if (clus.getModel() != null && clus.isNoise()) {
-        if (noise == null) {
+      else {
+        if(noise == null) {
           noise = clus;
-        } else {
+        }
+        else {
           ModifiableDBIDs merged = DBIDUtil.newHashSet(noise.getIDs());
           merged.addDBIDs(clus.getIDs());
           noise.setIDs(merged);
         }
-      } else {
-        throw new IllegalStateException("Unexpected group returned: " + clus.getClass().getName());
       }
     }
 
-    if (noise != null && noise.size() > 0) {
+    if(noise != null && noise.size() > 0) {
       // get cluster list for this dimension.
       List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(dimensionality);
-      ListParameterization parameters = pcaParameters(dimensionality);
-      Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
-      PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
-      for (ParameterException e : parameters.getErrors()) {
-        LOG.warning("Error in internal parameterization: " + e.getMessage());
-      }
+      PCAFilteredRunner pca = new PCAFilteredRunner(new StandardCovarianceMatrixBuilder(), new FirstNEigenPairFilter(dimensionality), 1., 0.);
       PCAFilteredResult pcares = pca.processIds(noise.getIDs(), database);
 
       V centroid = Centroid.make(database, noise.getIDs()).toVector(database);
@@ -265,8 +254,8 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
     }
 
     // Delete dimensionalities not found.
-    for (int i = dimensionality; i > 0; i--) {
-      if (clusterMap.get(i).size() > 0) {
+    for(int i = dimensionality; i > 0; i--) {
+      if(clusterMap.get(i).size() > 0) {
         break;
       }
       clusterMap.remove(i);
@@ -275,63 +264,35 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
     return clusterMap;
   }
 
-  /**
-   * Returns the parameters for the PCA for the specified correlation dimension.
-   * 
-   * @param correlationDimension the correlation dimension
-   * @return the parameters for the PCA for the specified correlation dimension
-   */
-  private ListParameterization pcaParameters(int correlationDimension) {
-    ListParameterization parameters = new ListParameterization();
-
-    parameters.addParameter(PCAFilteredRunner.PCA_EIGENPAIR_FILTER, FirstNEigenPairFilter.class);
-    parameters.addParameter(FirstNEigenPairFilter.EIGENPAIR_FILTER_N, correlationDimension);
-
-    return parameters;
-  }
-
-  private void buildHierarchy(Clustering<CorrelationModel<V>> clustering, List<List<Cluster<CorrelationModel<V>>>> clusterMap, DistanceQuery<V, IntegerDistance> query) {
+  private void buildHierarchy(Clustering<CorrelationModel<V>> clustering, List<List<Cluster<CorrelationModel<V>>>> clusterMap, ERiCNeighborPredicate<V>.Instance npred) {
     StringBuilder msg = LOG.isDebuggingFine() ? new StringBuilder() : null;
     Hierarchy<Cluster<CorrelationModel<V>>> hier = clustering.getClusterHierarchy();
 
-    DBSCAN<V, DoubleDistance> dbscan = ClassGenericsUtil.castWithGenericsOrNull(DBSCAN.class, copacAlgorithm.getPartitionAlgorithm(query));
-    if (dbscan == null) {
-      // TODO: appropriate exception class?
-      throw new IllegalArgumentException("ERiC was run without DBSCAN as COPAC algorithm!");
-    }
-    DistanceFunction<? super V, ?> dfun = ProxyDistanceFunction.unwrapDistance(dbscan.getDistanceFunction());
-    ERiCDistanceFunction distanceFunction = ClassGenericsUtil.castWithGenericsOrNull(ERiCDistanceFunction.class, dfun);
-    if (distanceFunction == null) {
-      // TODO: appropriate exception class?
-      throw new IllegalArgumentException("ERiC was run without ERiCDistanceFunction as distance function: got " + dfun.getClass());
-    }
     // Find maximum dimensionality found:
     int lambda_max = clusterMap.size() - 1;
 
-    for (int childCorrDim = 0; childCorrDim < lambda_max; childCorrDim++) {
+    for(int childCorrDim = 0; childCorrDim < lambda_max; childCorrDim++) {
       List<Cluster<CorrelationModel<V>>> children = clusterMap.get(childCorrDim);
-      // SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> parentMap =
-      // clusterMap.tailMap(childCorrDim + 1);
-      if (msg != null) {
+      if(msg != null) {
         msg.append("\ncorrdim ").append(childCorrDim);
-        // msg.append("\nparents ").append(parentMap.keySet());
       }
 
-      for (Cluster<CorrelationModel<V>> child : children) {
-        for (int parentCorrDim = childCorrDim + 1; parentCorrDim <= lambda_max; parentCorrDim++) {
+      for(Cluster<CorrelationModel<V>> child : children) {
+        for(int parentCorrDim = childCorrDim + 1; parentCorrDim <= lambda_max; parentCorrDim++) {
           List<Cluster<CorrelationModel<V>>> parents = clusterMap.get(parentCorrDim);
-          for (Cluster<CorrelationModel<V>> parent : parents) {
+          for(Cluster<CorrelationModel<V>> parent : parents) {
             int subspaceDim_parent = parent.getModel().getPCAResult().getCorrelationDimension();
-            if (subspaceDim_parent == lambda_max && hier.numParents(child) == 0) {
+            if(subspaceDim_parent == lambda_max && hier.numParents(child) == 0) {
               clustering.addChildCluster(parent, child);
-              if (msg != null) {
+              if(msg != null) {
                 msg.append('\n').append(parent).append(" is parent of ").append(child);
               }
-            } else {
-              BitDistance dist = distanceFunction.distance(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
-              if (!dist.bitValue() && (hier.numParents(child) == 0 || !isParent(distanceFunction, parent, hier.iterParents(child)))) {
+            }
+            else {
+              boolean dist = npred.weakNeighbors(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
+              if(!dist && (hier.numParents(child) == 0 || !isParent(npred, parent, hier.iterParents(child)))) {
                 clustering.addChildCluster(parent, child);
-                if (msg != null) {
+                if(msg != null) {
                   msg.append('\n').append(parent).append(" is parent of ").append(child);
                 }
               }
@@ -340,45 +301,43 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
         }
       }
     }
-    if (msg != null) {
+    if(msg != null) {
       LOG.debugFine(msg.toString());
     }
-
   }
 
   /**
    * Returns true, if the specified parent cluster is a parent of one child of
    * the children clusters.
    * 
-   * @param distanceFunction the distance function for distance computation
-   *        between the clusters
+   * @param npred Neighborhood predicate
    * @param parent the parent to be tested
    * @param iter the list of children to be tested
    * @return true, if the specified parent cluster is a parent of one child of
    *         the children clusters, false otherwise
    */
-  private boolean isParent(ERiCDistanceFunction distanceFunction, Cluster<CorrelationModel<V>> parent, Iter<Cluster<CorrelationModel<V>>> iter) {
+  private boolean isParent(ERiCNeighborPredicate<V>.Instance npred, Cluster<CorrelationModel<V>> parent, Iter<Cluster<CorrelationModel<V>>> iter) {
     StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
 
-    for (; iter.valid(); iter.advance()) {
+    for(; iter.valid(); iter.advance()) {
       Cluster<CorrelationModel<V>> child = iter.get();
-      if (parent.getModel().getPCAResult().getCorrelationDimension() == child.getModel().getPCAResult().getCorrelationDimension()) {
+      if(parent.getModel().getPCAResult().getCorrelationDimension() == child.getModel().getPCAResult().getCorrelationDimension()) {
         return false;
       }
 
-      BitDistance dist = distanceFunction.distance(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
-      if (msg != null) {
+      boolean dist = npred.weakNeighbors(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
+      if(msg != null) {
         msg.append("\ndist(").append(child).append(" - ").append(parent).append(") = ").append(dist);
       }
-      if (!dist.bitValue()) {
-        if (msg != null) {
+      if(dist) {
+        if(msg != null) {
           LOG.debugFine(msg);
         }
         return true;
       }
     }
 
-    if (msg != null) {
+    if(msg != null) {
       LOG.debugFine(msg.toString());
     }
     return false;
@@ -395,28 +354,184 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
   }
 
   /**
+   * Class to wrap the ERiC settings.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Settings {
+    /**
+     * Neighborhood size.
+     */
+    public int k;
+
+    /**
+     * Class to compute PCA.
+     */
+    public PCAFilteredRunner pca;
+
+    /**
+     * Parameter to specify the threshold for approximate linear dependency: the
+     * strong eigenvectors of q are approximately linear dependent from the
+     * strong eigenvectors p if the following condition holds for all strong
+     * eigenvectors q_i of q (lambda_q < lambda_p): q_i' * M^check_p * q_i <=
+     * delta^2, must be a double equal to or greater than 0.
+     */
+    public double delta;
+
+    /**
+     * Parameter to specify the threshold for the maximum distance between two
+     * approximately linear dependent subspaces of two objects p and q (lambda_q
+     * < lambda_p) before considering them as parallel, must be a double equal
+     * to or greater than 0.
+     */
+    public double tau;
+
+    /**
+     * Minimum neighborhood size (density).
+     */
+    public int minpts;
+
+    /**
+     * Parameterization class.
+     * 
+     * @author Erich Schubert
+     * 
+     * @apiviz.exclude
+     */
+    public static class Parameterizer extends AbstractParameterizer {
+      /**
+       * Size for the kNN neighborhood used in the PCA step of ERiC.
+       */
+      public static final OptionID K_ID = new OptionID("eric.k", "Number of neighbors to use for PCA.");
+
+      /**
+       * Parameter to specify the threshold for approximate linear dependency:
+       * the strong eigenvectors of q are approximately linear dependent from
+       * the strong eigenvectors p if the following condition holds for all
+       * strong eigenvectors q_i of q (lambda_q < lambda_p): q_i' * M^check_p *
+       * q_i <= delta^2, must be a double equal to or greater than 0.
+       * <p>
+       * Default value: {@code 0.1}
+       * </p>
+       * <p>
+       * Key: {@code -ericdf.delta}
+       * </p>
+       */
+      public static final OptionID DELTA_ID = new OptionID("ericdf.delta", "Threshold for approximate linear dependency: " + "the strong eigenvectors of q are approximately linear dependent " + "from the strong eigenvectors p if the following condition " + "holds for all stroneg eigenvectors q_i of q (lambda_q < lambda_p): " + "q_i' * M^check_p * q_i <= delta^2.");
+
+      /**
+       * Parameter to specify the threshold for the maximum distance between two
+       * approximately linear dependent subspaces of two objects p and q
+       * (lambda_q < lambda_p) before considering them as parallel, must be a
+       * double equal to or greater than 0.
+       * <p>
+       * Default value: {@code 0.1}
+       * </p>
+       * <p>
+       * Key: {@code -ericdf.tau}
+       * </p>
+       */
+      public static final OptionID TAU_ID = new OptionID("ericdf.tau", "Threshold for the maximum distance between two approximately linear " + "dependent subspaces of two objects p and q " + "(lambda_q < lambda_p) before considering them as parallel.");
+
+      /**
+       * Settings to build.
+       */
+      Settings settings;
+
+      @Override
+      public void makeOptions(Parameterization config) {
+        settings = new Settings();
+        configK(config);
+        // TODO: allow using other PCA runners?
+        settings.pca = config.tryInstantiate(PCAFilteredRunner.class);
+        configDelta(config);
+        configTau(config);
+        configMinPts(config);
+      }
+
+      /**
+       * Configure the kNN parameter.
+       * 
+       * @param config Parameter source
+       */
+      protected void configK(Parameterization config) {
+        IntParameter kP = new IntParameter(K_ID) //
+        .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+        if(config.grab(kP)) {
+          settings.k = kP.intValue();
+        }
+      }
+
+      /**
+       * Configure the delta parameter.
+       * 
+       * @param config Parameter source
+       */
+      protected void configDelta(Parameterization config) {
+        final DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 0.1);
+        deltaP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+        if(config.grab(deltaP)) {
+          settings.delta = deltaP.doubleValue();
+        }
+      }
+
+      /**
+       * Configure the tau parameter.
+       * 
+       * @param config Parameter source
+       */
+      protected void configTau(Parameterization config) {
+        final DoubleParameter tauP = new DoubleParameter(TAU_ID, 0.1);
+        tauP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+        if(config.grab(tauP)) {
+          settings.tau = tauP.doubleValue();
+        }
+      }
+
+      /**
+       * Configure the minPts aka "mu" parameter.
+       * 
+       * @param config Parameter source
+       */
+      protected void configMinPts(Parameterization config) {
+        IntParameter minptsP = new IntParameter(DBSCAN.Parameterizer.MINPTS_ID) //
+        .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+        if(config.grab(minptsP)) {
+          settings.minpts = minptsP.intValue();
+        }
+      }
+
+      @Override
+      public Settings makeInstance() {
+        return settings;
+      }
+    }
+  }
+
+  /**
    * Parameterization class.
    * 
    * @author Erich Schubert
    * 
    * @apiviz.exclude
    */
-  public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+  public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
     /**
-     * The COPAC instance to use
+     * The settings to use.
      */
-    protected COPAC<V, IntegerDistance> copac;
+    protected ERiC.Settings settings;
 
-    @SuppressWarnings("unchecked")
     @Override
     protected void makeOptions(Parameterization config) {
       super.makeOptions(config);
-      copac = config.tryInstantiate(COPAC.class);
+      settings = config.tryInstantiate(ERiC.Settings.class);
     }
 
     @Override
     protected ERiC<V> makeInstance() {
-      return new ERiC<>(copac);
+      return new ERiC<>(settings);
     }
   }
 }
author	Erich Schubert <erich@debian.org>	2014-10-31 03:43:51 +0100
committer	Andrej Shadura <andrewsh@debian.org>	2019-03-09 22:30:40 +0000
commit	596d8876dca5627dd76e8c23bf40a24cc305eeed (patch)
tree	d269ddb46561469f6b1fff67b19e0cd2b4608f5b /src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
parent	ee31d687b1a0e2f2f1e6e71375c7cc3b094919b8 (diff)
parent	337087b668d3a54f3afee3a9adb597a32e9f7e94 (diff)