path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java')
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java  818
1 file changed, 0 insertions, 818 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
deleted file mode 100644
index 3e1f0650..00000000
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
+++ /dev/null
@@ -1,818 +0,0 @@
-package de.lmu.ifi.dbs.elki.algorithm.clustering;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import gnu.trove.list.array.TDoubleArrayList;
-
-import java.util.ArrayList;
-import java.util.Comparator;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.Cluster;
-import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.data.model.DendrogramModel;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.DoubleDistanceDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.result.BasicResult;
-import de.lmu.ifi.dbs.elki.result.OrderingFromDataStore;
-import de.lmu.ifi.dbs.elki.result.Result;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyHashmapList;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.ModifiableHierarchy;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-
-/**
- * Implementation of the efficient Single-Link Algorithm SLINK of R. Sibson.
- * <p>
- * Reference: R. Sibson: SLINK: An optimally efficient algorithm for the
- * single-link cluster method. <br>
- * In: The Computer Journal 16 (1973), No. 1, p. 30-34.
- * </p>
- *
- * @author Elke Achtert
- * @param <O> the type of DatabaseObject the algorithm is applied to
- * @param <D> the type of Distance used
- */
-@Title("SLINK: Single Link Clustering")
-@Description("Hierarchical clustering algorithm based on single-link connectivity.")
-@Reference(authors = "R. Sibson", title = "SLINK: An optimally efficient algorithm for the single-link cluster method", booktitle = "The Computer Journal 16 (1973), No. 1, p. 30-34.", url = "http://dx.doi.org/10.1093/comjnl/16.1.30")
-public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
- /**
- * The logger for this class.
- */
- private static final Logging LOG = Logging.getLogger(SLINK.class);
-
- /**
- * Minimum number of clusters to extract
- */
- private int minclusters = -1;
-
- /**
- * Constructor.
- *
- * @param distanceFunction Distance function
- * @param minclusters Minimum number of clusters to extract. Can be {@code -1} to build the complete dendrogram only.
- */
- public SLINK(DistanceFunction<? super O, D> distanceFunction, int minclusters) {
- super(distanceFunction);
- this.minclusters = minclusters;
- }
-
- /**
- * Performs the SLINK algorithm on the given database.
- */
- public Result run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
- @SuppressWarnings("unchecked")
- Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
- WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- WritableDataStore<D> lambda = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, distCls);
- // Temporary storage for m.
- WritableDataStore<D> m = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
-
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running SLINK", relation.size(), LOG) : null;
- // has to be an array for monotonicity reasons!
- ModifiableDBIDs processedIDs = DBIDUtil.newArray(relation.size());
-
- // Optimized code path for double distances
- if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && lambda instanceof WritableDoubleDistanceDataStore && m instanceof WritableDoubleDistanceDataStore) {
- @SuppressWarnings("unchecked")
- PrimitiveDoubleDistanceFunction<? super O> dist = (PrimitiveDoubleDistanceFunction<? super O>) getDistanceFunction();
- WritableDoubleDistanceDataStore lambdad = (WritableDoubleDistanceDataStore) lambda;
- WritableDoubleDistanceDataStore md = (WritableDoubleDistanceDataStore) m;
- // apply the algorithm
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- step1double(id, pi, lambdad);
- step2double(id, processedIDs, distQuery.getRelation(), dist, md);
- step3double(id, pi, lambdad, processedIDs, md);
- step4double(id, pi, lambdad, processedIDs);
-
- processedIDs.add(id);
-
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- } else {
- // apply the algorithm
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- step1(id, pi, lambda);
- step2(id, processedIDs, distQuery, m);
- step3(id, pi, lambda, processedIDs, m);
- step4(id, pi, lambda, processedIDs);
-
- processedIDs.add(id);
-
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // We don't need m anymore.
- m.destroy();
- m = null;
-
- // Build dendrogram clusters identified by their target object
- if (LOG.isVerbose()) {
- LOG.verbose("Extracting clusters.");
- }
- final BasicResult result;
- if (lambda instanceof DoubleDistanceDataStore) {
- result = extractClustersDouble(relation.getDBIDs(), pi, (DoubleDistanceDataStore) lambda, minclusters);
- } else {
- result = extractClusters(relation.getDBIDs(), pi, lambda, minclusters);
- }
-
- result.addChildResult(new MaterializedRelation<DBID>("SLINK pi", "slink-order", TypeUtil.DBID, pi, processedIDs));
- result.addChildResult(new MaterializedRelation<D>("SLINK lambda", "slink-order", new SimpleTypeInformation<D>(distCls), lambda, processedIDs));
- result.addChildResult(new OrderingFromDataStore<D>("SLINK order", "slink-order", processedIDs, lambda));
- return result;
- }
-
- /**
- * First step: Initialize P(id) = id, L(id) = infinity.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- */
- private void step1(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda) {
- // P(n+1) = n+1:
- pi.put(id, id);
- // L(n+1) = infinity
- lambda.put(id, getDistanceFunction().getDistanceFactory().infiniteDistance());
- }
-
- /**
- * Second step: Determine the pairwise distances from all objects in the
- * pointer representation to the new object with the specified id.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param processedIDs the already processed ids
- * @param distFunc Distance function to use
- * @param m Data store
- */
- private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<O, D> distFunc, WritableDataStore<D> m) {
- O newObj = distFunc.getRelation().get(id);
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- // M(i) = dist(i, n+1)
- m.put(it, distFunc.distance(it, newObj));
- }
- }
-
- /**
- * Third step: Determine the values for P and L
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- * @param m Data store
- */
- private void step3(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs, WritableDataStore<D> m) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- D l_i = lambda.get(it);
- D m_i = m.get(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- D mp_i = m.get(p_i);
-
- // if L(i) >= M(i)
- if (l_i.compareTo(m_i) >= 0) {
- // M(P(i)) = min { M(P(i)), L(i) }
- m.put(p_i, DistanceUtil.min(mp_i, l_i));
-
- // L(i) = M(i)
- lambda.put(it, m_i);
-
- // P(i) = n+1;
- pi.put(it, id);
- } else {
- // M(P(i)) = min { M(P(i)), M(i) }
- m.put(p_i, DistanceUtil.min(mp_i, m_i));
- }
- }
- }
-
- /**
- * Fourth step: Update the pointer representation if necessary
- *
- * @param id the id of the current object
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- */
- private void step4(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- D l_i = lambda.get(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- D lp_i = lambda.get(p_i);
-
- // if L(i) >= L(P(i))
- if (l_i.compareTo(lp_i) >= 0) {
- // P(i) = n+1
- pi.put(it, id);
- }
- }
- }
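
The four step methods above are Sibson's recurrences on the pointer representation (pi, lambda). For reference, a minimal self-contained sketch of the same recurrences on plain arrays (illustrative names only, not ELKI API; assumes a precomputed distance matrix):

// Stand-alone SLINK core: insert points 0..n-1 one at a time,
// maintaining pi (pointer) and lambda (merge level) as in step1..step4.
static void slink(double[][] dist, int[] pi, double[] lambda) {
  final int n = dist.length;
  double[] m = new double[n];
  for (int k = 0; k < n; k++) {
    // Step 1: P(k) = k, L(k) = infinity
    pi[k] = k;
    lambda[k] = Double.POSITIVE_INFINITY;
    // Step 2: M(i) = dist(i, k) for all processed i < k
    for (int i = 0; i < k; i++) {
      m[i] = dist[i][k];
    }
    // Step 3: determine the new values for P and L
    for (int i = 0; i < k; i++) {
      if (lambda[i] >= m[i]) {
        m[pi[i]] = Math.min(m[pi[i]], lambda[i]);
        lambda[i] = m[i];
        pi[i] = k;
      } else {
        m[pi[i]] = Math.min(m[pi[i]], m[i]);
      }
    }
    // Step 4: relink pointers that now merge at a lower level than their target
    for (int i = 0; i < k; i++) {
      if (lambda[i] >= lambda[pi[i]]) {
        pi[i] = k;
      }
    }
  }
}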
-
- /**
- * First step: Initialize P(id) = id, L(id) = infinity.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- */
- private void step1double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda) {
- // P(n+1) = n+1:
- pi.put(id, id);
- // L(n+1) = infinity
- lambda.putDouble(id, Double.POSITIVE_INFINITY);
- }
-
- /**
- * Second step: Determine the pairwise distances from all objects in the
- * pointer representation to the new object with the specified id.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param processedIDs the already processed ids
- * @param relation Data relation
- * @param distFunc Distance function to use
- * @param m Data store
- */
- private void step2double(DBIDRef id, DBIDs processedIDs, Relation<? extends O> relation, PrimitiveDoubleDistanceFunction<? super O> distFunc, WritableDoubleDistanceDataStore m) {
- O newObj = relation.get(id);
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- // M(i) = dist(i, n+1)
- m.putDouble(it, distFunc.doubleDistance(relation.get(it), newObj));
- }
- }
-
- /**
- * Third step: Determine the values for P and L
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- * @param m Data store
- */
- private void step3double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs, WritableDoubleDistanceDataStore m) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- double l_i = lambda.doubleValue(it);
- double m_i = m.doubleValue(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- double mp_i = m.doubleValue(p_i);
-
- // if L(i) >= M(i)
- if (l_i >= m_i) {
- // M(P(i)) = min { M(P(i)), L(i) }
- m.putDouble(p_i, Math.min(mp_i, l_i));
-
- // L(i) = M(i)
- lambda.putDouble(it, m_i);
-
- // P(i) = n+1;
- pi.put(it, id);
- } else {
- // M(P(i)) = min { M(P(i)), M(i) }
- m.putDouble(p_i, Math.min(mp_i, m_i));
- }
- }
- }
-
- /**
- * Fourth step: Update the pointer representation if necessary
- *
- * @param id the id of the current object
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- */
- private void step4double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- double l_i = lambda.doubleValue(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- double lp_i = lambda.doubleValue(p_i);
-
- // if L(i) >= L(P(i))
- if (l_i >= lp_i) {
- // P(i) = n+1
- pi.put(it, id);
- }
- }
- }
-
- /**
- * Extract all clusters from the pi-lambda-representation.
- *
- * @param ids Object ids to process
- * @param pi Pi store
- * @param lambda Lambda store
- * @param minclusters Minimum number of clusters to extract
- *
- * @return Hierarchical clustering
- */
- private Clustering<DendrogramModel<D>> extractClusters(DBIDs ids, final DBIDDataStore pi, final DataStore<D> lambda, int minclusters) {
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
- D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
-
- // Sort DBIDs by lambda. We need this for two things:
- // a) to determine the stop distance from "minclusters" parameter
- // b) to process arrows in decreasing / increasing order
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- order.sort(new CompareByLambda<D>(lambda));
-
- // Stop distance:
- final D stopdist = (minclusters > 0) ? lambda.get(order.get(ids.size() - minclusters)) : null;
-
- // The initial pass is top-down.
- DBIDArrayIter it = order.iter();
- int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
- // Tie handling: decrement split.
- if (stopdist != null) {
- while (split > 0) {
- it.seek(split - 1);
- if (stopdist.compareTo(lambda.get(it)) == 0) {
- split--;
- minclusters++;
- } else {
- break;
- }
- }
- }
-
- // Extract the child clusters
- int cnum = 0;
- int expcnum = Math.max(0, minclusters);
- WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
- ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
- ArrayList<D> cluster_dist = new ArrayList<D>(expcnum);
- ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
-
- DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
- // Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
- D dist = lambda.get(it); // Distance to successor
- pi.assignVar(it, succ); // succ = pi(it)
- int clusterid = cluster_map.intValue(succ);
- // Successor cluster has already been created:
- if (clusterid >= 0) {
- cluster_dbids.get(clusterid).add(it);
- cluster_map.putInt(it, clusterid);
- // Update distance to maximum encountered:
- if (cluster_dist.get(clusterid).compareTo(dist) < 0) {
- cluster_dist.set(clusterid, dist);
- }
- } else {
- // Need to start a new cluster:
- clusterid = cnum; // next cluster number.
- ModifiableDBIDs cids = DBIDUtil.newArray();
- // Add element and successor as initial members:
- cids.add(succ);
- cluster_map.putInt(succ, clusterid);
- cids.add(it);
- cluster_map.putInt(it, clusterid);
- // Store new cluster.
- cluster_dbids.add(cids);
- cluster_leads.add(succ);
- cluster_dist.add(dist);
- cnum++;
- }
-
- // Update the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- // Build a hierarchy out of these clusters.
- Cluster<DendrogramModel<D>> root = null;
- ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
- ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
- // Convert initial clusters to cluster objects
- {
- int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
- clusters.add(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i), hier));
- }
- cluster_dist = null; // Invalidate
- cluster_dbids = null; // Invalidate
- }
- // Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
- int clusterid = cluster_map.intValue(it);
- // The current cluster:
- final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
- clus = clusters.get(clusterid);
- } else {
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(it);
- clus = makeCluster(it, nulldist, cids, hier);
- // No need to store in clusters: cannot have another incoming pi
- // pointer!
- }
- // The successor to join:
- pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
- assert (root == null);
- root = clus;
- } else {
- // Parent cluster:
- int parentid = cluster_map.intValue(succ);
- D depth = lambda.get(it);
- // Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
- hier.add(pclus, clusters.get(parentid));
- hier.add(pclus, clus);
- clusters.set(parentid, pclus); // Replace existing parent cluster
- } else {
- // Create a new, one-element, parent cluster.
- parentid = cnum;
- cnum++;
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(succ);
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
- hier.add(pclus, clus);
- assert (clusters.size() == parentid);
- clusters.add(pclus); // Remember parent cluster
- cluster_map.putInt(succ, parentid); // Reference
- }
- }
-
- // Update the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // build hierarchy
- final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
- dendrogram.addCluster(root);
-
- return dendrogram;
- }
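
The extraction above relies on a key property of the pointer representation: when the dendrogram is cut at a stop distance t, object i and its pointer target pi(i) fall into the same cluster exactly when lambda(i) <= t. A sketch of a flat cut using that property with a small union-find (illustrative names, not ELKI API; companion to the slink sketch after step4):

// Flat clusters from the pointer representation at threshold t:
// returns a cluster label (set root) per object.
static int[] cut(int[] pi, double[] lambda, double t) {
  final int n = pi.length;
  int[] parent = new int[n]; // union-find forest, initially singletons
  for (int i = 0; i < n; i++) {
    parent[i] = i;
  }
  for (int i = 0; i < n; i++) {
    if (lambda[i] <= t) { // i merges with pi[i] below the cut
      int a = find(parent, i), b = find(parent, pi[i]);
      if (a != b) {
        parent[a] = b;
      }
    }
  }
  int[] label = new int[n];
  for (int i = 0; i < n; i++) {
    label[i] = find(parent, i);
  }
  return label;
}

static int find(int[] parent, int i) {
  while (parent[i] != i) {
    parent[i] = parent[parent[i]]; // path halving
    i = parent[i];
  }
  return i;
}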
-
- /**
- * Extract all clusters from the pi-lambda-representation.
- *
- * @param ids Object ids to process
- * @param pi Pi store
- * @param lambda Lambda store
- * @param minclusters Minimum number of clusters to extract
- *
- * @return Hierarchical clustering
- */
- private Clustering<DendrogramModel<D>> extractClustersDouble(DBIDs ids, final DBIDDataStore pi, final DoubleDistanceDataStore lambda, int minclusters) {
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
- D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
-
- // Sort DBIDs by lambda. We need this for two things:
- // a) to determine the stop distance from "minclusters" parameter
- // b) to process arrows in decreasing / increasing order
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- order.sort(new CompareByDoubleLambda(lambda));
-
- // Stop distance:
- final double stopdist = (minclusters > 0) ? lambda.doubleValue(order.get(ids.size() - minclusters)) : Double.POSITIVE_INFINITY;
-
- // The initial pass is top-down.
- DBIDArrayIter it = order.iter();
- int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
- // Tie handling: decrement split.
- if (minclusters > 0) {
- while (split > 0) {
- it.seek(split - 1);
- if (stopdist <= lambda.doubleValue(it)) {
- split--;
- minclusters++;
- } else {
- break;
- }
- }
- }
-
- // Extract the child clusters
- int cnum = 0;
- int expcnum = Math.max(0, minclusters);
- WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
- ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
- TDoubleArrayList cluster_dist = new TDoubleArrayList(expcnum);
- ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
-
- DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
- // Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
- double dist = lambda.doubleValue(it); // Distance to successor
- pi.assignVar(it, succ); // succ = pi(it)
- int clusterid = cluster_map.intValue(succ);
- // Successor cluster has already been created:
- if (clusterid >= 0) {
- cluster_dbids.get(clusterid).add(it);
- cluster_map.putInt(it, clusterid);
- // Update distance to maximum encountered:
- if (cluster_dist.get(clusterid) < dist) {
- cluster_dist.set(clusterid, dist);
- }
- } else {
- // Need to start a new cluster:
- clusterid = cnum; // next cluster number.
- ModifiableDBIDs cids = DBIDUtil.newArray();
- // Add element and successor as initial members:
- cids.add(succ);
- cluster_map.putInt(succ, clusterid);
- cids.add(it);
- cluster_map.putInt(it, clusterid);
- // Store new cluster.
- cluster_dbids.add(cids);
- cluster_leads.add(succ);
- cluster_dist.add(dist);
- cnum++;
- }
-
- // Update the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- // Build a hierarchy out of these clusters.
- Cluster<DendrogramModel<D>> root = null;
- ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
- ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
- // Convert initial clusters to cluster objects
- {
- int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
- @SuppressWarnings("unchecked")
- D depth = (D) new DoubleDistance(cluster_dist.get(i));
- clusters.add(makeCluster(it2, depth, cluster_dbids.get(i), hier));
- }
- cluster_dist = null; // Invalidate
- cluster_dbids = null; // Invalidate
- }
- // Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
- int clusterid = cluster_map.intValue(it);
- // The current cluster:
- final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
- clus = clusters.get(clusterid);
- } else {
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(it);
- clus = makeCluster(it, nulldist, cids, hier);
- // No need to store in clusters: cannot have another incoming pi
- // pointer!
- }
- // The successor to join:
- pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
- assert (root == null);
- root = clus;
- } else {
- // Parent cluster:
- int parentid = cluster_map.intValue(succ);
- @SuppressWarnings("unchecked")
- D depth = (D) new DoubleDistance(lambda.doubleValue(it));
- // Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
- hier.add(pclus, clusters.get(parentid));
- hier.add(pclus, clus);
- clusters.set(parentid, pclus); // Replace existing parent cluster
- } else {
- // Create a new, one-element, parent cluster.
- parentid = cnum;
- cnum++;
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(succ);
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
- hier.add(pclus, clus);
- assert (clusters.size() == parentid);
- clusters.add(pclus); // Remember parent cluster
- cluster_map.putInt(succ, parentid); // Reference
- }
- }
-
- // Update the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // build hierarchy
- final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
- dendrogram.addCluster(root);
-
- return dendrogram;
- }
-
- /**
- * Make the cluster for the given object
- *
- * @param lead Leading object
- * @param depth Linkage depth
- * @param members Member objects
- * @param hier Cluster hierarchy
- * @return Cluster
- */
- private Cluster<DendrogramModel<D>> makeCluster(DBIDRef lead, D depth, DBIDs members, ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier) {
- final String name;
- if (members.size() == 0) {
- name = "merge_" + lead + "_" + depth;
- } else if (depth.isInfiniteDistance()) {
- assert (members.contains(lead));
- name = "object_" + lead;
- } else {
- name = "cluster_" + lead + "_" + depth;
- }
- Cluster<DendrogramModel<D>> cluster = new Cluster<DendrogramModel<D>>(name, members, new DendrogramModel<D>(depth), hier);
- return cluster;
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
- }
-
- @Override
- protected Logging getLogger() {
- return LOG;
- }
-
- /**
- * Order a DBID collection by the lambda value.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- *
- * @param <D> Distance type
- */
- private static final class CompareByLambda<D extends Distance<D>> implements Comparator<DBIDRef> {
- /**
- * Lambda storage
- */
- private final DataStore<D> lambda;
-
- /**
- * Constructor.
- *
- * @param lambda Lambda storage
- */
- protected CompareByLambda(DataStore<D> lambda) {
- this.lambda = lambda;
- }
-
- @Override
- public int compare(DBIDRef id1, DBIDRef id2) {
- D k1 = lambda.get(id1);
- D k2 = lambda.get(id2);
- assert (k1 != null);
- assert (k2 != null);
- return k1.compareTo(k2);
- }
- }
-
- /**
- * Order a DBID collection by the lambda value.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private static final class CompareByDoubleLambda implements Comparator<DBIDRef> {
- /**
- * Lambda storage
- */
- private final DoubleDistanceDataStore lambda;
-
- /**
- * Constructor.
- *
- * @param lambda Lambda storage
- */
- protected CompareByDoubleLambda(DoubleDistanceDataStore lambda) {
- this.lambda = lambda;
- }
-
- @Override
- public int compare(DBIDRef id1, DBIDRef id2) {
- double k1 = lambda.doubleValue(id1);
- double k2 = lambda.doubleValue(id2);
- return Double.compare(k1, k2);
- }
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- /**
- * The minimum number of clusters to extract
- */
- public static final OptionID SLINK_MINCLUSTERS_ID = new OptionID("slink.minclusters", "The minimum number of clusters to extract.");
-
- protected int minclusters = -1;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter minclustersP = new IntParameter(SLINK_MINCLUSTERS_ID);
- minclustersP.addConstraint(new GreaterEqualConstraint(1));
- minclustersP.setOptional(true);
- if (config.grab(minclustersP)) {
- minclusters = minclustersP.intValue();
- }
- }
-
- @Override
- protected SLINK<O, D> makeInstance() {
- return new SLINK<O, D>(distanceFunction, minclusters);
- }
- }
-}
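
A tiny end-to-end run of the two sketches above (hypothetical driver, not part of this file): four points on a line, cut at t = 1.5, which separates {0, 1, 2} from {10}.

public static void main(String[] args) {
  double[] pos = { 0, 1, 2, 10 };
  int n = pos.length;
  double[][] dist = new double[n][n];
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n; j++) {
      dist[i][j] = Math.abs(pos[i] - pos[j]);
    }
  }
  int[] pi = new int[n];
  double[] lambda = new double[n];
  slink(dist, pi, lambda);
  int[] label = cut(pi, lambda, 1.5);
  // Prints two distinct labels: one shared by 0, 1, 2 and one for 10.
  System.out.println(java.util.Arrays.toString(label));
}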