diff options
author | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:40 +0000 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:40 +0000 |
commit | 337087b668d3a54f3afee3a9adb597a32e9f7e94 (patch) | |
tree | d860094269622472f8079d497ac7af02dbb4e038 /src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java | |
parent | 14a486343aef55f97f54082d6b542dedebf6f3ba (diff) |
Import Upstream version 0.6.5~20141030
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java | 66 |
1 files changed, 33 insertions, 33 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java index 86bb9a09..d785b83f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -38,11 +38,11 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancevalue.IntegerDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -55,7 +55,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; /** @@ -76,7 +75,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; */ @Title("SNN: Shared Nearest Neighbor Clustering") @Description("Algorithm to find shared-nearest-neighbors-density-connected sets in a database based on the " + "parameters 'minPts' and 'epsilon' (specifying a volume). " + "These two parameters determine a density threshold for clustering.") -@Reference(authors = "L. Ertöz, M. Steinbach, V. Kumar", title = "Finding Clusters of Different Sizes, Shapes, and Densities in Noisy, High Dimensional Data", booktitle = "Proc. of SIAM Data Mining (SDM), 2003", url = "http://www.siam.org/meetings/sdm03/proceedings/sdm03_05.pdf") +@Reference(authors = "L. Ertöz, M. Steinbach, V. Kumar", // +title = "Finding Clusters of Different Sizes, Shapes, and Densities in Noisy, High Dimensional Data", // +booktitle = "Proc. of SIAM Data Mining (SDM), 2003", // +url = "http://www.siam.org/meetings/sdm03/proceedings/sdm03_05.pdf") public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> { /** * The logger for this class. @@ -84,24 +86,12 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple private static final Logging LOG = Logging.getLogger(SNNClustering.class); /** - * Parameter to specify the minimum SNN density, must be an integer greater - * than 0. + * Epsilon radius threshold. */ - public static final OptionID EPSILON_ID = new OptionID("snn.epsilon", "The minimum SNN density."); + private int epsilon; /** - * Holds the value of {@link #EPSILON_ID}. - */ - private IntegerDistance epsilon; - - /** - * Parameter to specify the threshold for minimum number of points in the - * epsilon-SNN-neighborhood of a point, must be an integer greater than 0. - */ - public static final OptionID MINPTS_ID = new OptionID("snn.minpts", "Threshold for minimum number of points in " + "the epsilon-SNN-neighborhood of a point."); - - /** - * Holds the value of {@link #MINPTS_ID}. + * Minimum number of clusters for connectedness. */ private int minpts; @@ -132,7 +122,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple * @param epsilon Epsilon * @param minpts Minpts */ - public SNNClustering(SharedNearestNeighborSimilarityFunction<O> similarityFunction, IntegerDistance epsilon, int minpts) { + public SNNClustering(SharedNearestNeighborSimilarityFunction<O> similarityFunction, int epsilon, int minpts) { super(); this.similarityFunction = similarityFunction; this.epsilon = epsilon; @@ -147,7 +137,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple * @return Result */ public Clustering<Model> run(Database database, Relation<O> relation) { - SimilarityQuery<O, IntegerDistance> snnInstance = similarityFunction.instantiate(relation); + SimilarityQuery<O> snnInstance = similarityFunction.instantiate(relation); FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), LOG) : null; IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null; @@ -155,9 +145,9 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple noise = DBIDUtil.newHashSet(); processedIDs = DBIDUtil.newHashSet(relation.size()); if(relation.size() >= minpts) { - for(DBIDIter id = snnInstance.getRelation().iterDBIDs(); id.valid(); id.advance()) { + for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) { if(!processedIDs.contains(id)) { - expandCluster(snnInstance, DBIDUtil.deref(id), objprog, clusprog); + expandCluster(snnInstance, id, objprog, clusprog); if(processedIDs.size() == relation.size() && noise.size() == 0) { break; } @@ -169,7 +159,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple } } else { - for(DBIDIter id = snnInstance.getRelation().iterDBIDs(); id.valid(); id.advance()) { + for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) { noise.add(id); if(objprog != null && clusprog != null) { objprog.setProcessed(noise.size(), LOG); @@ -178,10 +168,8 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple } } // Finish progress logging - if(objprog != null && clusprog != null) { - objprog.ensureCompleted(LOG); - clusprog.setCompleted(LOG); - } + LOG.ensureCompleted(objprog); + LOG.setCompleted(clusprog); Clustering<Model> result = new Clustering<>("Shared-Nearest-Neighbor Clustering", "snn-clustering"); for(Iterator<ModifiableDBIDs> resultListIter = resultList.iterator(); resultListIter.hasNext();) { @@ -201,10 +189,10 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple * @return the shared nearest neighbors of the specified query object in the * given database */ - protected ArrayModifiableDBIDs findSNNNeighbors(SimilarityQuery<O, IntegerDistance> snnInstance, DBID queryObject) { + protected ArrayModifiableDBIDs findSNNNeighbors(SimilarityQuery<O> snnInstance, DBIDRef queryObject) { ArrayModifiableDBIDs neighbors = DBIDUtil.newArray(); for(DBIDIter iditer = snnInstance.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) { - if(snnInstance.similarity(queryObject, iditer).compareTo(epsilon) >= 0) { + if(snnInstance.similarity(queryObject, iditer) >= epsilon) { neighbors.add(iditer); } } @@ -222,7 +210,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple * @param objprog the progress object to report about the progress of * clustering */ - protected void expandCluster(SimilarityQuery<O, IntegerDistance> snnInstance, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) { + protected void expandCluster(SimilarityQuery<O> snnInstance, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) { ArrayModifiableDBIDs seeds = findSNNNeighbors(snnInstance, startObjectID); // startObject is no core-object @@ -310,7 +298,19 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple * @param <O> object type */ public static class Parameterizer<O> extends AbstractParameterizer { - protected IntegerDistance epsilon; + /** + * Parameter to specify the minimum SNN density, must be an integer greater + * than 0. + */ + public static final OptionID EPSILON_ID = new OptionID("snn.epsilon", "The minimum SNN density."); + + /** + * Parameter to specify the threshold for minimum number of points in the + * epsilon-SNN-neighborhood of a point, must be an integer greater than 0. + */ + public static final OptionID MINPTS_ID = new OptionID("snn.minpts", "Threshold for minimum number of points in " + "the epsilon-SNN-neighborhood of a point."); + + protected int epsilon; protected int minpts; @@ -322,7 +322,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple Class<SharedNearestNeighborSimilarityFunction<O>> cls = ClassGenericsUtil.uglyCastIntoSubclass(SharedNearestNeighborSimilarityFunction.class); similarityFunction = config.tryInstantiate(cls); - DistanceParameter<IntegerDistance> epsilonP = new DistanceParameter<>(EPSILON_ID, IntegerDistance.FACTORY); + IntParameter epsilonP = new IntParameter(EPSILON_ID); if(config.grab(epsilonP)) { epsilon = epsilonP.getValue(); } |