diff options
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm')
266 files changed, 2591 insertions, 439 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java index 0851a898..8f721fdc 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractAlgorithm.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Arthur Zimek + * @since 0.2 * * @apiviz.landmark * @apiviz.excludeSubtypes diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java index 5729695c..bbf76f03 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractDistanceBasedAlgorithm.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Abstract base class for distance-based algorithms. * * @author Arthur Zimek + * @since 0.2 * * @apiviz.landmark * @apiviz.has DistanceFunction diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractNumberVectorDistanceBasedAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractNumberVectorDistanceBasedAlgorithm.java index 5c09d4a8..e0beaa36 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractNumberVectorDistanceBasedAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractNumberVectorDistanceBasedAlgorithm.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * synthetic numerical vectors such as <b>mean</b> vectors. 
* * @author Erich Schubert + * @since 0.2 * * @apiviz.has NumberVectorDistanceFunction * @apiviz.excludeSubtypes diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java index 06625263..21de6b59 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/AbstractPrimitiveDistanceBasedAlgorithm.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * only database objects! * * @author Arthur Zimek + * @since 0.2 * * @apiviz.has PrimitiveDistanceFunction * @apiviz.excludeSubtypes diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java index 1ca6ffa3..cea7f4b3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/Algorithm.java @@ -45,6 +45,7 @@ import de.lmu.ifi.dbs.elki.result.Result; * </p> * * @author Arthur Zimek + * @since 0.2 */ public interface Algorithm { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java index 3c6dbba6..b46fc6ac 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DependencyDerivator.java @@ -71,6 +71,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Arthur Zimek + * @since 0.2 * @param <V> the type of FeatureVector handled by this Algorithm */ @Title("Dependency Derivator: Deriving numerical inter-dependencies on data") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java 
b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java index 5171de6d..5c4eaaf8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DistanceBasedAlgorithm.java @@ -29,6 +29,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; * Very broad interface for distance based algorithms. * * @author Erich Schubert + * @since 0.4.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java index 595de4bd..7aa28b70 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/DummyAlgorithm.java @@ -46,6 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * NullAlgorithm} instead. * * @author Erich Schubert + * @since 0.2 * @param <O> Vector type * * @apiviz.uses KNNQuery diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNDistancesSampler.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNDistancesSampler.java index 3e78ba77..a81823c1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNDistancesSampler.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNDistancesSampler.java @@ -41,6 +41,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.geometry.XYCurve; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -59,11 +60,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * and look for a bend or knee in this plot. 
* * @author Arthur Zimek + * @since 0.7.0 * * @param <O> the type of objects handled by this Algorithm */ @Title("KNN-Distance-Order") @Description("Assesses the knn distances for a specified k and orders them.") +@Alias("de.lmu.ifi.dbs.elki.algorithm.KNNDistanceOrder") public class KNNDistancesSampler<O> extends AbstractDistanceBasedAlgorithm<O, KNNDistanceOrderResult> { /** * The logger for this class. @@ -115,7 +118,7 @@ public class KNNDistancesSampler<O> extends AbstractDistanceBasedAlgorithm<O, KN final int size = (int) ((sample <= 1.) ? Math.ceil(relation.size() * sample) : sample); DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), size, rnd); - FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling kNN distances.", size, LOG) : null; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling kNN distances", size, LOG) : null; double[] knnDistances = new double[size]; int i = 0; for(DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance(), i++) { @@ -251,4 +254,4 @@ public class KNNDistancesSampler<O> extends AbstractDistanceBasedAlgorithm<O, KN return new KNNDistancesSampler<>(distanceFunction, k, percentage, rnd); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java index 593582ed..ff1ae57f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/KNNJoin.java @@ -71,6 +71,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Elke Achtert * @author Erich Schubert + * @since 0.2 * * @param <V> the type of FeatureVector handled by this Algorithm * @param <N> the type of node used in the spatial index structure diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java index f9de15b0..ea061a71 100755 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/MaterializeDistances.java @@ -49,6 +49,9 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * MultipleFileInput to use cached distances. * * Symmetry is assumed. + * + * @author Erich Schubert + * @since 0.2 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java index 5e7cf151..57eb6910 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/NullAlgorithm.java @@ -36,6 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * set. 
* * @author Erich Schubert + * @since 0.2 */ @Title("Null Algorithm") @Description("Algorithm which does nothing, just return a null object.") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java index 611e0c41..1d20e9b2 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/KNNBenchmarkAlgorithm.java @@ -58,6 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * the original database. * * @author Erich Schubert + * @since 0.5.5 * * @param <O> Object type * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java index 9a5532e3..98c66129 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/RangeQueryBenchmarkAlgorithm.java @@ -89,6 +89,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * TODO: alternatively, allow using a fixed radius? * * @author Erich Schubert + * @since 0.5.5 * * @param <O> Vector type * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java index 64e00abd..fc1098ff 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/benchmark/ValidateApproximativeKNNIndex.java @@ -65,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * exact indexing (e.g. linear scanning). 
* * @author Erich Schubert + * @since 0.5.5 * * @param <O> Object type * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/AbstractClassifier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/AbstractClassifier.java index fe97c83e..c4db6887 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/AbstractClassifier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/AbstractClassifier.java @@ -36,6 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * Abstract base class for algorithms. * * @author Erich Schubert + * @since 0.7.0 * * @param <O> Input type * @param <R> Result type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/Classifier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/Classifier.java index 429f016a..ebdb7fe5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/Classifier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/Classifier.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; * classify a new instance of the same type. * * @author Arthur Zimek + * @since 0.7.0 * * @param <O> the type of DatabaseObjects handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/KNNClassifier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/KNNClassifier.java index c506fa8b..84e40731 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/KNNClassifier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/KNNClassifier.java @@ -56,6 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * k nearest neighbors in a database. 
* * @author Arthur Zimek + * @since 0.7.0 * @param <O> the type of DatabaseObjects handled by this Algorithm */ @Title("kNN-classifier") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/PriorProbabilityClassifier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/PriorProbabilityClassifier.java index 0032e8ac..df1d0f86 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/PriorProbabilityClassifier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/classification/PriorProbabilityClassifier.java @@ -45,6 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * the database, without using the actual data values. * * @author Arthur Zimek + * @since 0.7.0 */ @Title("Prior Probability Classifier") @Description("Classifier to predict simply prior probabilities for all classes as defined by their relative abundance in a given database.") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java index c63df6c3..2b17380a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java @@ -43,6 +43,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * and {@link de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.ORCLUS}. 
* * @author Elke Achtert + * @since 0.2 * * @param <R> the result we return * @param <V> the type of FeatureVector handled by this Algorithm diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java index c8e17b45..49d3586f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java @@ -61,6 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * </p> * * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java index b0f3e2d3..a7957d1e 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java @@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.database.Database; * database complete or is in any other sense a relaxed clustering result. * * @author Arthur Zimek + * @since 0.2 * * @apiviz.has Clustering * @apiviz.has Model diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithmUtil.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithmUtil.java index 8fead310..482eb40a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithmUtil.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithmUtil.java @@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs; * Utility functionality for writing clustering algorithms. 
* * @author Erich Schubert + * @since 0.7.0 */ public class ClusteringAlgorithmUtil { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java index 7d802fa4..ba6cf77d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java @@ -34,12 +34,13 @@ import de.lmu.ifi.dbs.elki.data.model.Model; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDVar; import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; -import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -70,6 +71,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Arthur Zimek + * @author Erich Schubert + * @since 0.2 * @param <O> the type of Object the algorithm is applied to */ @Title("DBSCAN: Density-Based Clustering of Applications with Noise") @@ -172,9 +175,10 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode IndefiniteProgress clusprog = LOG.isVerbose() ? 
new IndefiniteProgress("Number of clusters", LOG) : null; processedIDs = DBIDUtil.newHashSet(size); + ArrayModifiableDBIDs seeds = DBIDUtil.newArray(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { if(!processedIDs.contains(iditer)) { - expandCluster(relation, rangeQuery, iditer, objprog, clusprog); + expandCluster(relation, rangeQuery, iditer, seeds, objprog, clusprog); } if(objprog != null && clusprog != null) { objprog.setProcessed(processedIDs.size(), LOG); @@ -197,9 +201,11 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode * @param relation Database relation to run on * @param rangeQuery Range query to use * @param startObjectID potential seed of a new potential cluster - * @param objprog the progress object for logging the current status + * @param seeds Array to store the current seeds + * @param objprog Number of objects processed (may be {@code null}) + * @param clusprog Number of clusters found (may be {@code null}) */ - protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) { + protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, ArrayModifiableDBIDs seeds, FiniteProgress objprog, IndefiniteProgress clusprog) { DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon); ncounter += neighbors.size(); @@ -218,13 +224,13 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode processedIDs.add(startObjectID); // try to expand the cluster - HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet(); + assert(seeds.size() == 0); + seeds.clear(); processNeighbors(neighbors.iter(), currentCluster, seeds); DBIDVar o = DBIDUtil.newVar(); while(!seeds.isEmpty()) { - seeds.pop(o); - neighbors = rangeQuery.getRangeForDBID(o, epsilon); + neighbors = rangeQuery.getRangeForDBID(seeds.pop(o), epsilon); ncounter += 
neighbors.size(); if(neighbors.size() >= minpts) { @@ -248,10 +254,13 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode * @param currentCluster Current cluster * @param seeds Seed set */ - private void processNeighbors(DBIDIter neighbor, ModifiableDBIDs currentCluster, HashSetModifiableDBIDs seeds) { + private void processNeighbors(DoubleDBIDListIter neighbor, ModifiableDBIDs currentCluster, ArrayModifiableDBIDs seeds) { + final boolean ismetric = getDistanceFunction().isMetric(); for(; neighbor.valid(); neighbor.advance()) { if(processedIDs.add(neighbor)) { - seeds.add(neighbor); + if(!ismetric || neighbor.doubleValue() > 0.) { + seeds.add(neighbor); + } } else if(!noise.remove(neighbor)) { continue; diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/GriDBSCAN.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/GriDBSCAN.java new file mode 100644 index 00000000..aecb7c30 --- /dev/null +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/GriDBSCAN.java @@ -0,0 +1,913 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2016 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.model.ClusterModel; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDVar; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList; +import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; +import de.lmu.ifi.dbs.elki.database.relation.ProxyView; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import 
de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic; +import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; +import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import gnu.trove.iterator.TLongObjectIterator; +import gnu.trove.map.hash.TLongObjectHashMap; + +/** + * Using Grid for Accelerating Density-Based Clustering. + * + * An accelerated DBSCAN version for numerical data and Lp-norms only, by + * partitioning the data set into overlapping grid cells. For best efficiency, + * the overlap of the grid cells must be chosen well. The authors suggest a grid + * width of 10 times epsilon. + * + * Because of partitioning the data, this version does not make use of indexes. + * + * Reference: + * <p> + * S. Mahran and K. Mahar: <br /> + * Using grid for accelerating density-based clustering.<br /> + * In 8th IEEE Int. Conf. on Computer and Information Technology, 2008. + * </p> + * + * @author Erich Schubert + * @since 0.7.1 + * + * @apiviz.composedOf Instance + * + * @param <V> the type of vector the algorithm is applied to + */ +@Reference(authors = "S. Mahran and K. Mahar", // +title = "Using grid for accelerating density-based clustering", // +booktitle = "8th IEEE Int. Conf. 
on Computer and Information Technology", // +url = "http://dx.doi.org/10.1109/CIT.2008.4594646") +public class GriDBSCAN<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(GriDBSCAN.class); + + /** + * Holds the epsilon radius threshold. + */ + protected double epsilon; + + /** + * Holds the minimum cluster size. + */ + protected int minpts; + + /** + * Width of the grid cells. Must be at least 2 epsilon! + */ + protected double gridwidth; + + /** + * Constructor with parameters. + * + * @param distanceFunction Distance function + * @param epsilon Epsilon value + * @param minpts Minpts parameter + * @param gridwidth Grid width + */ + public GriDBSCAN(DistanceFunction<? super V> distanceFunction, double epsilon, int minpts, double gridwidth) { + super(distanceFunction); + this.epsilon = epsilon; + this.minpts = minpts; + this.gridwidth = gridwidth; + } + + /** + * Performs the DBSCAN algorithm on the given database. + */ + public Clustering<Model> run(Relation<V> relation) { + final DBIDs ids = relation.getDBIDs(); + + // Degenerate result: + if(ids.size() < minpts) { + Clustering<Model> result = new Clustering<>("DBSCAN Clustering", "dbscan-clustering"); + result.addToplevelCluster(new Cluster<Model>(ids, true, ClusterModel.CLUSTER)); + return result; + } + + double gridwidth = this.gridwidth; // local copy. + if(gridwidth < 2. * epsilon) { + LOG.warning("Invalid grid width (less than 2*epsilon, recommended 10*epsilon). Increasing grid width automatically."); + gridwidth = 2. * epsilon; + } + return new Instance<V>(getDistanceFunction(), epsilon, minpts, gridwidth).run(relation); + } + + /** + * Instance, for a single run. + * + * @author Erich Schubert + * + * @param <V> Vector type + */ + protected static class Instance<V extends NumberVector> { + /** + * Unprocessed IDs. 
+ */ + protected static final int UNPROCESSED = 0; + + /** + * Noise IDs. + */ + protected static final int NOISE = 1; + + /** + * Distance function used. + */ + protected DistanceFunction<? super V> distanceFunction; + + /** + * Holds the epsilon radius threshold. + */ + protected double epsilon; + + /** + * Holds the minimum cluster size. + */ + protected int minpts; + + /** + * Width of the grid cells. Must be at least 2 epsilon! + */ + protected double gridwidth; + + /** + * Value domain. + */ + protected double[][] domain; + + /** + * Dimensionality. + */ + protected int dim; + + /** + * Grid offset. + */ + protected double[] offset; + + /** + * Number of cells per dimension. + */ + protected int[] cells; + + /** + * Data grid partitioning. + */ + TLongObjectHashMap<ModifiableDBIDs> grid; + + /** + * Core identifier objects (shared to conserve memory). + */ + private Core[] cores; + + /** + * Border identifier objects (shared to conserve memory). + */ + private Border[] borders; + + /** + * Cluster assignments. + */ + private WritableDataStore<Assignment> clusterids; + + /** + * Temporary assignments of a single run. + */ + private WritableIntegerDataStore temporary; + + /** + * Indicates that the number of grid cells has overflown. + */ + private boolean overflown; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param epsilon Epsilon + * @param minpts MinPts + * @param gridwidth Grid width + */ + public Instance(DistanceFunction<? super V> distanceFunction, double epsilon, int minpts, double gridwidth) { + this.distanceFunction = distanceFunction; + this.epsilon = epsilon; + this.minpts = minpts; + this.gridwidth = gridwidth; + } + + /** + * Performs the DBSCAN algorithm on the given database. 
+ * + * @param relation Relation to process + */ + public Clustering<Model> run(Relation<V> relation) { + final DBIDs ids = relation.getDBIDs(); + final int size = ids.size(); + + // Domain of the database + this.domain = RelationUtil.computeMinMax(relation); + this.dim = domain[0].length; + this.offset = new double[dim]; + this.cells = new int[dim]; + // Compute the grid start, and the number of cells in each dimension. + long numcells = computeGridBaseOffsets(); + if(numcells > size) { + LOG.warning("The generated grid has more cells than data points. This may need excessive amounts of memory."); + } + else if(numcells == 1) { + LOG.warning("All data is in a single cell. This has degenerated to a non-indexed DBSCAN!"); + } + else if(numcells <= dim * dim) { + LOG.warning("There are only " + numcells + " cells. This will likely be slower than regular DBSCAN!"); + } + + // Build the data grid. + buildGrid(relation, (int) numcells, offset); + if(grid.size() <= dim) { + LOG.warning("There are only " + grid.size() + " occupied cells. This will likely be slower than regular DBSCAN!"); + } + + // Check grid cell counts: + int mincells = checkGridCellSizes(size, numcells); + + // (Temporary) store the cluster ID assigned. + clusterids = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, Assignment.class); + temporary = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED); + final ArrayModifiableDBIDs activeSet = DBIDUtil.newArray(); + // Reserve the first two cluster ids: + int clusterid = NOISE + 1; + this.cores = new Core[2]; + this.borders = new Border[2]; + + // Reused storage for neighbors: + ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList(minpts << 1); + // Run DBSCAN on each cell that has enough objects. + FiniteProgress cprog = LOG.isVerbose() ? 
new FiniteProgress("Processing grid cells", mincells, LOG) : null; + for(TLongObjectIterator<ModifiableDBIDs> it = grid.iterator(); it.hasNext();) { + it.advance(); + ModifiableDBIDs cellids = it.value(); + if(cellids.size() < minpts) { + continue; // Too few objects. + } + temporary.clear(); // Reset to "UNPROCESSED" + ProxyView<V> rel = new ProxyView<>(cellids, relation); + RangeQuery<V> rq = rel.getRangeQuery(distanceFunction, epsilon); + FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Running DBSCAN", cellids.size(), LOG) : null; + for(DBIDIter id = cellids.iter(); id.valid(); id.advance()) { + // Skip already processed ids. + if(temporary.intValue(id) != UNPROCESSED) { + continue; + } + neighbors.clear(); + rq.getRangeForDBID(id, epsilon, neighbors); + if(neighbors.size() >= minpts) { + expandCluster(id, clusterid, temporary, neighbors, activeSet, rq, pprog); + ++clusterid; + } + else { + temporary.putInt(id, NOISE); + LOG.incrementProcessed(pprog); + } + } + LOG.ensureCompleted(pprog); + // Post-process DBSCAN clustering result: + updateCoreBorderObjects(clusterid); + mergeClusterInformation(cellids, temporary, clusterids); + LOG.incrementProcessed(cprog); + } + LOG.ensureCompleted(cprog); + temporary.destroy(); + + FiniteProgress pprog = LOG.isVerbose() ? 
new FiniteProgress("Building final result", size, LOG) : null; + ModifiableDBIDs[] clusters = new ModifiableDBIDs[clusterid]; + ModifiableDBIDs noise = DBIDUtil.newArray(); + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { + Assignment cids = clusterids.get(it); + if(cids == null) { + noise.add(it); + } + else { + if(cids instanceof MultiBorder) { + cids = ((MultiBorder) cids).getCore(); + } + else if(cids instanceof Border) { + cids = ((Border) cids).core; + } + assert (cids instanceof Core); + Core co = (Core) cids; + while(cores[co.parent].parent != co.parent) { + co = cores[co.parent = cores[co.parent].parent]; + } + ModifiableDBIDs clu = clusters[co.parent]; + if(clu == null) { + clu = clusters[co.parent] = DBIDUtil.newArray(); + } + clu.add(it); + } + LOG.incrementProcessed(pprog); + } + LOG.ensureCompleted(pprog); + clusterids.destroy(); + + Clustering<Model> result = new Clustering<>("DBSCAN Clustering", "dbscan-clustering"); + for(int i = NOISE + 1; i < clusters.length; i++) { + if(clusters[i] != null) { + result.addToplevelCluster(new Cluster<Model>(clusters[i], ClusterModel.CLUSTER)); + } + } + if(noise.size() > 0) { + result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER)); + } + return result; + } + + /** + * Update the shared arrays for core points (to conserve memory) + * + * @param clusterid Number of clusters + */ + private void updateCoreBorderObjects(int clusterid) { + int i = cores.length; + cores = Arrays.copyOf(cores, clusterid); + borders = Arrays.copyOf(borders, clusterid); + while(i < clusterid) { + cores[i] = new Core(i); + borders[i] = new Border(cores[i]); + ++i; + } + } + + /** + * Compute the grid base offset. + * + * @return Total number of grid cells + */ + private long computeGridBaseOffsets() { + StringBuffer buf = LOG.isDebuggingFinest() ? 
new StringBuffer() : null; + double[] min = domain[0], max = domain[1]; + long total = 1; + for(int d = 0; d < dim; d++) { + final double mi = min[d], ma = max[d], wi = ma - mi; + if(mi == Double.NEGATIVE_INFINITY || ma == Double.POSITIVE_INFINITY || mi != mi || ma != ma) { + throw new AbortException("Dimension " + d + " contains non-finite values."); + } + int c = cells[d] = Math.max(1, (int) Math.ceil(wi / gridwidth)); + offset[d] = mi - (c * gridwidth - wi) * .5; + assert (offset[d] <= mi) : "Grid inconsistent."; + assert (offset[d] + c * gridwidth >= ma) : "Grid inconsistent."; + total *= c; + if(total < 0) { + LOG.warning("Excessive amount of grid cells (long overflow)! Use larger grid cells."); + if (total < 0) { + overflown = true; + total &= 0x7FFF_FFFF_FFFF_FFFFL; + } + } + if(buf != null) { + buf.append(d).append(": min=").append(mi).append(" max=").append(ma); + double s = offset[d]; + for(int i = 0; i <= c; i++) { + buf.append(' ').append(s); + s += gridwidth; + } + buf.append('\n'); + } + } + if(buf != null) { + LOG.debugFinest(buf); + } + return total; + } + + /** + * Build the data grid. + * + * @param relation Data relation + * @param numcells Total number of cells + * @param offset Offset + */ + protected void buildGrid(Relation<V> relation, int numcells, double[] offset) { + grid = new TLongObjectHashMap<ModifiableDBIDs>(numcells >>> 2); + for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { + V obj = relation.get(it); + insertIntoGrid(it, obj, 0, 0); + } + } + + /** + * Insert a single object into the grid; potentially into multiple cells (at + * most 2^d) via recursion. 
+ * + * @param id Object ID + * @param obj Object + * @param d Current dimension + * @param v Current cell value + */ + private void insertIntoGrid(DBIDRef id, V obj, int d, int v) { + final int cn = cells[d]; // Number of cells in this dimension + final int nd = d + 1; // Next dimension + int mi = Math.max(0, (int) Math.floor((obj.doubleValue(d) - offset[d] - epsilon) / gridwidth)); + int ma = Math.min(cn - 1, (int) Math.floor((obj.doubleValue(d) - offset[d] + epsilon) / gridwidth)); + assert (mi <= ma) : "Grid inconsistent."; + for(int i = mi; i <= ma; i++) { + int c = v * cn + i; + if(nd == cells.length) { + ModifiableDBIDs ids = grid.get(c); + if(ids == null) { + grid.put(c, ids = DBIDUtil.newArray()); + } + ids.add(id); + } + else { + insertIntoGrid(id, obj, nd, c); + } + } + } + + /** + * Perform some sanity checks on the grid cells. + * + * @param numcell Number of cells + * @param size Relation size + * @return Number of cells with minPts points + */ + protected int checkGridCellSizes(int size, long numcell) { + int tcount = 0; + int hasmin = 0; + double sqcount = 0; + for(TLongObjectIterator<ModifiableDBIDs> it = grid.iterator(); it.hasNext();) { + it.advance(); + final int s = it.value().size(); + if(s >= size >> 1) { + LOG.warning("A single cell contains half of the database (" + s// + + " objects). 
This will not scale very well."); + } + tcount += s; + sqcount += s * (long) s; + if(s >= minpts) { + hasmin++; + } + } + double savings = sqcount / size / size; + if(savings >= 1) { + LOG.warning("Pairwise distances within each cells are more expensive than a full DBSCAN run due to overlap!"); + } + if (overflown) { + LOG.statistics(new StringStatistic(GriDBSCAN.class.getName() + ".all-cells", "overflow")); + } else { + LOG.statistics(new LongStatistic(GriDBSCAN.class.getName() + ".all-cells", numcell)); + } + LOG.statistics(new LongStatistic(GriDBSCAN.class.getName() + ".used-cells", grid.size())); + LOG.statistics(new LongStatistic(GriDBSCAN.class.getName() + ".minpts-cells", hasmin)); + LOG.statistics(new DoubleStatistic(GriDBSCAN.class.getName() + ".redundancy", tcount / (double) size)); + LOG.statistics(new DoubleStatistic(GriDBSCAN.class.getName() + ".relative-cost", savings)); + return hasmin; + } + + /** + * Set-based expand cluster implementation. + * + * @param clusterid ID of the current cluster. + * @param clusterids Current object to cluster mapping. + * @param neighbors Neighbors acquired by initial getNeighbors call. + * @param activeSet Set to manage active candidates. 
+ * @param rq Range query + * @param pprog Object progress + * @return cluster size + */ + protected int expandCluster(final DBIDRef seed, final int clusterid, final WritableIntegerDataStore clusterids, final ModifiableDoubleDBIDList neighbors, ArrayModifiableDBIDs activeSet, RangeQuery<V> rq, FiniteProgress pprog) { + assert (activeSet.size() == 0); + int clustersize = 1 + processCorePoint(seed, neighbors, clusterid, clusterids, activeSet); + LOG.incrementProcessed(pprog); + // run expandCluster as long as there is another seed + final DBIDVar id = DBIDUtil.newVar(); + while(!activeSet.isEmpty()) { + activeSet.pop(id); + neighbors.clear(); + // Evaluate Neighborhood predicate + rq.getRangeForDBID(id, epsilon, neighbors); + // Evaluate Core-Point predicate + if(neighbors.size() >= minpts) { + clustersize += processCorePoint(id, neighbors, clusterid, clusterids, activeSet); + } + LOG.incrementProcessed(pprog); + } + return clustersize; + } + + /** + * Process a single core point. + * + * @param seed Point to process + * @param newneighbors New neighbors + * @param clusterid Cluster to add to + * @param clusterids Cluster assignment storage. + * @param activeSet Active set of cluster seeds + * @return Number of new points added to cluster + */ + protected int processCorePoint(final DBIDRef seed, DoubleDBIDList newneighbors, final int clusterid, final WritableIntegerDataStore clusterids, ArrayModifiableDBIDs activeSet) { + clusterids.putInt(seed, clusterid); // Core point now + int clustersize = 0; + // The recursion is unrolled into iteration over the active set. + for(DoubleDBIDListIter it = newneighbors.iter(); it.valid(); it.advance()) { + final int oldassign = clusterids.intValue(it); + if(oldassign == UNPROCESSED) { + if(it.doubleValue() > 0.) { // We can skip points at distance 0. + activeSet.add(it); + } + } + else if(oldassign != NOISE) { + continue; // Member of some cluster. 
+ } + clustersize++; + clusterids.putInt(it, -clusterid); + } + return clustersize; + } + + /** + * Merge cluster information. + * + * @param cellids IDs in current cell + * @param temporary Temporary assignments + * @param clusterids Merged cluster assignment + */ + protected void mergeClusterInformation(ModifiableDBIDs cellids, WritableIntegerDataStore temporary, WritableDataStore<Assignment> clusterids) { + FiniteProgress mprog = LOG.isVerbose() ? new FiniteProgress("Collecting result", cellids.size(), LOG) : null; + for(DBIDIter id = cellids.iter(); id.valid(); id.advance()) { + int nclus = temporary.intValue(id); + if(nclus > NOISE) { // Core point + Core core = cores[nclus]; + assert (core.parent > NOISE); + Assignment oclus = clusterids.get(id); + if(oclus == null) { // No assignment yet (= NOISE) + clusterids.put(id, core); + } + else if(oclus instanceof Core) { // Core and core - merge! + core.mergeWith((Core) oclus); + } + else if(oclus instanceof Border) { // Core and border point, merge! + core.mergeWith(((Border) oclus).core); + clusterids.put(id, core); + } + else { // Point is border for multiple clusters + assert (oclus instanceof MultiBorder); + if(LOG.isDebuggingFinest()) { + LOG.debugFinest("Multi-Merge: " + nclus + " - " + oclus + " -> " + core); + } + // Find minimum: + int m = core.parent, m2 = ((MultiBorder) oclus).getCore().parent; + m = m < m2 ? m : m2; + assert (m > NOISE); + // Execute all merges: + for(Border b : ((MultiBorder) oclus).cs) { + cores[b.core.parent].parent = m; + } + core.parent = m; + clusterids.put(id, core); + } + } + else if(nclus < 0) { // Border point + Border border = borders[-nclus]; + Assignment oclus = clusterids.get(id); + if(oclus == null) { // No assignment yet. 
+ clusterids.put(id, border); + } + else if(oclus instanceof Core) { // Border and core point - merge + ((Core) oclus).mergeWith(border.core); + } + else if(oclus instanceof Border) { // Border and border + if(((Border) oclus).core.parent != border.core.parent) { + clusterids.put(id, new MultiBorder((Border) oclus, border)); + } + } + else { + assert (oclus instanceof MultiBorder); + clusterids.put(id, ((MultiBorder) oclus).update(border)); + } + } + else { + assert (nclus == NOISE); // Ignore noise. + } + LOG.incrementProcessed(mprog); + } + LOG.ensureCompleted(mprog); + } + } + + /** + * Point assignment. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + protected static interface Assignment { + // Empty + } + + /** + * Core point assignment. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + protected static class Core implements Assignment { + /** + * Cluster number + */ + protected int parent; + + /** + * Constructor. + * + * @param parent Cluster number + */ + protected Core(int parent) { + assert (parent > GriDBSCAN.Instance.NOISE); + this.parent = parent; + } + + /** + * Merge two cores. + * + * @param o Other core + */ + public void mergeWith(Core o) { + o.parent = this.parent = (parent < o.parent ? parent : o.parent); + } + + @Override + public String toString() { + return "Core[" + parent + "]"; + } + } + + /** + * Border point assignment. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + protected static class Border implements Assignment, Comparable<Border> { + /** + * Cluster number + */ + protected Core core; + + /** + * Constructor. + * + * @param core Cluster number + */ + protected Border(Core core) { + this.core = core; + } + + @Override + public String toString() { + return "Border[" + core.parent + "]"; + } + + @Override + public int compareTo(Border o) { + return Integer.compare(o.core.parent, this.core.parent); + } + } + + /** + * Multiple border point assignment. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + protected static class MultiBorder implements Assignment { + /** + * Cluster numbers + */ + protected Border[] cs; + + /** + * Constructor. + * + * @param i First cluster border + * @param j Second cluster border + */ + protected MultiBorder(Border i, Border j) { + assert (i.core != j.core); + this.cs = new Border[] { i, j }; + } + + /** + * Add a new border to the existing borders. + * + * @param border New border. + */ + public Assignment update(Border border) { + Arrays.sort(cs); + int j = 1; + boolean found = (cs[0].core == border.core); + for(int i = 1; i < cs.length; i++) { + if(cs[i].core != cs[i - 1].core) { + cs[j++] = cs[i]; + } + found |= (cs[i].core == border.core); + } + if(found) { + if(j == 1) { + Border r = cs[0]; + cs = null; // Prevent further use + return r; + } + if(j < cs.length) { + cs = Arrays.copyOf(cs, j); + } + return this; + } + if(j + 1 != cs.length) { + cs = Arrays.copyOf(cs, j + 1); + } + cs[j] = border; + return this; + } + + /** + * Get the core this is assigned to. + * + * @return Core + */ + public Core getCore() { + Core a = cs[0].core; + for(int i = 1; i < cs.length; i++) { + Core v = cs[i].core; + a = a.parent > v.parent ? a : v; // max, of negative values + } + assert (a.parent > GriDBSCAN.Instance.NOISE); + return a; + } + + @Override + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("MultiBorder["); + for(Border b : cs) { + buf.append(b.core.parent).append(','); + } + buf.append(']'); + return buf.toString(); + } + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + // We strictly need a vector field of fixed dimensionality! + TypeInformation type = new CombinedTypeInformation(TypeUtil.NUMBER_VECTOR_FIELD, getDistanceFunction().getInputTypeRestriction()); + return TypeUtil.array(type); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Vector type to use + */ + public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * Parameter to control the grid width. + * + * Must be at least two times epsilon. + */ + public static final OptionID GRID_ID = new OptionID("gridbscan.gridwidth", "Width of the grid used, must be at least two times epsilon."); + + /** + * Holds the epsilon radius threshold. + */ + protected double epsilon; + + /** + * Holds the minimum cluster size. + */ + protected int minpts; + + /** + * Width of the grid cells. Must be at least 2 epsilon! + */ + protected double gridwidth; + + @Override + protected void makeOptions(Parameterization config) { + // Disabled: super.makeOptions(config); + // Because we currently only allow Lp norms: + ObjectParameter<DistanceFunction<? super O>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, LPNormDistanceFunction.class); + if(config.grab(distanceFunctionP)) { + distanceFunction = distanceFunctionP.instantiateClass(config); + } + + DoubleParameter epsilonP = new DoubleParameter(DBSCAN.Parameterizer.EPSILON_ID) // + .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); + if(config.grab(epsilonP)) { + epsilon = epsilonP.getValue(); + } + + IntParameter minptsP = new IntParameter(DBSCAN.Parameterizer.MINPTS_ID) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); + if(config.grab(minptsP)) { + minpts = minptsP.getValue(); + if(minpts <= 2) { + LOG.warning("DBSCAN with minPts <= 2 is equivalent to single-link clustering at a single height. Consider using larger values of minPts."); + } + } + + DoubleParameter gridP = new DoubleParameter(GRID_ID) // + .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); + if(epsilon > 0.) { + gridP.setDefaultValue(10. * epsilon); + gridP.addConstraint(new GreaterEqualConstraint(1. 
* epsilon)); + } + if(config.grab(gridP)) { + gridwidth = gridP.doubleValue(); + } + } + + @Override + protected GriDBSCAN<O> makeInstance() { + return new GriDBSCAN<>(distanceFunction, epsilon, minpts, gridwidth); + } + } +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java index 0d1e9fe2..b6b43047 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java @@ -77,6 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * </p> * * @author Erich Schubert + * @since 0.5.5 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java index 2cc48637..39da3e7f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java @@ -68,6 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Arthur Zimek + * @since 0.2 * * @apiviz.uses SharedNearestNeighborSimilarityFunction * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java index a1ef9768..0216d475 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationClusteringAlgorithm.java @@ -62,6 +62,7 @@ import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.6.0 * * @apiviz.composedOf AffinityPropagationInitialization * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java index 5532f8c8..891b8a3e 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/AffinityPropagationInitialization.java @@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; * Initialization methods for affinity propagation. * * @author Erich Schubert + * @since 0.6.0 */ public interface AffinityPropagationInitialization<O> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java index 1bdff0c3..e514df53 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/DistanceBasedInitializationWithMedian.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Distance based initialization. 
* * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java index d8abe9c1..3086d4fb 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/affinitypropagation/SimilarityBasedInitializationWithMedian.java @@ -41,6 +41,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Similarity based initialization. * * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java index 2a0896ea..9d1b2f91 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/AbstractBiclustering.java @@ -53,6 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; * of the {@link NumberVector}s. 
* * @author Arthur Zimek + * @since 0.2 * @param <V> a certain subtype of NumberVector - the data matrix is supposed to * consist of rows where each row relates to an object of type V and the * columns relate to the attribute values of these objects diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java index ceb13976..cf957fc0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/biclustering/ChengAndChurch.java @@ -61,6 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.2 * @param <V> Vector type. */ @Reference(authors = "Y. Cheng, G. M. Church", title = "Biclustering of expression data", booktitle = "Proc. 8th International Conference on Intelligent Systems for Molecular Biology (ISMB)") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java index 43c0528d..1db2d79f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2015 + Copyright (C) 2016 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -95,6 +95,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Elke Achtert + * @since 0.2 * * @apiviz.has CASHInterval * @apiviz.has ParameterizationFunction @@ -109,84 +110,36 @@ 
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; title = "Robust clustering in arbitraily oriented subspaces", // booktitle = "Proc. 8th SIAM Int. Conf. on Data Mining (SDM'08), Atlanta, GA, 2008", // url = "http://www.siam.org/proceedings/datamining/2008/dm08_69_AchtertBoehmDavidKroegerZimek.pdf") -public class CASH<V extends NumberVector> extends AbstractAlgorithm<Clustering<Model>>implements ClusteringAlgorithm<Clustering<Model>> { +public class CASH<V extends NumberVector> extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> { /** * The logger for this class. */ private static final Logging LOG = Logging.getLogger(CASH.class); /** - * Parameter to specify the threshold for minimum number of points in a - * cluster, must be an integer greater than 0. - * <p> - * Key: {@code -cash.minpts} - * </p> + * Threshold for minimum number of points in a cluster */ - public static final OptionID MINPTS_ID = new OptionID("cash.minpts", "Threshold for minimum number of points in a cluster."); + protected int minPts; /** - * Parameter to specify the maximum level for splitting the hypercube, must be - * an integer greater than 0. - * <p> - * Key: {@code -cash.maxlevel} - * </p> + * Maximum level for splitting the hypercube. */ - public static final OptionID MAXLEVEL_ID = new OptionID("cash.maxlevel", "The maximum level for splitting the hypercube."); + protected int maxLevel; /** - * Parameter to specify the minimum dimensionality of the subspaces to be - * found, must be an integer greater than 0. - * <p> - * Default value: {@code 1} - * </p> - * <p> - * Key: {@code -cash.mindim} - * </p> + * Minimum dimensionality of the subspaces to be found */ - public static final OptionID MINDIM_ID = new OptionID("cash.mindim", "The minimum dimensionality of the subspaces to be found."); + protected int minDim; /** - * Parameter to specify the maximum jitter for distance values, must be a - * double greater than 0. 
- * <p> - * Key: {@code -cash.jitter} - * </p> + * Maximum jitter for distance values. */ - public static final OptionID JITTER_ID = new OptionID("cash.jitter", "The maximum jitter for distance values."); + protected double jitter; /** - * Flag to indicate that an adjustment of the applied heuristic for choosing - * an interval is performed after an interval is selected. - * <p> - * Key: {@code -cash.adjust} - * </p> + * Apply adjustment heuristic for interval choosing. */ - public static final OptionID ADJUST_ID = new OptionID("cash.adjust", "Flag to indicate that an adjustment of the applied heuristic for choosing an interval " + "is performed after an interval is selected."); - - /** - * Holds the value of {@link #MINPTS_ID}. - */ - private int minPts; - - /** - * Holds the value of {@link #MAXLEVEL_ID}. - */ - private int maxLevel; - - /** - * Holds the value of {@link #MINDIM_ID}. - */ - private int minDim; - - /** - * Holds the value of {@link #JITTER_ID}. - */ - private double jitter; - - /** - * Holds the value of {@link #ADJUST_ID}. - */ - private boolean adjust; + protected boolean adjust; /** * Holds the dimensionality for noise. 
@@ -711,7 +664,7 @@ public class CASH<V extends NumberVector> extends AbstractAlgorithm<Clustering<M // set the parameters ListParameterization parameters = new ListParameterization(); parameters.addParameter(PCAFilteredRunner.Parameterizer.PCA_EIGENPAIR_FILTER, FirstNEigenPairFilter.class.getName()); - parameters.addParameter(FirstNEigenPairFilter.EIGENPAIR_FILTER_N, Integer.toString(dim - 1)); + parameters.addParameter(FirstNEigenPairFilter.Parameterizer.EIGENPAIR_FILTER_N, Integer.toString(dim - 1)); DependencyDerivator<DoubleVector> derivator = null; Class<DependencyDerivator<DoubleVector>> cls = ClassGenericsUtil.uglyCastIntoSubclass(DependencyDerivator.class); derivator = parameters.tryInstantiate(cls); @@ -784,7 +737,7 @@ public class CASH<V extends NumberVector> extends AbstractAlgorithm<Clustering<M ListParameterization parameters = new ListParameterization(); parameters.addParameter(PCAFilteredRunner.Parameterizer.PCA_EIGENPAIR_FILTER, FirstNEigenPairFilter.class.getName()); - parameters.addParameter(FirstNEigenPairFilter.EIGENPAIR_FILTER_N, Integer.toString(dimensionality)); + parameters.addParameter(FirstNEigenPairFilter.Parameterizer.EIGENPAIR_FILTER_N, Integer.toString(dimensionality)); DependencyDerivator<DoubleVector> derivator = null; Class<DependencyDerivator<DoubleVector>> cls = ClassGenericsUtil.uglyCastIntoSubclass(DependencyDerivator.class); derivator = parameters.tryInstantiate(cls); @@ -841,14 +794,77 @@ public class CASH<V extends NumberVector> extends AbstractAlgorithm<Clustering<M * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { - protected int minpts; - - protected int maxlevel; - - protected int mindim; - + /** + * Parameter to specify the threshold for minimum number of points in a + * cluster, must be an integer greater than 0. 
+ * <p> + * Key: {@code -cash.minpts} + * </p> + */ + public static final OptionID MINPTS_ID = new OptionID("cash.minpts", "Threshold for minimum number of points in a cluster."); + + /** + * Parameter to specify the maximum level for splitting the hypercube, must + * be an integer greater than 0. + * <p> + * Key: {@code -cash.maxlevel} + * </p> + */ + public static final OptionID MAXLEVEL_ID = new OptionID("cash.maxlevel", "The maximum level for splitting the hypercube."); + + /** + * Parameter to specify the minimum dimensionality of the subspaces to be + * found, must be an integer greater than 0. + * <p> + * Default value: {@code 1} + * </p> + * <p> + * Key: {@code -cash.mindim} + * </p> + */ + public static final OptionID MINDIM_ID = new OptionID("cash.mindim", "The minimum dimensionality of the subspaces to be found."); + + /** + * Parameter to specify the maximum jitter for distance values, must be a + * double greater than 0. + * <p> + * Key: {@code -cash.jitter} + * </p> + */ + public static final OptionID JITTER_ID = new OptionID("cash.jitter", "The maximum jitter for distance values."); + + /** + * Flag to indicate that an adjustment of the applied heuristic for choosing + * an interval is performed after an interval is selected. + * <p> + * Key: {@code -cash.adjust} + * </p> + */ + public static final OptionID ADJUST_ID = new OptionID("cash.adjust", "Flag to indicate that an adjustment of the applied heuristic for choosing an interval " + "is performed after an interval is selected."); + + /** + * Threshold for minimum number of points in a cluster + */ + protected int minPts; + + /** + * Maximum level for splitting the hypercube. + */ + protected int maxLevel; + + /** + * Minimum dimensionality of the subspaces to be found + */ + protected int minDim; + + /** + * Maximum jitter for distance values. + */ protected double jitter; + /** + * Apply adjustment heuristic for interval choosing. 
+ */ protected boolean adjust; @Override @@ -857,17 +873,17 @@ public class CASH<V extends NumberVector> extends AbstractAlgorithm<Clustering<M IntParameter minptsP = new IntParameter(MINPTS_ID); minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(minptsP)) { - minpts = minptsP.getValue(); + minPts = minptsP.getValue(); } IntParameter maxlevelP = new IntParameter(MAXLEVEL_ID); maxlevelP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(maxlevelP)) { - maxlevel = maxlevelP.getValue(); + maxLevel = maxlevelP.getValue(); } IntParameter mindimP = new IntParameter(MINDIM_ID, 1); mindimP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(mindimP)) { - mindim = mindimP.getValue(); + minDim = mindimP.getValue(); } DoubleParameter jitterP = new DoubleParameter(JITTER_ID); jitterP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); @@ -882,7 +898,7 @@ public class CASH<V extends NumberVector> extends AbstractAlgorithm<Clustering<M @Override protected CASH<NumberVector> makeInstance() { - return new CASH<>(minpts, maxlevel, mindim, jitter, adjust); + return new CASH<>(minPts, maxLevel, minDim, jitter, adjust); } } } diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java index 06e64343..72de0a55 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java @@ -67,6 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Arthur Zimek + * @since 0.2 * * @apiviz.has DimensionModel * @apiviz.composedOf COPACNeighborPredicate diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java index 
8fb11eaf..f73e9e70 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java @@ -79,6 +79,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Elke Achtert + * @since 0.2 * * @apiviz.composedOf COPAC * @apiviz.composedOf DBSCAN diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java index 52d043ec..63fe9d81 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java @@ -47,19 +47,20 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * 4C identifies local subgroups of data objects sharing a uniform correlation. * The algorithm is based on a combination of PCA and density-based clustering * (DBSCAN). - * + * * Reference: * <p> * C. Böhm, K. Kailing, P. Kröger, A. Zimek:<br /> * Computing Clusters of Correlation Connected Objects. <br> * In Proc. ACM SIGMOD Int. Conf. on Management of Data, Paris, France, 2004. * </p> - * + * * @author Arthur Zimek - * + * @since 0.2 + * * @apiviz.composedOf FourCNeighborPredicate * @apiviz.composedOf FourCCorePredicate - * + * * @param <V> type of NumberVector handled by this Algorithm */ @Title("4C: Computing Correlation Connected Clusters") @@ -77,7 +78,7 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Constructor. - * + * * @param settings FourC settings. */ public FourC(FourC.Settings settings) { @@ -96,9 +97,9 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Class wrapping the 4C parameter settings. 
- * + * * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Settings { @@ -135,9 +136,9 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Parameterization class for 4C settings. - * + * * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { @@ -178,7 +179,7 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Configure the epsilon radius parameter. - * + * * @param config Parameter source */ protected void configEpsilon(Parameterization config) { @@ -191,7 +192,7 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Configure the minPts aka "mu" parameter. - * + * * @param config Parameter source */ protected void configMinPts(Parameterization config) { @@ -204,18 +205,18 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Configure the delta parameter. - * + * * @param config Parameter source */ protected void configDelta(Parameterization config) { // Flag for using absolute variances - Flag absoluteF = new Flag(LimitEigenPairFilter.EIGENPAIR_FILTER_ABSOLUTE); + Flag absoluteF = new Flag(LimitEigenPairFilter.Parameterizer.EIGENPAIR_FILTER_ABSOLUTE); if(config.grab(absoluteF)) { settings.absolute = absoluteF.isTrue(); } // Parameter delta - DoubleParameter deltaP = new DoubleParameter(LimitEigenPairFilter.EIGENPAIR_FILTER_DELTA) // + DoubleParameter deltaP = new DoubleParameter(LimitEigenPairFilter.Parameterizer.EIGENPAIR_FILTER_DELTA) // .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE); if(!settings.absolute) { deltaP.setDefaultValue(DEFAULT_DELTA); @@ -230,7 +231,7 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Configure the kappa parameter. 
- * + * * @param config Parameter source */ protected void configKappa(Parameterization config) { @@ -244,7 +245,7 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Configure the delta parameter. - * + * * @param config Parameter source */ protected void configLambda(Parameterization config) { @@ -265,9 +266,9 @@ public class FourC<V extends NumberVector> extends GeneralizedDBSCAN { /** * Parameterization class. - * + * * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java index 0bbd012a..6a741ce1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java @@ -74,11 +74,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * In: Proc. Int. Conf. on Scientific and Statistical Database Management * (SSDBM'06), Vienna, Austria, 2006. * </p> - * + * * @author Elke Achtert - * + * @since 0.3 + * * @apiviz.composedOf HiCO.Instance - * + * * @param <V> the type of NumberVector handled by the algorithm */ @Title("Mining Hierarchies of Correlation Clusters") @@ -117,7 +118,7 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati /** * Constructor. - * + * * @param indexfactory Index factory * @param mu Mu parameter */ @@ -138,7 +139,7 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati /** * Instance of the OPTICS algorithm. - * + * * @author Erich Schubert * * @apiviz.uses FilteredLocalPCAIndex @@ -275,7 +276,7 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati /** * Sort new candidates by their distance, for determining the core size. 
- * + * * @author Erich Schubert * * @apiviz.exclude @@ -298,7 +299,7 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati /** * Computes the correlation distance between the two subspaces defined by the * specified PCAs. - * + * * @param pca1 first PCA * @param pca2 second PCA * @param dimensionality the dimensionality of the data space @@ -370,7 +371,7 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati * <code>v</code> at column <code>corrDim</code>. After insertion the matrix * <code>v</code> is orthonormalized and column <code>corrDim</code> of matrix * <code>e_czech</code> is set to the <code>corrDim</code>-th unit vector. - * + * * @param v the orthonormal matrix of the eigenvectors * @param e_czech the selection matrix of the strong eigenvectors * @param vector the vector to be inserted @@ -411,9 +412,9 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati /** * Parameterization class. - * + * * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { @@ -421,7 +422,7 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati * Parameter to specify the smoothing factor, must be an integer greater * than 0. The {link {@link #MU_ID}-nearest neighbor is used to compute the * correlation reachability of an object. 
- * + * * <p> * Key: {@code -hico.mu} * </p> @@ -515,7 +516,7 @@ public class HiCO<V extends NumberVector> extends GeneralizedOPTICS<V, Correlati ListParameterization params = new ListParameterization(); // preprocessor params.addParameter(KNNQueryFilteredPCAIndex.Factory.Parameterizer.K_ID, k); - params.addParameter(PercentageEigenPairFilter.ALPHA_ID, alpha); + params.addParameter(PercentageEigenPairFilter.Parameterizer.ALPHA_ID, alpha); ChainedParameterization chain = new ChainedParameterization(params, config); chain.errorsTo(config); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java index 934d3fc6..0a302005 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java @@ -79,6 +79,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * * @author Ernst Waas * @author Erich Schubert + * @since 0.5.0 */ @Reference(authors = "Robert Haralick, Rave Harpaz", title = "Linear manifold clustering in high dimensional spaces by stochastic search", booktitle = "Pattern Recognition volume 40, Issue 10", url = "http://dx.doi.org/10.1016/j.patcog.2007.01.020") public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java index 4e5c3fbc..413675a8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java @@ -75,6 +75,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * </p> * * @author Elke Achtert + * @since 0.2 * * @apiviz.has PCARunner * diff 
--git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java index bf935864..42f1f474 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration; * and a set of objects ids associated with this interval. * * @author Elke Achtert + * @since 0.2 * * @apiviz.has de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.CASHIntervalSplit */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java index 1edec3a9..038495f2 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java @@ -41,6 +41,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil; * Supports the splitting of CASH intervals. * * @author Elke Achtert + * @since 0.2 */ public class CASHIntervalSplit { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java index 31fea80f..1b0ec58f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil; * belonging to the normal vector n. 
* * @author Elke Achtert + * @since 0.2 */ public class ParameterizationFunction { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/AbstractEMModelFactory.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/AbstractEMModelFactory.java index 36ae7804..0b165c75 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/AbstractEMModelFactory.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/AbstractEMModelFactory.java @@ -36,6 +36,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Abstract base class for initializing EM. * * @author Erich Schubert + * @since 0.4.0 * * @param <V> Vector type * @param <M> Model type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModel.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModel.java index 544362da..e31089d5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModel.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModel.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; * Simpler model for a single Gaussian cluster, without covariances. 
* * @author Erich Schubert + * @since 0.7.0 */ public class DiagonalGaussianModel implements EMClusterModel<EMModel> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModelFactory.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModelFactory.java index 3f3bf914..66fb548b 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModelFactory.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModelFactory.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; * corresponds to the {@code 'VVI'} model in Mclust (R). * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has DiagonalGaussianModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EM.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EM.java index 208e8450..039c0174 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EM.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EM.java @@ -63,14 +63,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Clustering by expectation maximization (EM-Algorithm), also known as Gaussian * Mixture Modeling (GMM). * + * Reference: * <p> - * Reference: A. P. Dempster, N. M. Laird, D. B. Rubin:<br /> - * Maximum Likelihood from Incomplete Data via the EM algorithm.<br> + * A. P. Dempster, N. M. Laird, D. B. Rubin:<br /> + * Maximum Likelihood from Incomplete Data via the EM algorithm.<br /> * In Journal of the Royal Statistical Society, Series B, 39(1), 1977, pp. 
1-31 * </p> * * @author Arthur Zimek * @author Erich Schubert + * @since 0.2 * * @apiviz.composedOf EMClusterModelFactory * @@ -83,7 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; title = "Maximum Likelihood from Incomplete Data via the EM algorithm", // booktitle = "Journal of the Royal Statistical Society, Series B, 39(1), 1977, pp. 1-31", // url = "http://www.jstor.org/stable/2984875") -@Alias({ "de.lmu.ifi.dbs.elki.algorithm.clustering.EM", "EM" }) +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.EM") public class EM<V extends NumberVector, M extends MeanModel> extends AbstractAlgorithm<Clustering<M>> implements ClusteringAlgorithm<Clustering<M>> { /** * The logger for this class. @@ -392,7 +394,7 @@ public class EM<V extends NumberVector, M extends MeanModel> extends AbstractAlg initializer = initialP.instantiateClass(config); } - DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 1e-5)// + DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 1e-7)// .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE); if(config.grab(deltaP)) { delta = deltaP.getValue(); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModel.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModel.java index aaf66839..c89d85d0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModel.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModel.java @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.data.model.MeanModel; * Models useable in EM clustering. 
* * @author Erich Schubert + * @since 0.3 */ public interface EMClusterModel<M extends MeanModel> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModelFactory.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModelFactory.java index d486d5d3..a4f1e486 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModelFactory.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModelFactory.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio * Factory for initializing the EM models. * * @author Erich Schubert + * @since 0.2 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModel.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModel.java index 4c020bc1..3b433af1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModel.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModel.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; * Model for a single Gaussian cluster. * * @author Erich Schubert + * @since 0.7.0 */ public class MultivariateGaussianModel implements EMClusterModel<EMModel> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModelFactory.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModelFactory.java index 8e767a0f..e47cb1ea 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModelFactory.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModelFactory.java @@ -43,6 +43,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; * {@code 'VVV'} model in Mclust (R). 
* * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has MultivariateGaussianModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModel.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModel.java index 7aa5b92b..97296d45 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModel.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModel.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; * Simple spherical Gaussian cluster. * * @author Erich Schubert + * @since 0.7.0 */ public class SphericalGaussianModel implements EMClusterModel<EMModel> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModelFactory.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModelFactory.java index 8b2dfd57..0b32a962 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModelFactory.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModelFactory.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; * corresponds to the {@code 'VVI'} model in Mclust (R). 
* * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has SphericalGaussianModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/AbstractRangeQueryNeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/AbstractRangeQueryNeighborPredicate.java index 53f70aac..aa78033d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/AbstractRangeQueryNeighborPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/AbstractRangeQueryNeighborPredicate.java @@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Abstract local model neighborhood predicate. * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has Instance * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/COPACNeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/COPACNeighborPredicate.java index a2ec0e2e..f8aa1b97 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/COPACNeighborPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/COPACNeighborPredicate.java @@ -70,6 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * * @author Arthur Zimek * @author Erich Schubert + * @since 0.7.0 * * @param <V> the type of NumberVector handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java index f87227c1..cd957770 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; * Note the Factory/Instance split of this 
interface. * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/ERiCNeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/ERiCNeighborPredicate.java index d137ee67..cbe169ee 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/ERiCNeighborPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/ERiCNeighborPredicate.java @@ -70,6 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * * @author Elke Achtert * @author Erich Schubert + * @since 0.7.0 * * @param <V> the type of NumberVector handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java index 8610521a..e3fdb443 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java @@ -59,6 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has Instance * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCCorePredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCCorePredicate.java index ecd569e9..72b96fa0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCCorePredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCCorePredicate.java @@ -45,6 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * </p> * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has Instance */ diff --git 
a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCNeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCNeighborPredicate.java index e9f50c41..71b4cf66 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCNeighborPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/FourCNeighborPredicate.java @@ -65,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * * @author Arthur Zimek * @author Erich Schubert + * @since 0.7.0 * * @param <V> the type of NumberVector handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java index b6b9d74f..86753fcd 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java @@ -68,6 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @author Erich Schubert * @author Arthur Zimek + * @since 0.5.0 * * @apiviz.landmark * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/LSDBC.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/LSDBC.java index bd145c14..5c6ddafe 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/LSDBC.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/LSDBC.java @@ -75,6 +75,7 @@ import gnu.trove.list.array.TIntArrayList; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java 
index ff58902d..405ba28a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java @@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java index 1e2aa716..9d760a4d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java @@ -36,6 +36,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs; * Note the Factory/Instance split of this interface. * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConCorePredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConCorePredicate.java index 0ae26c40..e0cd1547 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConCorePredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConCorePredicate.java @@ -44,6 +44,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * </p> * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConNeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConNeighborPredicate.java index 26991ca9..9da9cc21 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConNeighborPredicate.java +++ 
b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/PreDeConNeighborPredicate.java @@ -59,6 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * * @author Peer Kröger * @author Erich Schubert + * @since 0.7.0 * * @param <V> the type of NumberVector handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java index 41de1cac..484c83b7 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java @@ -27,14 +27,8 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; -import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; -import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -57,7 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * This is the naive O(n^3) algorithm. See {@link SLINK} for a much faster * algorithm (however, only for single-linkage). 
- * + * * This implementation uses the pointer-based representation used by SLINK, so * that the extraction algorithms we have can be used with either of them. * @@ -92,8 +86,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.6.0 * * @apiviz.composedOf LinkageMethod + * @apiviz.composedOf PointerHierarchyRepresentationBuilder * * @param <O> Object type */ @@ -116,7 +112,7 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy /** * Constructor. - * + * * @param distanceFunction Distance function to use * @param linkage Linkage method */ @@ -136,7 +132,7 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy /** * Run the algorithm - * + * * @param db Database * @param relation Relation * @return Clustering hierarchy @@ -162,21 +158,16 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy initializeDistanceMatrix(scratch, dq, ix, iy, square); // Initialize space for result: - WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); - WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY); - WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 1); - for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { - pi.put(it, it); - } + PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids); // Repeat until everything merged into 1 cluster FiniteProgress prog = LOG.isVerbose() ? 
new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null; int wsize = size; for(int i = 1; i < size; i++) { - int x = findMerge(wsize, scratch, ix, iy, pi, lambda, csize); + int x = findMerge(wsize, scratch, ix, iy, builder); if(x == wsize - 1) { --wsize; - for(ix.seek(wsize - 1); lambda.doubleValue(ix) < Double.POSITIVE_INFINITY; ix.retract()) { + for(ix.seek(wsize - 1); builder.isLinked(ix); ix.retract()) { --wsize; } } @@ -184,12 +175,12 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy } LOG.ensureCompleted(prog); - return new PointerHierarchyRepresentationResult(ids, pi, lambda); + return builder.complete(); } /** * Compute the size of a complete x by x triangle (minus diagonal) - * + * * @param x Offset * @return Size of complete triangle */ @@ -199,7 +190,7 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy /** * Initialize a distance matrix. - * + * * @param scratch Scratch space to be used. * @param dq Distance query * @param ix Data iterator @@ -221,29 +212,27 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy /** * Perform the next merge step in AGNES. - * + * * @param size Data set size * @param scratch Scratch space. * @param ix First iterator * @param iy Second iterator - * @param pi Parent storage - * @param lambda Lambda (join distance) storage - * @param csize Cluster sizes + * @param builder Pointer representation builder * @return x, for shrinking the working set. 
*/ - protected int findMerge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableIntegerDataStore csize) { + protected int findMerge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, PointerHierarchyRepresentationBuilder builder) { double mindist = Double.POSITIVE_INFINITY; int x = -1, y = -1; // Find minimum: for(int ox = 0, xbase = 0; ox < size; xbase += ox++) { // Skip if object has already joined a cluster: - if(lambda.doubleValue(ix.seek(ox)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(ix.seek(ox))) { continue; } assert(xbase == triangleSize(ox)); for(int oy = 0; oy < ox; oy++) { // Skip if object has already joined a cluster: - if(lambda.doubleValue(iy.seek(oy)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(iy.seek(oy))) { continue; } final int idx = xbase + oy; @@ -255,25 +244,23 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy } } assert(x >= 0 && y >= 0); - merge(size, scratch, ix, iy, pi, lambda, csize, mindist, x, y); + merge(size, scratch, ix, iy, builder, mindist, x, y); return x; } /** * Execute the cluster merge. - * + * * @param size Data set size * @param scratch Scratch space. 
* @param ix First iterator * @param iy Second iterator - * @param pi Parent storage - * @param lambda Lambda (join distance) storage - * @param csize Cluster sizes + * @param builder Hierarchy builder * @param mindist Distance that was used for merging * @param x First matrix position * @param y Second matrix position */ - protected void merge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableIntegerDataStore csize, double mindist, int x, int y) { + protected void merge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y) { // Avoid allocating memory, by reusing existing iterators: ix.seek(x); iy.seek(y); @@ -283,41 +270,39 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy // Perform merge in data structure: x -> y assert(y < x); // Since y < x, prefer keeping y, dropping x. - lambda.put(ix, mindist); - pi.put(ix, iy); + builder.add(ix, mindist, iy); // Update cluster size for y: - final int sizex = csize.intValue(ix), sizey = csize.intValue(iy); - csize.put(iy, sizex + sizey); + final int sizex = builder.getSize(ix), sizey = builder.getSize(iy); + builder.setSize(iy, sizex + sizey); // Note: this changes iy. - updateMatrix(size, scratch, iy, lambda, csize, mindist, x, y, sizex, sizey); + updateMatrix(size, scratch, iy, builder, mindist, x, y, sizex, sizey); } /** * Update the scratch distance matrix. - * + * * @param size Data set size * @param scratch Scratch matrix. 
* @param ij Iterator to reuse - * @param lambda Lambda (join distance) storage - * @param csize Cluster sizes + * @param builder Hierarchy builder * @param mindist Distance that was used for merging * @param x First matrix position * @param y Second matrix position * @param sizex Old size of first cluster * @param sizey Old size of second cluster */ - protected void updateMatrix(int size, double[] scratch, DBIDArrayIter ij, WritableDoubleDataStore lambda, WritableIntegerDataStore csize, double mindist, int x, int y, final int sizex, final int sizey) { + protected void updateMatrix(int size, double[] scratch, DBIDArrayIter ij, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y, final int sizex, final int sizey) { // Update distance matrix. Note: miny < minx final int xbase = triangleSize(x), ybase = triangleSize(y); // Write to (y, j), with j < y int j = 0; for(; j < y; j++) { - if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(ij.seek(j))) { continue; } - final int sizej = csize.intValue(ij); + final int sizej = builder.getSize(ij); final int yb = ybase + j; scratch[yb] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[yb], sizej, mindist); } @@ -325,20 +310,20 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy // Write to (j, y), with y < j < x int jbase = triangleSize(j); for(; j < x; jbase += j++) { - if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(ij.seek(j))) { continue; } - final int sizej = csize.intValue(ij); + final int sizej = builder.getSize(ij); final int jb = jbase + y; scratch[jb] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[jb], sizej, mindist); } jbase += j++; // Skip x // Write to (j, y), with y < x < j for(; j < size; jbase += j++) { - if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(ij.seek(j))) { continue; } - final int sizej = csize.intValue(ij); + final 
int sizej = builder.getSize(ij); scratch[jbase + y] = linkage.combine(sizex, scratch[jbase + x], sizey, scratch[jbase + y], sizej, mindist); } } @@ -356,11 +341,11 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy /** * Parameterization class - * + * * @author Erich Schubert - * + * * @apiviz.exclude - * + * * @param <O> Object type */ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AbstractHDBSCAN.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AbstractHDBSCAN.java index 8febff49..a38f629f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AbstractHDBSCAN.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AbstractHDBSCAN.java @@ -66,6 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf HDBSCANAdapter * @apiviz.composedOf HeapMSTCollector @@ -107,7 +108,7 @@ public abstract class AbstractHDBSCAN<O, R extends Result> extends AbstractDista protected WritableDoubleDataStore computeCoreDists(DBIDs ids, KNNQuery<O> knnQ, int minPts) { final Logging LOG = getLogger(); final WritableDoubleDataStore coredists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB); - FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Computing core sizes.", ids.size(), LOG) : null; + FiniteProgress cprog = LOG.isVerbose() ? 
new FiniteProgress("Computing core sizes", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { coredists.put(iter, knnQ.getKNNForDBID(iter, minPts).getKNNDistance()); LOG.incrementProcessed(cprog); @@ -234,7 +235,7 @@ public abstract class AbstractHDBSCAN<O, R extends Result> extends AbstractDista pi.put(iter, iter); // Initialize } DBIDVar p = DBIDUtil.newVar(), q = DBIDUtil.newVar(), n = DBIDUtil.newVar(); - FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Converting MST to pointer representation.", heap.size(), LOG) : null; + FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Converting MST to pointer representation", heap.size(), LOG) : null; while(!heap.isEmpty()) { final double dist = heap.peekKey(); final long pair = heap.peekValue(); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AnderbergHierarchicalClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AnderbergHierarchicalClustering.java index 0484b7ad..f30bd8df 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AnderbergHierarchicalClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AnderbergHierarchicalClustering.java @@ -29,14 +29,8 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; -import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; 
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -52,16 +46,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; /** * This is a modification of the classic AGNES algorithm for hierarchical * clustering using a nearest-neighbor heuristic for acceleration. - * + * * Instead of scanning the matrix (with cost O(n^2)) to find the minimum, the * nearest neighbor of each object is remembered. On the downside, we need to * check these values at every merge, and it may now cost O(n^2) to perform a * merge, so there is no worst-case advantage to this approach. The average case * however improves from O(n^3) to O(n^2), which yields a considerable * improvement in running time. - * + * * This optimization is attributed to M. R. Anderberg. - * + * * Reference: * <p> * M. R. Anderberg<br /> @@ -71,8 +65,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.6.0 * * @apiviz.composedOf LinkageMethod + * @apiviz.composedOf PointerHierarchyRepresentationBuilder * * @param <O> Object type */ @@ -92,7 +88,7 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg /** * Constructor. 
- * + * * @param distanceFunction Distance function to use * @param linkage Linkage method */ @@ -103,7 +99,7 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg /** * Run the algorithm - * + * * @param db Database * @param relation Relation * @return Clustering hierarchy @@ -135,21 +131,16 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg initializeNNCache(scratch, bestd, besti); // Initialize space for result: - WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); - WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY); - WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 1); - for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { - pi.put(it, it); - } + PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids); // Repeat until everything merged into 1 cluster FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null; int wsize = size; for(int i = 1; i < size; i++) { - int x = findMerge(wsize, scratch, ix, iy, bestd, besti, pi, lambda, csize); + int x = findMerge(wsize, scratch, ix, iy, bestd, besti, builder); if(x == wsize - 1) { --wsize; - for(ix.seek(wsize - 1); lambda.doubleValue(ix) < Double.POSITIVE_INFINITY; ix.retract()) { + for(ix.seek(wsize - 1); builder.isLinked(ix); ix.retract()) { --wsize; } } @@ -157,12 +148,12 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg } LOG.ensureCompleted(prog); - return new PointerHierarchyRepresentationResult(ids, pi, lambda); + return builder.complete(); } /** * Initialize the NN cache. 
- * + * * @param scratch Scatch space * @param bestd Best distance * @param besti Best index @@ -193,21 +184,19 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg /** * Perform the next merge step. - * + * * Due to the cache, this is now O(n) each time, instead of O(n*n). - * + * * @param size Data set size * @param scratch Scratch space. * @param ix First iterator * @param iy Second iterator * @param bestd Best distance * @param besti Index of best distance - * @param pi Parent storage - * @param lambda Lambda (join distance) storage - * @param csize Cluster sizes + * @param builder Hierarchy builder * @return x, for shrinking the working set. */ - protected int findMerge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, double[] bestd, int[] besti, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableIntegerDataStore csize) { + protected int findMerge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, double[] bestd, int[] besti, PointerHierarchyRepresentationBuilder builder) { double mindist = Double.POSITIVE_INFINITY; int x = -1, y = -1; // Find minimum: @@ -223,27 +212,25 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg } } assert(x >= 0 && y >= 0); - merge(size, scratch, ix, iy, bestd, besti, pi, lambda, csize, mindist, x < y ? y : x, x < y ? x : y); + merge(size, scratch, ix, iy, bestd, besti, builder, mindist, x < y ? y : x, x < y ? x : y); return x; } /** * Execute the cluster merge. - * + * * @param size Data set size * @param scratch Scratch space. 
* @param ix First iterator * @param iy Second iterator * @param bestd Best distance * @param besti Index of best distance - * @param pi Parent storage - * @param lambda Lambda (join distance) storage - * @param csize Cluster sizes + * @param builder Hierarchy builder * @param mindist Distance that was used for merging * @param x First matrix position * @param y Second matrix position */ - protected void merge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, double[] bestd, int[] besti, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableIntegerDataStore csize, double mindist, int x, int y) { + protected void merge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, double[] bestd, int[] besti, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y) { // Avoid allocating memory, by reusing existing iterators: ix.seek(x); iy.seek(y); @@ -253,17 +240,16 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg // Perform merge in data structure: x -> y assert(y < x); // Since y < x, prefer keeping y, dropping x. - lambda.put(ix, mindist); - pi.put(ix, iy); + builder.add(ix, mindist, iy); // Update cluster size for y: - final int sizex = csize.intValue(ix), sizey = csize.intValue(iy); - csize.put(iy, sizex + sizey); + final int sizex = builder.getSize(ix), sizey = builder.getSize(iy); + builder.setSize(iy, sizex + sizey); // Deactivate x in cache: besti[x] = -1; // Note: this changes iy. - updateMatrix(size, scratch, iy, bestd, besti, lambda, csize, mindist, x, y, sizex, sizey); + updateMatrix(size, scratch, iy, bestd, besti, builder, mindist, x, y, sizex, sizey); if(besti[y] == x) { findBest(size, scratch, bestd, besti, y); } @@ -271,31 +257,30 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg /** * Update the scratch distance matrix. - * + * * @param size Data set size * @param scratch Scratch matrix. 
* @param ij Iterator to reuse * @param bestd Best distance * @param besti Index of best distance - * @param lambda Lambda (join distance) storage - * @param csize Cluster sizes + * @param builder Hierarchy builder * @param mindist Distance that was used for merging * @param x First matrix position * @param y Second matrix position * @param sizex Old size of first cluster * @param sizey Old size of second cluster */ - protected void updateMatrix(int size, double[] scratch, DBIDArrayIter ij, double[] bestd, int[] besti, WritableDoubleDataStore lambda, WritableIntegerDataStore csize, double mindist, int x, int y, final int sizex, final int sizey) { + protected void updateMatrix(int size, double[] scratch, DBIDArrayIter ij, double[] bestd, int[] besti, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y, final int sizex, final int sizey) { // Update distance matrix. Note: miny < minx final int xbase = AGNES.triangleSize(x), ybase = AGNES.triangleSize(y); // Write to (y, j), with j < y int j = 0; for(; j < y; j++) { - if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(ij.seek(j))) { continue; } - final int sizej = csize.intValue(ij); + final int sizej = builder.getSize(ij); final int yb = ybase + j; final double d = scratch[yb] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[yb], sizej, mindist); updateCache(size, scratch, bestd, besti, x, y, j, d); @@ -304,10 +289,10 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg // Write to (j, y), with y < j < x int jbase = AGNES.triangleSize(j); for(; j < x; jbase += j++) { - if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(ij.seek(j))) { continue; } - final int sizej = csize.intValue(ij); + final int sizej = builder.getSize(ij); final int jb = jbase + y; final double d = scratch[jb] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[jb], sizej, mindist); updateCache(size, scratch, 
bestd, besti, x, y, j, d); @@ -315,10 +300,10 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg jbase += j++; // Skip x // Write to (j, y), with y < x < j for(; j < size; jbase += j++) { - if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) { + if(builder.isLinked(ij.seek(j))) { continue; } - final int sizej = csize.intValue(ij); + final int sizej = builder.getSize(ij); final double d = scratch[jbase + y] = linkage.combine(sizex, scratch[jbase + x], sizey, scratch[jbase + y], sizej, mindist); updateCache(size, scratch, bestd, besti, x, y, j, d); } @@ -390,11 +375,11 @@ public class AnderbergHierarchicalClustering<O> extends AbstractDistanceBasedAlg /** * Parameterization class - * + * * @author Erich Schubert - * + * * @apiviz.exclude - * + * * @param <O> Object type */ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CLINK.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CLINK.java index 65eba0cd..711cc5c7 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CLINK.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CLINK.java @@ -56,6 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has CompleteLinkageMethod * @@ -65,7 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; title = "An Efficient Algorithm for the Complete Link Cluster Method", // booktitle = "The Computer Journal 20.4", // url = "http://dx.doi.org/10.1093/comjnl/20.4.364") -@Alias({ "CLINK", "Defays" }) +@Alias("Defays") public class CLINK<O> extends SLINK<O> { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java index 9fc13e20..5d33f693 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java @@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * </p> * * @author Erich Schubert + * @since 0.6.0 */ @Alias({ "centroid", "upgmc" }) @Reference(authors = "A. K. Jain and R. C. Dubes", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java index 28c6841d..241137e1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java @@ -29,6 +29,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * Complete-linkage clustering method. 
* * @author Erich Schubert + * @since 0.5.0 */ @Alias({ "complete", "clink", "complete-link", "farthest-neighbor" }) public class CompleteLinkageMethod implements LinkageMethod { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java index c618775e..551f84cb 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * </p> * * @author Erich Schubert + * @since 0.3 */ @Alias({ "upgma", "average", "average-link", "average-linkage", "UPGMA" }) @Reference(authors = "A. K. Jain and R. C. Dubes", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HDBSCANLinearMemory.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HDBSCANLinearMemory.java index 37dfd01f..b402d65f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HDBSCANLinearMemory.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HDBSCANLinearMemory.java @@ -74,8 +74,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @param <O> Object type + * + * @apiviz.has PointerDensityHierarchyRepresentationResult */ @Title("HDBSCAN: Hierarchical Density-Based Spatial Clustering of Applications with Noise") @Description("Density-Based Clustering Based on Hierarchical Density Estimates") @@ -119,7 +122,7 @@ public class HDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensityHie final int numedges = ids.size() - 1; DoubleLongHeap heap = new DoubleLongMinHeap(numedges); // 2. Build spanning tree. 
- FiniteProgress mprog = LOG.isVerbose() ? new FiniteProgress("Computing minimum spanning tree (n-1 edges).", numedges, LOG) : null; + FiniteProgress mprog = LOG.isVerbose() ? new FiniteProgress("Computing minimum spanning tree (n-1 edges)", numedges, LOG) : null; PrimsMinimumSpanningTree.processDense(ids,// new HDBSCANAdapter(ids, coredists, distQ), // new HeapMSTCollector(heap, mprog, LOG)); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java index 0dfb31e0..80f72a69 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java @@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.database.Database; * {@link de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.ExtractFlatClusteringFromHierarchy}. * * @author Erich Schubert + * @since 0.4.0 * * @apiviz.has PointerHierarchyRepresentationResult */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java index ea853eec..de134e1d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Erich Schubert + * @since 0.4.0 */ @Reference(authors = "G. N. Lance and W. T. Williams", // title = "A general theory of classificatory sorting strategies 1. 
Hierarchical systems", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java index 6eb2e156..20f83feb 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * </p> * * @author Erich Schubert + * @since 0.3 */ @Reference(authors = "J. C. Gower", // title = "A comparison of some methods of cluster analysis", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerDensityHierarchyRepresentationResult.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerDensityHierarchyRepresentationResult.java index 581841b8..b37e82af 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerDensityHierarchyRepresentationResult.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerDensityHierarchyRepresentationResult.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs; * which is a density estimation. 
* * @author Erich Schubert + * @since 0.5.0 */ public class PointerDensityHierarchyRepresentationResult extends PointerHierarchyRepresentationResult { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationBuilder.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationBuilder.java new file mode 100644 index 00000000..8e15bf81 --- /dev/null +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationBuilder.java @@ -0,0 +1,171 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2015 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.logging.Logging; + +/** + * Class to help building a pointer hierarchy. + * + * @author Erich Schubert + * @since 0.7.1 + * + * @apiviz.has PointerHierarchyRepresentationResult + */ +public class PointerHierarchyRepresentationBuilder { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(PointerHierarchyRepresentationBuilder.class); + + /** + * The DBIDs in this result. + */ + protected final DBIDs ids; + + /** + * The parent DBID relation. + */ + protected WritableDBIDDataStore parent; + + /** + * Distance to the parent object. + */ + protected WritableDoubleDataStore parentDistance; + + /** + * Last linking distance. + */ + protected double prevdist; + + /** + * Cluster size storage. May be uninitialized! + */ + protected WritableIntegerDataStore csize; + + /** + * Constructor. + * + * @param ids IDs + */ + public PointerHierarchyRepresentationBuilder(DBIDs ids) { + super(); + this.ids = ids; + this.parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); + this.parentDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY); + this.prevdist = Double.NEGATIVE_INFINITY; + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { + parent.put(it, it); + } + } + + /** + * Add an element to the pointer representation. 
+ * + * Important: If an algorithm does not produce links in an increasing fashion, + * a warning will be issued and the linking distance will be increased. + * Otherwise, the hierarchy would be misinterpreted when links are executed + * ordered by their distance. + * + * @param cur Current object + * @param distance Link distance + * @param par Parent + */ + public void add(DBIDRef cur, double distance, DBIDRef par) { + if(distance < prevdist) { + LOG.warning("Non-monotone hierarchical clustering detected. Adjusting linking distance from " + distance + " to " + prevdist); + distance = prevdist; + } + parent.putDBID(cur, par); + double olddist = parentDistance.putDouble(cur, distance); + assert (olddist == Double.POSITIVE_INFINITY) : "Object was already linked!"; + prevdist = distance; + } + + /** + * Finalize the result. + * + * @return Completed result + */ + public PointerHierarchyRepresentationResult complete() { + if(csize != null) { + csize.destroy(); + csize = null; + } + return new PointerHierarchyRepresentationResult(ids, parent, parentDistance); + } + + /** + * Get the cluster size of the current object. + * + * @param id Object id + * @return Cluster size (initially 1). + */ + public int getSize(DBIDRef id) { + if(csize == null) { + csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 1); + } + return csize.intValue(id); + } + + /** + * Set the cluster size of an object. + * + * @param id Object to set + * @param size Cluster size + */ + public void setSize(DBIDRef id, int size) { + if(csize == null) { + csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 1); + } + csize.putInt(id, size); + } + + /** + * Test if an object is already linked. + * + * @param id Object + * @return {@code true} if the object is already linked. 
+ */ + public boolean isLinked(DBIDRef id) { + return parentDistance.doubleValue(id) < Double.POSITIVE_INFINITY; + } + + /** + * Get the current linking distance of an object. + * + * @param id Object + * @return Distance, or infinity + */ + public double getDistance(DBIDRef id) { + return parentDistance.doubleValue(id); + } +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java index 2369c6e5..fbd3d6b9 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java @@ -24,8 +24,19 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical; */ import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.IntegerDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDVar; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.result.BasicResult; /** @@ -36,7 +47,12 @@ import de.lmu.ifi.dbs.elki.result.BasicResult; * {@link de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.ExtractFlatClusteringFromHierarchy} * can be used to 
extract partitions from this graph. * + * This class can also compute dendrogram positions, but using a faster + * algorithm than the one proposed by Sibson 1971, using only O(n log n) time + * due to sorting, but using an additional temporary array. + * * @author Erich Schubert + * @since 0.6.0 */ public class PointerHierarchyRepresentationResult extends BasicResult { /** @@ -55,8 +71,13 @@ public class PointerHierarchyRepresentationResult extends BasicResult { DoubleDataStore parentDistance; /** + * Position storage, computed on demand. + */ + IntegerDataStore positions = null; + + /** * Constructor. - * + * * @param ids IDs processed. * @param parent Parent pointer. * @param parentDistance Distance to parent. @@ -70,7 +91,7 @@ public class PointerHierarchyRepresentationResult extends BasicResult { /** * Get the clustered DBIDs. - * + * * @return DBIDs */ public DBIDs getDBIDs() { @@ -79,7 +100,7 @@ public class PointerHierarchyRepresentationResult extends BasicResult { /** * Get the parent DBID relation. - * + * * @return Parent relation. */ public DBIDDataStore getParentStore() { @@ -88,10 +109,98 @@ public class PointerHierarchyRepresentationResult extends BasicResult { /** * Get the distance to the parent. - * + * * @return Parent distance. */ public DoubleDataStore getParentDistanceStore() { return parentDistance; } + + /** + * Get / compute the positions. + * + * @return Dendrogram positions + */ + public IntegerDataStore getPositions() { + if(positions != null) { + return positions; // Return cached. 
+ } + ArrayDBIDs order = topologicalSort(ids, parent, parentDistance); + DBIDArrayIter it = order.iter(); + final int last = order.size() - 1; + // Subtree sizes of each element: + WritableIntegerDataStore siz = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, 1); + DBIDVar v1 = DBIDUtil.newVar(); + for(it.seek(0); it.valid(); it.advance()) { + if(DBIDUtil.equal(it, parent.assignVar(it, v1))) { + continue; + } + siz.increment(v1, siz.intValue(it)); + } + // Assertion only holds for exact e.g. single linkage + //assert (siz.intValue(it.seek(last)) == ids.size()); + WritableIntegerDataStore pos = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, -1); + WritableIntegerDataStore ins = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1); + int defins = 0; + // Place elements based on their successor + for(it.seek(last); it.valid(); it.retract()) { + int size = siz.intValue(it); + parent.assignVar(it, v1); // v1 = parent + final int ipos = ins.intValue(v1); + // Assertion only holds for exact e.g. single linkage + // assert (ipos >= 0); + if(ipos < 0 || DBIDUtil.equal(it, v1)) { + // Root: use interval [defins; defins + size] + ins.putInt(it, defins); + pos.putInt(it, defins + size - 1); + defins += size; + continue; + } + // Insertion position of parent = leftmost + pos.putInt(it, ipos + size - 1); + ins.putInt(it, ipos); + ins.increment(v1, size); + } + ins.destroy(); + return positions = pos; + } + + /** + * Perform topological sorting based on the successor order. + * + * @param oids IDs to sort + * @param parent Parent relationship. + * @param parentDistance Distance to parent. + * @return Sorted order + */ + public static ArrayDBIDs topologicalSort(DBIDs oids, DBIDDataStore parent, DoubleDataStore parentDistance) { + // We used to simply use this: + // But for e.g. 
Median Linkage, this would lead to problems, as links are + // not necessarily performed in ascending order anymore! + ArrayModifiableDBIDs ids = DBIDUtil.newArray(oids); + ids.sort(new DataStoreUtil.DescendingByDoubleDataStoreAndId(parentDistance)); + final int size = ids.size(); + ModifiableDBIDs seen = DBIDUtil.newHashSet(size); + ArrayModifiableDBIDs order = DBIDUtil.newArray(size); + DBIDVar v1 = DBIDUtil.newVar(), prev = DBIDUtil.newVar(); + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { + if(!seen.add(it)) { + continue; + } + order.add(it); + prev.set(it); // Copy + while(!DBIDUtil.equal(prev, parent.assignVar(prev, v1))) { + if(!seen.add(v1)) { + break; + } + order.add(v1); + prev.set(v1); // Copy + } + } + // Reverse the array: + for(int i = 0, j = size - 1; i < j; i++, j--) { + order.swap(i, j); + } + return order; + } } diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java index 71213208..f41a18bb 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java @@ -63,6 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * * @author Elke Achtert * @author Erich Schubert + * @since 0.6.0 * * @apiviz.has SingleLinkageMethod * @@ -74,7 +75,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; title = "SLINK: An optimally efficient algorithm for the single-link cluster method", // booktitle = "The Computer Journal 16 (1973), No. 1, p. 
30-34.", // url = "http://dx.doi.org/10.1093/comjnl/16.1.30") -@Alias(value = { "de.lmu.ifi.dbs.elki.algorithm.clustering.SLINK", "clustering.SLINK", "SLINK", "single-link", "single-linkage" }) +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.clustering.SLINK", "clustering.SLINK", // +"single-link", "single-linkage" }) public class SLINK<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchyRepresentationResult>implements HierarchicalClusteringAlgorithm { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINKHDBSCANLinearMemory.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINKHDBSCANLinearMemory.java index 8f1c6195..d89da303 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINKHDBSCANLinearMemory.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINKHDBSCANLinearMemory.java @@ -50,22 +50,22 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; /** * Linear memory implementation of HDBSCAN clustering based on SLINK. - * + * * By not building a distance matrix, we can reduce memory usage to linear * memory only; but at the cost of roughly double the runtime (unless using * indexes) as we first need to compute all kNN distances (for core sizes), then * recompute distances when building the spanning tree. - * + * * This version uses the SLINK algorithm to directly produce the pointer * representation expected by the extraction methods. The SLINK algorithm is * closely related to Prim's minimum spanning tree, but produces the more * compact pointer representation instead of an edges list. - * + * * This implementation does <em>not</em> include the cluster extraction * discussed as Step 4. This functionality should however already be provided by * {@link de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.extraction.HDBSCANHierarchyExtraction} * . 
For this reason, we also do <em>not include self-edges</em>. - * + * * Reference: * <p> * R. J. G. B. Campello, D. Moulavi, and J. Sander<br /> @@ -73,8 +73,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining, * PAKDD * </p> - * + * * @author Erich Schubert + * @since 0.6.0 + * + * @apiviz.has PointerDensityHierarchyRepresentationResult */ @Title("HDBSCAN: Hierarchical Density-Based Spatial Clustering of Applications with Noise") @Description("Density-Based Clustering Based on Hierarchical Density Estimates") @@ -90,7 +93,7 @@ public class SLINKHDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensi /** * Constructor. - * + * * @param distanceFunction Distance function * @param minPts Minimum number of points for density */ @@ -100,7 +103,7 @@ public class SLINKHDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensi /** * Run the algorithm - * + * * @param db Database * @param relation Relation * @return Clustering hierarchy @@ -143,7 +146,7 @@ public class SLINKHDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensi /** * First step: Initialize P(id) = id, L(id) = infinity. - * + * * @param id the id of the object to be inserted into the pointer * representation * @param pi Pi data store @@ -160,7 +163,7 @@ public class SLINKHDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensi /** * Second step: Determine the pairwise distances from all objects in the * pointer representation to the new object with the specified id. 
- * + * * @param id the id of the object to be inserted into the pointer * representation * @param processedIDs the already processed ids @@ -179,7 +182,7 @@ public class SLINKHDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensi /** * Third step: Determine the values for P and L - * + * * @param id the id of the object to be inserted into the pointer * representation * @param pi Pi data store @@ -220,7 +223,7 @@ public class SLINKHDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensi /** * Fourth step: Actualize the clusters if necessary - * + * * @param id the id of the current object * @param pi Pi data store * @param lambda Lambda data store @@ -254,11 +257,11 @@ public class SLINKHDBSCANLinearMemory<O> extends AbstractHDBSCAN<O, PointerDensi /** * Parameterization class - * + * * @author Erich Schubert - * + * * @apiviz.exclude - * + * * @param <O> Object type */ public static class Parameterizer<O> extends AbstractHDBSCAN.Parameterizer<O> { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java index f16393de..73cf4b99 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * </p> * * @author Erich Schubert + * @since 0.3 */ @Reference(authors = "K. Florek and J. Łukaszewicz and J. Perkal and H. Steinhaus and S. 
Zubrzycki", // title = "Sur la liaison et la division des points d'un ensemble fini",// diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java index 80194a87..ad61d6da 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java @@ -41,6 +41,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Erich Schubert + * @since 0.6.0 */ @Reference(authors = "J. H. Ward Jr", // title = "Hierarchical grouping to optimize an objective function", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java index 162d4bc9..3cb583f8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * </p> * * @author Erich Schubert + * @since 0.6.0 */ @Reference(authors = "A. K. Jain and R. C. 
Dubes", // title = "Algorithms for Clustering Data", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/ExtractFlatClusteringFromHierarchy.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/ExtractFlatClusteringFromHierarchy.java index 5f4bbd70..9d055b03 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/ExtractFlatClusteringFromHierarchy.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/ExtractFlatClusteringFromHierarchy.java @@ -48,6 +48,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArray; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -69,10 +70,12 @@ import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; * TODO: add an hierarchy simplification step. * * @author Erich Schubert + * @since 0.6.0 * * @apiviz.uses HierarchicalClusteringAlgorithm * @apiviz.uses PointerHierarchyRepresentationResult */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical.ExtractFlatClusteringFromHierarchy") public class ExtractFlatClusteringFromHierarchy implements ClusteringAlgorithm<Clustering<DendrogramModel>> { /** * Class logger. @@ -182,8 +185,7 @@ public class ExtractFlatClusteringFromHierarchy implements ClusteringAlgorithm<C // Sort DBIDs by lambda. 
We need this for two things: // a) to determine the stop distance from "minclusters" parameter // b) to process arrows in decreasing / increasing order - ArrayModifiableDBIDs order = DBIDUtil.newArray(ids); - order.sort(new DataStoreUtil.AscendingByDoubleDataStore(lambda)); + ArrayDBIDs order = PointerHierarchyRepresentationResult.topologicalSort(ids, pi, lambda); DBIDArrayIter it = order.iter(); // Used multiple times! final int split = findSplit(order, it, lambda); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/HDBSCANHierarchyExtraction.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/HDBSCANHierarchyExtraction.java index 20c5e211..80c32ae4 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/HDBSCANHierarchyExtraction.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/HDBSCANHierarchyExtraction.java @@ -40,6 +40,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; @@ -81,6 +82,7 @@ import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; * one-element clusters to reduce garbage collection overhead. * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.uses HierarchicalClusteringAlgorithm * @apiviz.uses PointerHierarchyRepresentationResult @@ -153,8 +155,7 @@ public class HDBSCANHierarchyExtraction implements ClusteringAlgorithm<Clusterin FiniteProgress progress = LOG.isVerbose() ? 
new FiniteProgress("Extracting clusters", ids.size(), LOG) : null; // Sort DBIDs by lambda, to process merges in increasing order. - ArrayModifiableDBIDs order = DBIDUtil.newArray(ids); - order.sort(new DataStoreUtil.AscendingByDoubleDataStore(lambda)); + ArrayDBIDs order = PointerHierarchyRepresentationResult.topologicalSort(ids, pi, lambda); WritableDataStore<TempCluster> cluster_map = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, TempCluster.class); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/SimplifiedHierarchyExtraction.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/SimplifiedHierarchyExtraction.java index 236e7e0a..b84d1a6f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/SimplifiedHierarchyExtraction.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/extraction/SimplifiedHierarchyExtraction.java @@ -40,6 +40,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; @@ -73,6 +74,7 @@ import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.uses HierarchicalClusteringAlgorithm * @apiviz.uses PointerHierarchyRepresentationResult @@ -138,8 +140,7 @@ public class SimplifiedHierarchyExtraction implements ClusteringAlgorithm<Cluste FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null; // Sort DBIDs by lambda, to process merges in increasing order. 
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids); - order.sort(new DataStoreUtil.AscendingByDoubleDataStore(lambda)); + ArrayDBIDs order = PointerHierarchyRepresentationResult.topologicalSort(ids, pi, lambda); WritableDataStore<TempCluster> cluster_map = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, TempCluster.class); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java index 40fb313d..5df1bee8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java @@ -63,6 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Abstract base class for k-means implementations. * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.composedOf KMeansInitialization * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java index 292e74db..84fb877c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @author Stephan Baier * @author Erich Schubert + * @since 0.6.0 * * @param <V> Vector type * @param <M> Model type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/CLARA.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/CLARA.java index 4e2bf7a8..42ea12b6 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/CLARA.java +++ 
b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/CLARA.java @@ -66,6 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @param <V> Vector type */ @@ -124,7 +125,7 @@ public class CLARA<V> extends KMedoidsPAM<V> { WritableIntegerDataStore bestclusters = null; Random rnd = random.getSingleThreadedRandom(); - FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Random samples.", numsamples, LOG) : null; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Processing random samples", numsamples, LOG) : null; for(int j = 0; j < numsamples; j++) { DBIDs rids = DBIDUtil.randomSample(ids, sampling, rnd); // Choose initial medoids diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java index b242b407..697655d8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; * Some constants and options shared among kmeans family algorithms. * * @author Erich Schubert + * @since 0.2 * * @param <V> Number vector type * @param <M> Actual model type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java index c6082176..8885fc46 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBatchedLloyd.java @@ -69,6 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * blocks. 
* * @author Erich Schubert + * @since 0.6.0 * * @apiviz.has KMeansModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java index 70b6a30e..92e00e0d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java @@ -61,6 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Stephan Baier + * @since 0.6.0 * * @param <V> Vector type * @param <M> Model type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansCompare.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansCompare.java new file mode 100644 index 00000000..5632621a --- /dev/null +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansCompare.java @@ -0,0 +1,271 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2015 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization; +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.model.KMeansModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress; +import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic; +import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; +import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; + +/** + * Compare-Means: Accelerated k-means by exploiting the triangle inequality and + * pairwise distances of means to prune candidate means. + * + * Reference: + * <p> + * S. J. Phillips<br /> + * Acceleration of k-means and related clustering algorithms<br /> + * Proc. 4th Int. 
Workshop on Algorithm Engineering and Experiments (ALENEX + * 2002) + * </p> + * + * @author Erich Schubert + * @since 0.5.0 + * + * @apiviz.has KMeansModel + * + * @param <V> vector datatype + */ +@Title("Compare-Means") +@Reference(authors = "S. J. Phillips", // +title = "Acceleration of k-means and related clustering algorithms", // +booktitle = "Proc. 4th Int. Workshop on Algorithm Engineering and Experiments (ALENEX 2002)", // +url = "http://dx.doi.org/10.1007/3-540-45643-0_13") +public class KMeansCompare<V extends NumberVector> extends AbstractKMeans<V, KMeansModel> { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(KMeansCompare.class); + + /** + * Key for statistics logging. + */ + private static final String KEY = KMeansCompare.class.getName(); + + /** + * Constructor. + * + * @param distanceFunction distance function + * @param k k parameter + * @param maxiter Maxiter parameter + * @param initializer Initialization method + */ + public KMeansCompare(NumberVectorDistanceFunction<? super V> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer) { + super(distanceFunction, k, maxiter, initializer); + } + + @Override + public Clustering<KMeansModel> run(Database database, Relation<V> relation) { + if(relation.size() <= 0) { + return new Clustering<>("k-Means Clustering", "kmeans-clustering"); + } + // Choose initial means + if(LOG.isStatistics()) { + LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); + } + List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY); + // Setup cluster assignment store + List<ModifiableDBIDs> clusters = new ArrayList<>(); + for(int i = 0; i < k; i++) { + clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. 
/ k))); + } + WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1); + double[] varsum = new double[k]; + + // Cluster distances + double[][] cdist = new double[k][k]; + + IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null; + DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null; + LongStatistic diststat = LOG.isStatistics() ? new LongStatistic(KEY + ".distance-computations") : null; + int iteration = 0; + for(; maxiter <= 0 || iteration < maxiter; iteration++) { + LOG.incrementProcessed(prog); + recomputeSeperation(means, cdist, diststat); + boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum, cdist, diststat); + logVarstat(varstat, varsum); + if(LOG.isStatistics()) { + LOG.statistics(diststat); + } + // Stop if no cluster assignment changed. + if(!changed) { + break; + } + // Recompute means. + means = means(clusters, means, relation); + } + LOG.setCompleted(prog); + if(LOG.isStatistics()) { + LOG.statistics(new LongStatistic(KEY + ".iterations", iteration)); + } + + // Wrap result + Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering"); + for(int i = 0; i < clusters.size(); i++) { + DBIDs ids = clusters.get(i); + if(ids.size() == 0) { + continue; + } + KMeansModel model = new KMeansModel(means.get(i), varsum[i]); + result.addToplevelCluster(new Cluster<>(ids, model)); + } + return result; + } + + /** + * Recompute the separation of cluster means. 
+ * + * @param means Means + * @param cdist Center-to-Center distances + * @param diststat Distance counting statistic + */ + private void recomputeSeperation(List<Vector> means, double[][] cdist, LongStatistic diststat) { + final int k = means.size(); + for(int i = 1; i < k; i++) { + Vector mi = means.get(i); + for(int j = 0; j < i; j++) { + double d = distanceFunction.distance(mi, means.get(j)); + cdist[i][j] = d; + cdist[j][i] = d; + } + } + if(diststat != null) { + diststat.increment((k * (k - 1)) >> 1); + } + } + + /** + * Reassign objects, but only if their bounds indicate it is necessary to do + * so. + * + * @param relation Data + * @param means Current means + * @param clusters Current clusters + * @param assignment Cluster assignment + * @param varsum Variance sum counter + * @param cdist Centroid distances + * @param diststat Distance statistics + * @return true when the object was reassigned + */ + private boolean assignToNearestCluster(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] varsum, double[][] cdist, LongStatistic diststat) { + assert (k == means.size()); + long dists = 0; + boolean changed = false; + // Reset all clusters + Arrays.fill(varsum, 0.); + for(ModifiableDBIDs cluster : clusters) { + cluster.clear(); + } + final NumberVectorDistanceFunction<?> df = getDistanceFunction(); + double mult = (df instanceof SquaredEuclideanDistanceFunction) ? 4 : 2; + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + final int cur = assignment.intValue(iditer), ini = cur >= 0 ? 
cur : 0; + // Distance to current mean: + V fv = relation.get(iditer); + double mindist = df.distance(fv, means.get(ini)); + ++dists; + final double thresh = mult * mindist; + int minIndex = ini; + for(int i = 0; i < k; i++) { + if(i == ini || cdist[minIndex][i] >= thresh) { // Compare pruning + continue; + } + double dist = df.distance(fv, means.get(i)); + ++dists; + if(dist < mindist) { + minIndex = i; + mindist = dist; + } + } + varsum[minIndex] += mindist; + clusters.get(minIndex).add(iditer); + changed |= assignment.putInt(iditer, minIndex) != minIndex; + } + // Increment distance computations counter. + if(diststat != null) { + diststat.increment(dists); + } + return changed; + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<V extends NumberVector> extends AbstractKMeans.Parameterizer<V> { + @Override + protected Logging getLogger() { + return LOG; + } + + @Override + protected void getParameterDistanceFunction(Parameterization config) { + super.getParameterDistanceFunction(config); + if(distanceFunction instanceof SquaredEuclideanDistanceFunction) { + return; // Proper choice. 
+ } + if(distanceFunction != null && !distanceFunction.isMetric()) { + LOG.warning("Compare k-means requires a metric distance, and k-means should only be used with squared Euclidean distance!"); + } + } + + @Override + protected KMeansCompare<V> makeInstance() { + return new KMeansCompare<>(distanceFunction, k, maxiter, initializer); + } + } +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansElkan.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansElkan.java index e2dcf88c..58015327 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansElkan.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansElkan.java @@ -47,11 +47,14 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress; +import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic; import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; /** * Elkan's fast k-means by exploiting the triangle inequality. 
@@ -69,6 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has KMeansModel * @@ -90,15 +94,22 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean private static final String KEY = KMeansElkan.class.getName(); /** + * Flag whether to compute the final variance statistic. + */ + private boolean varstat = false; + + /** * Constructor. * * @param distanceFunction distance function * @param k k parameter * @param maxiter Maxiter parameter * @param initializer Initialization method + * @param varstat Compute the variance statistic */ - public KMeansElkan(NumberVectorDistanceFunction<? super V> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer) { + public KMeansElkan(NumberVectorDistanceFunction<? super V> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer, boolean varstat) { super(distanceFunction, k, maxiter, initializer); + this.varstat = varstat; } @Override @@ -108,7 +119,7 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean } // Choose initial means if(LOG.isStatistics()) { - LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString())); + LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); } List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY); // Setup cluster assignment store @@ -136,7 +147,7 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean double[][] cdist = new double[k][k]; IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null; - LongStatistic varstat = LOG.isStatistics() ? new LongStatistic(this.getClass().getName() + ".reassignments") : null; + LongStatistic rstat = LOG.isStatistics() ? 
new LongStatistic(this.getClass().getName() + ".reassignments") : null; int iteration = 0; for(; maxiter <= 0 || iteration < maxiter; iteration++) { LOG.incrementProcessed(prog); @@ -148,9 +159,9 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean recomputeSeperation(means, sep, cdist); // #1 changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, cdist, upper, lower); } - if(varstat != null) { - varstat.setLong(changed); - LOG.statistics(varstat); + if(rstat != null) { + rstat.setLong(changed); + LOG.statistics(rstat); } // Stop if no cluster assignment changed. if(changed == 0) { @@ -178,6 +189,7 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean lower.destroy(); // Wrap result + double totalvariance = 0.; Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering"); for(int i = 0; i < clusters.size(); i++) { DBIDs ids = clusters.get(i); @@ -189,9 +201,13 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { varsum += distanceFunction.distance(mean, relation.get(it)); } + totalvariance += varsum; KMeansModel model = new KMeansModel(mean, varsum); result.addToplevelCluster(new Cluster<>(ids, model)); } + if(LOG.isStatistics() && varstat) { + LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance)); + } return result; } @@ -395,6 +411,16 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean * @apiviz.exclude */ public static class Parameterizer<V extends NumberVector> extends AbstractKMeans.Parameterizer<V> { + /** + * Flag to compute the final clustering variance statistic. + */ + public static final OptionID VARSTAT_ID = new OptionID("kmeans.varstat", "Compute the final clustering variance statistic. 
Needs an additional full pass over the data set."); + + /** + * Compute the final variance statistic. + */ + protected boolean varstat = false; + @Override protected Logging getLogger() { return LOG; @@ -412,8 +438,17 @@ public class KMeansElkan<V extends NumberVector> extends AbstractKMeans<V, KMean } @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + Flag varF = new Flag(VARSTAT_ID); + if(config.grab(varF)) { + varstat = varF.isTrue(); + } + } + + @Override protected KMeansElkan<V> makeInstance() { - return new KMeansElkan<>(distanceFunction, k, maxiter, initializer); + return new KMeansElkan<>(distanceFunction, k, maxiter, initializer, varstat); } } } diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHamerly.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHamerly.java index 1b114701..06228525 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHamerly.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHamerly.java @@ -46,11 +46,14 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress; +import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic; import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; /** * Hamerly's fast k-means by exploiting the triangle 
inequality. @@ -63,6 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has KMeansModel * @@ -84,15 +88,22 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe private static final String KEY = KMeansHamerly.class.getName(); /** + * Flag whether to compute the final variance statistic. + */ + private boolean varstat = false; + + /** * Constructor. * * @param distanceFunction distance function * @param k k parameter * @param maxiter Maxiter parameter * @param initializer Initialization method + * @param varstat Compute the variance statistic */ - public KMeansHamerly(NumberVectorDistanceFunction<? super V> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer) { + public KMeansHamerly(NumberVectorDistanceFunction<? super V> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer, boolean varstat) { super(distanceFunction, k, maxiter, initializer); + this.varstat = varstat; } @Override @@ -102,7 +113,7 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe } // Choose initial means if(LOG.isStatistics()) { - LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString())); + LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); } List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY); // Setup cluster assignment store @@ -124,7 +135,7 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe double[] sep = new double[k]; IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null; - LongStatistic varstat = LOG.isStatistics() ? new LongStatistic(KEY + ".reassignments") : null; + LongStatistic rstat = LOG.isStatistics() ? 
new LongStatistic(KEY + ".reassignments") : null; int iteration = 0; for(; maxiter <= 0 || iteration < maxiter; iteration++) { LOG.incrementProcessed(prog); @@ -136,9 +147,9 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe recomputeSeperation(means, sep); changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, upper, lower); } - if(varstat != null) { - varstat.setLong(changed); - LOG.statistics(varstat); + if(rstat != null) { + rstat.setLong(changed); + LOG.statistics(rstat); } // Stop if no cluster assignment changed. if(changed == 0) { @@ -167,6 +178,7 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe lower.destroy(); // Wrap result + double totalvariance = 0.; Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering"); for(int i = 0; i < clusters.size(); i++) { DBIDs ids = clusters.get(i); @@ -178,9 +190,13 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { varsum += distanceFunction.distance(mean, relation.get(it)); } + totalvariance += varsum; KMeansModel model = new KMeansModel(mean, varsum); result.addToplevelCluster(new Cluster<>(ids, model)); } + if(LOG.isStatistics() && varstat) { + LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance)); + } return result; } @@ -389,6 +405,16 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe * @apiviz.exclude */ public static class Parameterizer<V extends NumberVector> extends AbstractKMeans.Parameterizer<V> { + /** + * Flag to compute the final clustering variance statistic. + */ + public static final OptionID VARSTAT_ID = new OptionID("kmeans.varstat", "Compute the final clustering variance statistic. Needs an additional full pass over the data set."); + + /** + * Compute the final variance statistic. 
+ */ + protected boolean varstat = false; + @Override protected Logging getLogger() { return LOG; @@ -406,8 +432,17 @@ public class KMeansHamerly<V extends NumberVector> extends AbstractKMeans<V, KMe } @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + Flag varF = new Flag(VARSTAT_ID); + if(config.grab(varF)) { + varstat = varF.isTrue(); + } + } + + @Override protected KMeansHamerly<V> makeInstance() { - return new KMeansHamerly<>(distanceFunction, k, maxiter, initializer); + return new KMeansHamerly<>(distanceFunction, k, maxiter, initializer, varstat); } } } diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java index cd025b03..476f95f2 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansHybridLloydMacQueen.java @@ -52,6 +52,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; * processing and Lloyd-Style batch steps. 
* * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has KMeansModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java index d8b69f54..c32f22c1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java @@ -46,6 +46,7 @@ import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic; import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -53,15 +54,16 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; /** * The standard k-means algorithm, using Lloyd-style bulk iterations. * + * Reference: * <p> - * Reference:<br /> - * S. Lloyd<br/> + * S. Lloyd:<br/> * Least squares quantization in PCM<br/> * IEEE Transactions on Information Theory 28 (2)<br/> * previously published as Bell Telephone Laboratories Paper * </p> * * @author Arthur Zimek + * @since 0.5.0 * * @apiviz.landmark * @apiviz.has KMeansModel @@ -74,6 +76,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; title = "Least squares quantization in PCM", // booktitle = "IEEE Transactions on Information Theory 28 (2): 129–137.", // url = "http://dx.doi.org/10.1109/TIT.1982.1056489") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.clustering.KMeans", // +"de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans" }) public class KMeansLloyd<V extends NumberVector> extends AbstractKMeans<V, KMeansModel> { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java index 176fa311..4f22497d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java @@ -58,14 +58,16 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * convergence, although MacQueen likely only meant to do a single pass over the * data. * + * Reference: * <p> - * Reference:<br /> - * J. MacQueen: Some Methods for Classification and Analysis of Multivariate - * Observations. <br /> + * J. MacQueen:<br /> + * Some Methods for Classification and Analysis of Multivariate Observations. + * <br /> * In 5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297. * </p> * * @author Erich Schubert + * @since 0.5.0 * @apiviz.has KMeansModel * * @param <V> vector type to use diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansSort.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansSort.java new file mode 100644 index 00000000..c119c4ec --- /dev/null +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansSort.java @@ -0,0 +1,284 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2015 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization; +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.model.KMeansModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress; +import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic; +import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; +import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.DoubleIntegerArrayQuickSort; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import 
de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; + +/** + * Sort-Means: Accelerated k-means by exploiting the triangle inequality and + * pairwise distances of means to prune candidate means (with sorting). + * + * Reference: + * <p> + * S. J. Phillips<br /> + * Acceleration of k-means and related clustering algorithms<br /> + * Proc. 4th Int. Workshop on Algorithm Engineering and Experiments (ALENEX + * 2002) + * </p> + * + * @author Erich Schubert + * @since 0.7.1 + * + * @apiviz.has KMeansModel + * + * @param <V> vector datatype + */ +@Title("Sort-Means") +@Reference(authors = "S. J. Phillips", // +title = "Acceleration of k-means and related clustering algorithms", // +booktitle = "Proc. 4th Int. Workshop on Algorithm Engineering and Experiments (ALENEX 2002)", // +url = "http://dx.doi.org/10.1007/3-540-45643-0_13") +public class KMeansSort<V extends NumberVector> extends AbstractKMeans<V, KMeansModel> { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(KMeansSort.class); + + /** + * Key for statistics logging. + */ + private static final String KEY = KMeansSort.class.getName(); + + /** + * Constructor. + * + * @param distanceFunction distance function + * @param k k parameter + * @param maxiter Maxiter parameter + * @param initializer Initialization method + */ + public KMeansSort(NumberVectorDistanceFunction<? super V> distanceFunction, int k, int maxiter, KMeansInitialization<? 
super V> initializer) { + super(distanceFunction, k, maxiter, initializer); + } + + @Override + public Clustering<KMeansModel> run(Database database, Relation<V> relation) { + if(relation.size() <= 0) { + return new Clustering<>("k-Means Clustering", "kmeans-clustering"); + } + // Choose initial means + if(LOG.isStatistics()) { + LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString())); + } + List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY); + // Setup cluster assignment store + List<ModifiableDBIDs> clusters = new ArrayList<>(); + for(int i = 0; i < k; i++) { + clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k))); + } + WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1); + double[] varsum = new double[k]; + + // Cluster distances + double[][] cdist = new double[k][k]; + int[][] cnum = new int[k][k - 1]; + + IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null; + DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null; + LongStatistic diststat = LOG.isStatistics() ? new LongStatistic(KEY + ".distance-computations") : null; + int iteration = 0; + for(; maxiter <= 0 || iteration < maxiter; iteration++) { + LOG.incrementProcessed(prog); + recomputeSeperation(means, cdist, cnum, diststat); + boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum, cdist, cnum, diststat); + logVarstat(varstat, varsum); + if(LOG.isStatistics()) { + LOG.statistics(diststat); + } + // Stop if no cluster assignment changed. + if(!changed) { + break; + } + // Recompute means. 
+ means = means(clusters, means, relation); + } + LOG.setCompleted(prog); + if(LOG.isStatistics()) { + LOG.statistics(new LongStatistic(KEY + ".iterations", iteration)); + } + + // Wrap result + Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering"); + for(int i = 0; i < clusters.size(); i++) { + DBIDs ids = clusters.get(i); + if(ids.size() == 0) { + continue; + } + KMeansModel model = new KMeansModel(means.get(i), varsum[i]); + result.addToplevelCluster(new Cluster<>(ids, model)); + } + return result; + } + + /** + * Recompute the separation of cluster means. + * + * @param means Means + * @param cdist Center-to-Center distances + * @param cnum Center numbers + * @param diststat Distance counting statistic + */ + private void recomputeSeperation(List<Vector> means, double[][] cdist, int[][] cnum, LongStatistic diststat) { + final int k = means.size(); + for(int i = 1; i < k; i++) { + Vector mi = means.get(i); + for(int j = 0; j < i; j++) { + double d = distanceFunction.distance(mi, means.get(j)); + cdist[i][j] = d; + cdist[j][i] = d; + } + } + double[] buf = new double[k - 1]; + for(int i = 0; i < k; i++) { + System.arraycopy(cdist[i], 0, buf, 0, i); + System.arraycopy(cdist[i], i + 1, buf, i, k - i - 1); + for(int j = 0; j < buf.length; j++) { + cnum[i][j] = j < i ? j : (j + 1); + } + DoubleIntegerArrayQuickSort.sort(buf, cnum[i], k - 1); + } + if(diststat != null) { + diststat.increment((k * (k - 1)) >> 1); + } + } + + /** + * Reassign objects, but only if their bounds indicate it is necessary to do + * so. 
+ * + * @param relation Data + * @param means Current means + * @param clusters Current clusters + * @param assignment Cluster assignment + * @param varsum Variance sum counter + * @param cdist Centroid distances + * @param cnum Centroid nearest neighbors + * @param diststat Distance statistics + * @return true when the object was reassigned + */ + private boolean assignToNearestCluster(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] varsum, double[][] cdist, int[][] cnum, LongStatistic diststat) { + assert (k == means.size()); + long dists = 0; + boolean changed = false; + // Reset all clusters + Arrays.fill(varsum, 0.); + for(ModifiableDBIDs cluster : clusters) { + cluster.clear(); + } + final NumberVectorDistanceFunction<?> df = getDistanceFunction(); + double mult = (df instanceof SquaredEuclideanDistanceFunction) ? 4 : 2; + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + final int cur = assignment.intValue(iditer), ini = cur >= 0 ? cur : 0; + // Distance to current mean: + V fv = relation.get(iditer); + double mindist = df.distance(fv, means.get(ini)); + ++dists; + final double threshold = mult * mindist; + int minIndex = ini; + for(int i : cnum[ini]) { + if(cdist[minIndex][i] >= threshold) { // Sort pruning + break; // All following can only be worse. + } + double dist = df.distance(fv, means.get(i)); + ++dists; + if(dist < mindist) { + minIndex = i; + mindist = dist; + } + } + varsum[minIndex] += mindist; + clusters.get(minIndex).add(iditer); + changed |= assignment.putInt(iditer, minIndex) != minIndex; + } + // Increment distance computations counter. + if(diststat != null) { + diststat.increment(dists); + } + return changed; + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<V extends NumberVector> extends AbstractKMeans.Parameterizer<V> { + @Override + protected Logging getLogger() { + return LOG; + } + + @Override + protected void getParameterDistanceFunction(Parameterization config) { + super.getParameterDistanceFunction(config); + if(distanceFunction instanceof SquaredEuclideanDistanceFunction) { + return; // Proper choice. + } + if(distanceFunction != null && !distanceFunction.isMetric()) { + LOG.warning("Compare k-means requires a metric distance, and k-means should only be used with squared Euclidean distance!"); + } + } + + @Override + protected KMeansSort<V> makeInstance() { + return new KMeansSort<>(distanceFunction, k, maxiter, initializer); + } + } +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java index 6d8b5874..7e4b4524 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java @@ -60,6 +60,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has MeanModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java index 567d6753..cc06a4d5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java @@ -72,6 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * (this variation is likely not worth publishing on its own). 
* * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has MedoidModel * @apiviz.composedOf KMedoidsInitialization diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java index fc3bdd5f..4d45eb3d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java @@ -77,6 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has MedoidModel * @apiviz.composedOf KMedoidsInitialization diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SingleAssignmentKMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SingleAssignmentKMeans.java index 5fa88a33..b4413904 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SingleAssignmentKMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SingleAssignmentKMeans.java @@ -48,6 +48,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * Pseudo-k-Means variations, that assigns each object to the nearest center. 
* * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has KMeansModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/XMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/XMeans.java index d354327b..75a87daa 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/XMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/XMeans.java @@ -77,6 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * * @author Tibor Goldschwendt * @author Erich Schubert + * @since 0.7.0 * * @param <V> Vector type * @param <M> Model type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java index 723c86cc..72f9a887 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * Abstract base class for common k-means initializations. 
* * @author Erich Schubert + * @since 0.3 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java index 4fa0d004..a686f345 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java @@ -40,6 +40,7 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; @@ -52,9 +53,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; * times will be more likely to return the same local minima. * * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type for kMedoids and kMedians */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.FarthestPointsInitialMeans") public class FarthestPointsInitialMeans<O> extends AbstractKMeansInitialization<NumberVector> implements KMedoidsInitialization<O> { /** * Discard the first vector. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java index 9cceb4f2..9f223dac 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.math.random.RandomFactory; * times will be more likely to return the same local minima. * * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type for kmedoids and kmedians */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java index beab423e..210994e5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java @@ -34,15 +34,18 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Initialize K-means by using the first k objects as initial means. * * @author Erich Schubert + * @since 0.4.0 * * @param <O> Object type for KMedoids */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.FirstKInitialMeans") public class FirstKInitialMeans<O> implements KMeansInitialization<NumberVector>, KMedoidsInitialization<O> { /** * Constructor. 
@@ -84,4 +87,4 @@ public class FirstKInitialMeans<O> implements KMeansInitialization<NumberVector> return new FirstKInitialMeans<>(); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java index 55ffb65e..c1cd71a0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio * Interface for initializing K-Means * * @author Erich Schubert + * @since 0.2 * * @apiviz.landmark * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java index c1518d18..11bfef59 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java @@ -41,6 +41,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; @@ -56,6 +57,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @param <O> Vector type */ @@ -63,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; title = "k-means++: the advantages of careful seeding", // booktitle = "Proc. 
of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2007", // url = "http://dx.doi.org/10.1145/1283383.1283494") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansPlusPlusInitialMeans") public class KMeansPlusPlusInitialMeans<O> extends AbstractKMeansInitialization<NumberVector> implements KMedoidsInitialization<O> { /** * Constructor. @@ -232,4 +235,4 @@ public class KMeansPlusPlusInitialMeans<O> extends AbstractKMeansInitialization< return new KMeansPlusPlusInitialMeans<>(rnd); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java index 0ae3723e..2dc11c52 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; * this initialization will only return members of the original data set. * * @author Erich Schubert + * @since 0.5.0 * * @param <V> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java index 396d79e6..ea11330e 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -57,12 +58,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @param <O> Object type for KMedoids initialization */ @Reference(title = "Clustering my means of Medoids", // authors = 
"Kaufman, L. and Rousseeuw, P.J.", // booktitle = "Statistical Data Analysis Based on the L_1–Norm and Related Methods") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.PAMInitialMeans") public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, KMedoidsInitialization<O> { /** * Class logger. @@ -78,6 +81,9 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K @Override public <T extends NumberVector, V extends NumberVector> List<V> chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction, NumberVector.Factory<V> factory) { + if(relation.size() < k) { + throw new AbortException("Database has less than k objects."); + } // Ugly cast; but better than code duplication. @SuppressWarnings("unchecked") Relation<O> rel = (Relation<O>) relation; @@ -97,38 +103,33 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) { ArrayModifiableDBIDs medids = DBIDUtil.newArray(k); DBIDVar bestid = DBIDUtil.newVar(); - WritableDoubleDataStore mindist = null; + // We need three temporary storage arrays: + WritableDoubleDataStore mindist, bestd, tempd; + mindist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + bestd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + tempd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); // First mean is chosen by having the smallest distance sum to all others. { double best = Double.POSITIVE_INFINITY; - WritableDoubleDataStore newd = null; FiniteProgress prog = LOG.isVerbose() ? 
new FiniteProgress("Choosing initial mean", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - if(newd == null) { - newd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - } - int sum = 0; + double sum = 0, d; for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { - double d = distQ.distance(iter, iter2); - sum += d; - newd.putDouble(iter2, d); + sum += d = distQ.distance(iter, iter2); + tempd.putDouble(iter2, d); } if(sum < best) { best = sum; bestid.set(iter); - if(mindist != null) { - mindist.destroy(); - } - mindist = newd; - newd = null; + // Swap mindist and newd: + WritableDoubleDataStore temp = mindist; + mindist = tempd; + tempd = temp; } LOG.incrementProcessed(prog); } LOG.ensureCompleted(prog); - if(newd != null) { - newd.destroy(); - } medids.add(bestid); } assert(mindist != null); @@ -138,44 +139,40 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K LOG.incrementProcessed(prog); // First one was just chosen. 
for(int i = 1; i < k; i++) { double best = Double.POSITIVE_INFINITY; - WritableDoubleDataStore bestd = null, newd = null; + bestid.unset(); for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { if(medids.contains(iter)) { continue; } - if(newd == null) { - newd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - } - double sum = 0.; + double sum = 0., v; for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { - double v = MathUtil.min(distQ.distance(iter, iter2), mindist.doubleValue(iter2)); - sum += v; - newd.put(iter2, v); + sum += v = MathUtil.min(distQ.distance(iter, iter2), mindist.doubleValue(iter2)); + tempd.put(iter2, v); } if(sum < best) { best = sum; bestid.set(iter); - if(bestd != null) { - bestd.destroy(); - } - bestd = newd; - newd = null; + // Swap bestd and newd: + WritableDoubleDataStore temp = bestd; + bestd = tempd; + tempd = temp; } } - if(bestd == null) { + if(!bestid.isSet()) { throw new AbortException("No median found that improves the criterion function?!? Too many infinite distances."); } medids.add(bestid); - if(newd != null) { - newd.destroy(); - } - mindist.destroy(); - mindist = bestd; + // Swap bestd and mindist: + WritableDoubleDataStore temp = bestd; + bestd = mindist; + mindist = temp; LOG.incrementProcessed(prog); } LOG.ensureCompleted(prog); mindist.destroy(); + bestd.destroy(); + tempd.destroy(); return medids; } @@ -192,4 +189,4 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K return new PAMInitialMeans<>(); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java index ca8a612a..b01651b8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java @@ -38,12 +38,13 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.VectorListParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleArrayListParameter; /** * Run k-means with prespecified initial means. 
* * @author Erich Schubert + * @since 0.7.0 */ public class PredefinedInitialMeans extends AbstractKMeansInitialization<NumberVector> { /** @@ -147,9 +148,12 @@ public class PredefinedInitialMeans extends AbstractKMeansInitialization<NumberV @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - VectorListParameter meansP = new VectorListParameter(INITIAL_MEANS); + DoubleArrayListParameter meansP = new DoubleArrayListParameter(INITIAL_MEANS); if(config.grab(meansP)) { - initialMeans = meansP.getValue(); + initialMeans = new ArrayList<>(meansP.getValue().size()); + for(double[] v : meansP.getValue()) { + initialMeans.add(new Vector(v)); + } } } diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java index b6cfad8d..bb03b278 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -51,12 +52,14 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * available in Biometrics). * * @author Erich Schubert + * @since 0.4.0 * * @param <O> Vector type */ @Reference(authors = "E. W. 
Forgy", // title = "Cluster analysis of multivariate data: efficiency versus interpretability of classifications", // booktitle = "Biometrics 21(3)") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.RandomlyChosenInitialMeans") public class RandomlyChosenInitialMeans<O> extends AbstractKMeansInitialization<NumberVector> implements KMedoidsInitialization<O> { /** * Constructor. @@ -95,4 +98,4 @@ public class RandomlyChosenInitialMeans<O> extends AbstractKMeansInitialization< return new RandomlyChosenInitialMeans<>(rnd); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java index bc2181e8..30d1b00f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java @@ -33,13 +33,16 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; /** * Initialize k-means by generating random vectors (within the data sets value * range). * * @author Erich Schubert + * @since 0.5.0 */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.RandomlyGeneratedInitialMeans") public class RandomlyGeneratedInitialMeans extends AbstractKMeansInitialization<NumberVector> { /** * Constructor. @@ -84,4 +87,4 @@ public class RandomlyGeneratedInitialMeans extends AbstractKMeansInitialization< return new RandomlyGeneratedInitialMeans(rnd); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java index ac7f7f27..f5ed00f1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; @@ -53,9 +54,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Initialize k-means by running k-means on a sample of the data set only. * * @author Erich Schubert + * @since 0.6.0 * * @param <V> Vector type */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.SampleKMeansInitialization") public class SampleKMeansInitialization<V extends NumberVector> extends AbstractKMeansInitialization<V> { /** * Variant of kMeans to use for initialization. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/KMeansProcessor.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/KMeansProcessor.java index c0e34f12..f2b9db1f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/KMeansProcessor.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/KMeansProcessor.java @@ -41,6 +41,7 @@ import de.lmu.ifi.dbs.elki.parallel.processor.Processor; * Parallel k-means implementation. * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/ParallelLloydKMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/ParallelLloydKMeans.java index a10d45ae..e214581a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/ParallelLloydKMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/parallel/ParallelLloydKMeans.java @@ -51,6 +51,7 @@ import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; * Parallel implementation of k-Means clustering. * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has KMeansProcessor * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AbstractKMeansQualityMeasure.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AbstractKMeansQualityMeasure.java index 721b74b6..cc4c00ea 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AbstractKMeansQualityMeasure.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AbstractKMeansQualityMeasure.java @@ -65,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * * @author Tibor Goldschwendt * @author Erich Schubert + * @since 0.7.0 */ @Reference(authors = "D. Pelleg, A. 
Moore", // booktitle = "X-means: Extending K-means with Efficient Estimation on the Number of Clusters", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AkaikeInformationCriterion.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AkaikeInformationCriterion.java index e1e72b51..aafd6173 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AkaikeInformationCriterion.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/AkaikeInformationCriterion.java @@ -51,6 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * * @author Tibor Goldschwendt * @author Erich Schubert + * @since 0.2 */ @Reference(authors = "H. Akaike", // title = "On entropy maximization principle", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterion.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterion.java index 8b21753d..22ad56d9 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterion.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterion.java @@ -52,6 +52,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * * @author Tibor Goldschwendt * @author Erich Schubert + * @since 0.6.0 */ @Reference(authors = "G. 
Schwarz", // title = "Estimating the dimension of a model", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterionZhao.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterionZhao.java index 755d7137..6a18269a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterionZhao.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/BayesianInformationCriterionZhao.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * * @author Tibor Goldschwendt * @author Erich Schubert + * @since 0.2 */ @Reference(authors = "Q. Zhao, M. Xu, P. Fränti", // title = "Knee Point Detection on Bayesian Information Criterion", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java index bd0f12a1..100c2dfc 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio * Important note: some measures are ascending, others are descending! 
* * @author Erich Schubert + * @since 0.2 * * @param <O> Input Object restriction type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java index cd7a4d14..e7069ac0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio * The average of all average pairwise distances in a cluster. * * @author Stephan Baier + * @since 0.6.0 */ public class WithinClusterMeanDistanceQualityMeasure implements KMeansQualityMeasure<NumberVector> { @Override diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java index 09017413..fbd04dbd 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio * * @author Stephan Baier * @author Erich Schubert + * @since 0.6.0 */ public class WithinClusterVarianceQualityMeasure extends AbstractKMeansQualityMeasure<NumberVector> { @Override diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/meta/ExternalClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/meta/ExternalClustering.java index ef8045a9..634eb057 100644 --- 
a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/meta/ExternalClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/meta/ExternalClustering.java @@ -79,6 +79,7 @@ import gnu.trove.map.hash.TIntObjectHashMap; * considered noise clusters. * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf CSVReaderFormat * @apiviz.has Clustering diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java index 71167a90..4b2d5b8a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/onedimensional/KNNKernelDensityMinimaClustering.java @@ -58,6 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * performing kernel density estimation. 
* * @author Erich Schubert + * @since 0.6.0 */ public class KNNKernelDensityMinimaClustering<V extends NumberVector> extends AbstractAlgorithm<Clustering<ClusterModel>> implements ClusteringAlgorithm<Clustering<ClusterModel>> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/AbstractOPTICS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/AbstractOPTICS.java index da4c94f1..da0a6dc3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/AbstractOPTICS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/AbstractOPTICS.java @@ -53,6 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Elke Achtert * @author Erich Schubert + * @since 0.7.0 * * @param <O> the type of DatabaseObjects handled by the algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/ClusterOrder.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/ClusterOrder.java index 93a24407..dc50b086 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/ClusterOrder.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/ClusterOrder.java @@ -46,6 +46,7 @@ import de.lmu.ifi.dbs.elki.result.OrderingResult; * Class to store the result of an ordering clustering algorithm such as OPTICS. 
* * @author Erich Schubert + * @since 0.7.0 * * @apiviz.landmark */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/CorrelationClusterOrder.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/CorrelationClusterOrder.java index 6b94e358..d8bdf5a8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/CorrelationClusterOrder.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/CorrelationClusterOrder.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; * * @author Elke Achtert * @author Erich Schubert + * @since 0.7.0 */ public class CorrelationClusterOrder extends ClusterOrder { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/DeLiClu.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/DeLiClu.java index e2017f85..27737f60 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/DeLiClu.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/DeLiClu.java @@ -80,6 +80,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Elke Achtert + * @since 0.7.0 * @param <NV> the type of NumberVector handled by this Algorithm */ @Title("DeliClu: Density-Based Hierarchical Clustering") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/FastOPTICS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/FastOPTICS.java index 9ec79b29..5df5712b 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/FastOPTICS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/FastOPTICS.java @@ -73,6 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Johannes Schneider * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf RandomProjectedNeighborsAndDensities */ @@ -158,7 +159,7 
@@ public class FastOPTICS<V extends NumberVector> extends AbstractAlgorithm<Cluste neighs = index.getNeighs(); // get neighbors of points // compute ordering as for OPTICS - FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("OPTICS clustering.", ids.size(), LOG) : null; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("FastOPTICS clustering", ids.size(), LOG) : null; processed = DBIDUtil.newHashSet(ids.size()); order = new ClusterOrder(ids, "FastOPTICS Cluster Order", "fast-optics"); for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/GeneralizedOPTICS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/GeneralizedOPTICS.java index 771d59e8..5b47ec16 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/GeneralizedOPTICS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/GeneralizedOPTICS.java @@ -48,6 +48,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; * distances, and serves as a base for several other algorithms (HiCO, HiSC). * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf Instance * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeap.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeap.java index cb1265eb..a7d7b3f7 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeap.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeap.java @@ -58,6 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * * @author Elke Achtert * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf Instance * @@ -69,8 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; title = "OPTICS: Ordering Points to Identify the Clustering Structure", // booktitle = "Proc. ACM SIGMOD Int. Conf. 
on Management of Data (SIGMOD '99)", // url = "http://dx.doi.org/10.1145/304181.304187") -@Alias({ "OPTICS", "de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICS", // -"de.lmu.ifi.dbs.elki.algorithm.clustering.optics.OPTICS" }) +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICS") public class OPTICSHeap<O> extends AbstractOPTICS<O> { /** * The logger for this class. @@ -213,4 +213,4 @@ public class OPTICSHeap<O> extends AbstractOPTICS<O> { return new OPTICSHeap<>(distanceFunction, epsilon, minpts); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeapEntry.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeapEntry.java index 89e6ebb1..0b181442 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeapEntry.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSHeapEntry.java @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; * Entry in the priority heap. * * @author Elke Achtert + * @since 0.2 * * @apiviz.exclude */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSList.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSList.java index 34d822cc..313bba79 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSList.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSList.java @@ -65,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * * @author Elke Achtert * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf OPTICSList.Instance * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSTypeAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSTypeAlgorithm.java index 695ef8fd..9b9cde62 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSTypeAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSTypeAlgorithm.java @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.Database; * Interface for OPTICS type algorithms, that can be analyzed by OPTICS Xi etc. 
* * @author Erich Schubert + * @since 0.4.0 * * @apiviz.has ClusterOrder */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSXi.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSXi.java index 087876dc..7ce0bba1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSXi.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/optics/OPTICSXi.java @@ -66,12 +66,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; * This removes a popular type of artifacts. * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf OPTICSTypeAlgorithm oneway - «runs» * @apiviz.uses ClusterOrder oneway - «reads» * @apiviz.has SteepAreaResult oneway - «produces» */ -@Alias({ "OPTICSXi", "de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICSXi" }) +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICSXi") public class OPTICSXi extends AbstractAlgorithm<Clustering<OPTICSModel>> implements ClusteringAlgorithm<Clustering<OPTICSModel>> { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java index 86aa3b3f..c8dada62 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java @@ -85,6 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * </p> * * @author Elke Achtert + * @since 0.2 * * @apiviz.has SubspaceModel * @apiviz.has CLIQUESubspace diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java index 896519b3..a19ea88a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DOC.java @@ -77,6 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * </p> * * @author Florian Nuecke + * @since 0.6.0 * * @apiviz.has SubspaceModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java index 1bbd4def..7653246f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java @@ -95,6 +95,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * </p> * * @author Elke Achtert + * @since 0.2 * * @apiviz.uses DiSHPreferenceVectorIndex * @apiviz.has SubspaceModel diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java index 64bde076..088b81d9 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java +++ 
b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java @@ -69,6 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * </p> * * @author Elke Achtert + * @since 0.3 * * @apiviz.uses HiSCPreferenceVectorIndex * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java index 03670579..02e7792c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/P3C.java @@ -93,6 +93,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Florian Nuecke * @author Erich Schubert + * @since 0.6.0 * * @apiviz.uses EM * @apiviz.has SubspaceModel @@ -363,7 +364,7 @@ public class P3C<V extends NumberVector> extends AbstractAlgorithm<Clustering<Su * @return p-signatures */ private ArrayList<Signature> mergeClusterCores(final int binCount, ArrayList<Signature> signatures) { - MutableProgress mergeProgress = LOG.isVerbose() ? new MutableProgress("Merging signatures.", signatures.size(), LOG) : null; + MutableProgress mergeProgress = LOG.isVerbose() ? new MutableProgress("Merging signatures", signatures.size(), LOG) : null; // Annotate dimensions to 1-signatures for quick stopping. 
int[] firstdim = new int[signatures.size()]; diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java index 45cc4c97..dcc84d20 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java @@ -88,6 +88,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * </p> * * @author Elke Achtert + * @since 0.2 * * @apiviz.has SubspaceModel * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java index f7533443..9a98396b 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java @@ -52,6 +52,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Peer Kröger + * @since 0.2 * * @apiviz.has PreDeCon.Settings * @apiviz.composedOf PreDeConNeighborPredicate diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java index b8856044..f20064be 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java @@ -74,6 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Elke Achtert + * @since 0.2 * * @apiviz.uses DBSCAN * @apiviz.uses DimensionSelectingSubspaceDistanceFunction diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java 
b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java index 453b26dc..e3f97350 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java @@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.data.model.SubspaceModel; * {@link SubspaceModel}, that can then be post-processed for outlier detection. * * @author Erich Schubert + * @since 0.4.0 * * @param <M> Model type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEInterval.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEInterval.java index 510f10a8..7305d43f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEInterval.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEInterval.java @@ -29,6 +29,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil; * Represents an interval in a certain dimension of the data space. * * @author Elke Achtert + * @since 0.2 */ public class CLIQUEInterval implements Comparable<CLIQUEInterval> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java index cda00f8b..280702cb 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java @@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * Represents a subspace of the original data space in the CLIQUE algorithm. 
* * @author Elke Achtert + * @since 0.2 * * @apiviz.has CoverageComparator * @apiviz.composedOf CLIQUEUnit diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java index cf96670e..f7b581a7 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; * Represents a unit in the CLIQUE algorithm. * * @author Elke Achtert + * @since 0.2 * * @apiviz.composedOf CLIQUEInterval * @apiviz.composedOf ModifiableDBIDs diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java index 29006f01..d2ba1b38 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java @@ -47,6 +47,7 @@ import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -65,16 +66,18 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter; * * If an assignment of an object to multiple clusters is desired, the labels of * the object indicating the clusters need to be separated by blanks and the - * flag {@link #MULTIPLE_ID} needs to be set. 
+ * flag {@link Parameterizer#MULTIPLE_ID} needs to be set. * * TODO: handling of data sets with no labels? * * @author Erich Schubert + * @since 0.2 * * @apiviz.uses de.lmu.ifi.dbs.elki.data.ClassLabel */ @Title("Clustering by label") @Description("Cluster points by a (pre-assigned!) label. For comparing results with a reference clustering.") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.ByLabelClustering") public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> { /** * The logger for this class. @@ -82,24 +85,12 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl private static final Logging LOG = Logging.getLogger(ByLabelClustering.class); /** - * Flag to indicate that multiple cluster assignment is possible. If an - * assignment to multiple clusters is desired, the labels indicating the - * clusters need to be separated by blanks. - */ - public static final OptionID MULTIPLE_ID = new OptionID("bylabelclustering.multiple", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned."); - - /** - * Pattern to recognize noise clusters by. - */ - public static final OptionID NOISE_ID = new OptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label."); - - /** - * Holds the value of {@link #MULTIPLE_ID}. + * Allow multiple cluster assignment. */ private boolean multiple; /** - * Holds the value of {@link #NOISE_ID}. + * Pattern to recognize noise clusters by. */ private Pattern noisepattern = null; @@ -248,8 +239,26 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Flag to indicate that multiple cluster assignment is possible. 
If an + * assignment to multiple clusters is desired, the labels indicating the + * clusters need to be separated by blanks. + */ + public static final OptionID MULTIPLE_ID = new OptionID("bylabelclustering.multiple", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned."); + + /** + * Parameter to specify the pattern to recognize noise clusters by. + */ + public static final OptionID NOISE_ID = new OptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label."); + + /** + * Allow multiple cluster assignment. + */ protected boolean multiple; + /** + * Pattern to recognize noise clusters by. + */ protected Pattern noisepat; @Override @@ -272,4 +281,4 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl return new ByLabelClustering(multiple, noisepat); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java index e246c4e6..d48efe68 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java @@ -47,6 +47,7 @@ import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -64,11 +65,13 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * TODO: Noise handling (e.g. allow the user to specify a noise label pattern?) * * @author Erich Schubert + * @since 0.2 * * @apiviz.uses de.lmu.ifi.dbs.elki.data.ClassLabel */ @Title("Hierarchical clustering by label") @Description("Cluster points by a (pre-assigned!) label. For comparing results with a reference clustering.") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.ByLabelHierarchicalClustering") public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java index 12af74ba..7de4c4f0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java @@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; * "all-in-one" clustering. * * @author Erich Schubert + * @since 0.2 */ public class ByLabelOrAllInOneClustering extends ByLabelClustering { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java index 9832ea4a..0583ceb5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java @@ -56,6 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter; * reference result / golden standard used by the generator). * * @author Erich Schubert + * @since 0.5.0 * * @apiviz.uses Model */ @@ -68,12 +69,7 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl private static final Logging LOG = Logging.getLogger(ByModelClustering.class); /** - * Pattern to recognize noise clusters with - */ - public static final OptionID NOISE_ID = new OptionID("bymodel.noise", "Pattern to recognize noise models by their label."); - - /** - * Holds the value of {@link #NOISE_ID}. + * Pattern to recognize noise clusters with. 
*/ private Pattern noisepattern = null; @@ -144,6 +140,14 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { + /** + * Parameter to specify the pattern to recognize noise clusters with. + */ + public static final OptionID NOISE_ID = new OptionID("bymodel.noise", "Pattern to recognize noise models by their label."); + + /** + * Pattern to recognize noise clusters with. + */ protected Pattern noisepat; @Override diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java index 64c103c3..188db3e9 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -44,9 +45,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * Useful for evaluation and testing. * * @author Erich Schubert + * @since 0.2 */ @Title("Trivial all-in-one clustering") @Description("Returns a 'tivial' clustering which just considers all points to be one big cluster.") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.TrivialAllInOne") public class TrivialAllInOne extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> { /** * The logger for this class. @@ -77,4 +80,4 @@ public class TrivialAllInOne extends AbstractAlgorithm<Clustering<Model>> implem protected Logging getLogger() { return LOG; } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java index db4cb8dd..4db4dfed 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -43,9 +44,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * Useful for evaluation and testing. * * @author Erich Schubert + * @since 0.2 */ @Title("Trivial all-noise clustering") @Description("Returns a 'trivial' clustering which just considers all points as noise points.") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.TrivialAllNoise") public class TrivialAllNoise extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> { /** * The logger for this class. @@ -76,4 +79,4 @@ public class TrivialAllNoise extends AbstractAlgorithm<Clustering<Model>> implem protected Logging getLogger() { return LOG; } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CKMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CKMeans.java index 14915807..3d095cbd 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CKMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CKMeans.java @@ -57,6 +57,7 @@ import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; * however, is constant. * * @author Erich Schubert + * @since 0.7.0 */ @Reference(authors = "S. D. Lee, B. Kao, R. Cheng", // title = "Reducing UK-means to K-means", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CenterOfMassMetaClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CenterOfMassMetaClustering.java index 1187ff30..961e7fc2 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CenterOfMassMetaClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/CenterOfMassMetaClustering.java @@ -66,6 +66,7 @@ import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @param <C> Clustering result type (inherited from inner algorithm) */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCAN.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCAN.java index 47f3e6bb..4c177463 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCAN.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCAN.java @@ -52,6 +52,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf FDBSCANNeighborPredicate */ diff --git 
a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCANNeighborPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCANNeighborPredicate.java index a51ec92a..8fc3bd03 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCANNeighborPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/FDBSCANNeighborPredicate.java @@ -76,6 +76,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * * @author Alexander Koos * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/RepresentativeUncertainClustering.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/RepresentativeUncertainClustering.java index da377ea1..c1ee8ed8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/RepresentativeUncertainClustering.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/RepresentativeUncertainClustering.java @@ -104,6 +104,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; * * @author Alexander Koos * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has RepresentativenessEvaluation */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/UKMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/UKMeans.java index fc602d1f..14aee8aa 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/UKMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/uncertain/UKMeans.java @@ -76,6 +76,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * </p> * * @author Klaus Arthur Schmidt + * @since 0.7.0 */ @Reference(authors = "M. Chau, R. Cheng, B. Kao, J. 
Ng", // title = "Uncertain data mining: An example in clustering location data", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java index 64cfa509..3555e307 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/APRIORI.java @@ -23,9 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.itemsetmining; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import gnu.trove.iterator.TLongIntIterator; -import gnu.trove.map.hash.TLongIntHashMap; - import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; @@ -46,11 +43,14 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.statistics.Duration; import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; import de.lmu.ifi.dbs.elki.result.FrequentItemsetsResult; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import gnu.trove.iterator.TLongIntIterator; +import gnu.trove.map.hash.TLongIntHashMap; /** * The APRIORI algorithm for Mining Association Rules. @@ -72,6 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * * @author Arthur Zimek * @author Erich Schubert + * @since 0.2 * * @apiviz.has Itemset * @apiviz.uses BitVector @@ -82,6 +83,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; title = "Fast Algorithms for Mining Association Rules", // booktitle = "Proc. 20th Int. Conf. 
on Very Large Data Bases (VLDB '94), Santiago de Chile, Chile 1994", // url = "http://www.vldb.org/conf/1994/P487.PDF") +@Alias("de.lmu.ifi.dbs.elki.algorithm.APRIORI") public class APRIORI extends AbstractFrequentItemsetAlgorithm { /** * The logger for this class. @@ -596,4 +598,4 @@ public class APRIORI extends AbstractFrequentItemsetAlgorithm { return new APRIORI(minsupp, minlength, maxlength); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/AbstractFrequentItemsetAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/AbstractFrequentItemsetAlgorithm.java index 31947c33..4f73d247 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/AbstractFrequentItemsetAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/AbstractFrequentItemsetAlgorithm.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * Abstract base class for frequent itemset mining. * * @author Erich Schubert + * @since 0.7.0 */ public abstract class AbstractFrequentItemsetAlgorithm extends AbstractAlgorithm<FrequentItemsetsResult> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/DenseItemset.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/DenseItemset.java index d77a0915..7896bb00 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/DenseItemset.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/DenseItemset.java @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.utilities.BitsUtil; * APRIORI itemset. * * @author Erich Schubert + * @since 0.7.0 */ public class DenseItemset extends Itemset { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Eclat.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Eclat.java index 47f75c54..9e62c5ed 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Eclat.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Eclat.java @@ -74,6 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Erich Schubert + * @since 0.7.0 */ @Reference(title = "New Algorithms for Fast Discovery of Association Rules", // authors = "M.J. Zaki, S. Parthasarathy, M. Ogihara, and W. 
Li", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/FPGrowth.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/FPGrowth.java index 5cdde71a..a09bf551 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/FPGrowth.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/FPGrowth.java @@ -75,6 +75,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf FPTree */ @@ -194,7 +195,7 @@ public class FPGrowth extends AbstractFrequentItemsetAlgorithm { */ private int[] countItemSupport(final Relation<BitVector> relation, final int dim) { final int[] counts = new int[dim]; - FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding frequent 1-items.", relation.size(), LOG) : null; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding frequent 1-items", relation.size(), LOG) : null; for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { SparseFeatureVector<?> bv = relation.get(iditer); // TODO: only count those which satisfy minlength? diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Itemset.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Itemset.java index c195a79f..7e6446e7 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Itemset.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/Itemset.java @@ -29,6 +29,7 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; * APRIORI itemset. 
* * @author Erich Schubert + * @since 0.7.0 */ public abstract class Itemset implements Comparable<Itemset> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/OneItemset.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/OneItemset.java index 9fddd758..63ca1dde 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/OneItemset.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/OneItemset.java @@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * APRIORI itemset. * * @author Erich Schubert + * @since 0.7.0 */ public class OneItemset extends Itemset { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SmallDenseItemset.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SmallDenseItemset.java index e1b8ca55..c68c65ab 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SmallDenseItemset.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SmallDenseItemset.java @@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.BitsUtil; * APRIORI itemset. * * @author Erich Schubert + * @since 0.7.0 */ public class SmallDenseItemset extends Itemset { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SparseItemset.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SparseItemset.java index 87534439..3eada0a6 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SparseItemset.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/itemsetmining/SparseItemset.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * APRIORI itemset. 
* * @author Erich Schubert + * @since 0.7.0 */ public class SparseItemset extends Itemset { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java index 7f43796b..d2ba2160 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java @@ -83,6 +83,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.2 * * @param <V> the type of NumberVector handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java index a3e9e1d2..51ca8848 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java @@ -90,6 +90,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Omar Yousry + * @since 0.6.0 * * @param <O> the type of DatabaseObjects handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java index 25a776e0..4f6ba095 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java @@ -56,6 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; * <em>probability density</em> of the assumed distribution. 
* * @author Lisa Reichert + * @since 0.3 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java index cb170573..190bbb55 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java @@ -74,6 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * </p> * * @author Lisa Reichert + * @since 0.3 * * @param <V> Vector Type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java index 484736bb..ccde20eb 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java @@ -72,6 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * and Data Mining (PKDD), Prague, Czech Republic * * @author Ahmed Hettab + * @since 0.3 * * @apiviz.has KNNQuery * @apiviz.has RangeQuery diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java index 6d671cd0..fff0310c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java @@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; * Generic super interface for outlier detection algorithms. 
* * @author Erich Schubert + * @since 0.4.0 * * @apiviz.landmark * @apiviz.excludeSubtypes diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java index 6b8f645d..271d0467 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java @@ -70,6 +70,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Algorithm to compute local correlation outlier probability. * * This is the simpler, original version of COP, as published in + * + * Reference: * <p> * Arthur Zimek<br /> * Correlation Clustering.<br /> @@ -78,10 +80,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * which has then been refined to the method published as {@link COP} * * @author Erich Schubert + * @since 0.5.5 * @param <V> the type of NumberVector handled by this Algorithm */ @Title("Simple COP: Correlation Outlier Probability") -@Reference(authors = "Arthur Zimek", title = "Correlation Clustering. PhD thesis, Chapter 18", booktitle = "") +@Reference(authors = "Arthur Zimek", // +title = "Correlation Clustering", // +booktitle = "PhD thesis, Chapter 18") public class SimpleCOP<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java index a78f76c9..946c94ad 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java @@ -75,6 +75,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @author Matthias Schubert (Original Code) * @author Erich Schubert (ELKIfication) + * @since 0.2 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java index 88a606d5..5ff844a5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java @@ -75,6 +75,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Matthias Schubert (Original Code) * @author Erich Schubert (ELKIfication) + * @since 0.6.0 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java index 945d3d46..cb0e4114 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java @@ -82,6 +82,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Matthias Schubert (Original Code) * @author Erich Schubert (ELKIfication) + * @since 0.6.0 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java 
b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java index fc33d88f..62f492e3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java @@ -43,6 +43,7 @@ import de.lmu.ifi.dbs.elki.result.Result; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; @@ -57,6 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz * object is an outlier. * * @author Lisa Reichert + * @since 0.3 * * @apiviz.has EM * @@ -65,6 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz // TODO: re-use an existing EM when present? @Title("EM Outlier: Outlier Detection based on the generic EM clustering") @Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.") +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.EMOutlier") public class EMOutlier<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java index 4db56748..b4a7b44c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java @@ -67,6 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * publication list</a> for citation information and BibTeX templates). * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has KMeans * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java index 34600968..a4a3d7f1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java @@ -74,6 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * for citation information and BibTeX templates). 
* * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has ClusteringAlgorithm * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java index 6a7a2922..7d7fc950 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java @@ -53,6 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * </p> * * @author Lisa Reichert + * @since 0.3 * * @param <O> the type of DatabaseObjects handled by this Algorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java index bee17874..d9e705d8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java @@ -62,6 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * 3.1. 
* * @author Lisa Reichert + * @since 0.3 * * @apiviz.has KNNQuery * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java index e1775bad..354057a4 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java @@ -54,6 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * </p> * * @author Lisa Reichert + * @since 0.3 * * @apiviz.has RangeQuery * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java index c748c832..35fe19e0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java @@ -85,6 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @author Jonathan von Brünken * @author Erich Schubert + * @since 0.5.0 * * @apiviz.composedOf HilbertFeatures * @apiviz.uses HilFeature diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java index 649ea088..41653ac3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java @@ -73,6 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Lisa Reichert + * @since 0.3 * * @apiviz.has KNNQuery * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java index 
3aeb6da2..5b92cef5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java @@ -81,6 +81,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Lisa Reichert + * @since 0.3 * * @apiviz.has KNNQuery * @@ -125,7 +126,7 @@ public class KNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, Outli final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction()); KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1); // + query point - FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute kNN weights.", relation.size(), LOG) : null; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute kNN weights", relation.size(), LOG) : null; DoubleMinMax minmax = new DoubleMinMax(); WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/LocalIsolationCoefficient.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/LocalIsolationCoefficient.java index 33450eb1..034f6af2 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/LocalIsolationCoefficient.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/LocalIsolationCoefficient.java @@ -68,6 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.3 * * @apiviz.has KNNQuery * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java index e249e543..534f2be1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java +++ 
b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java @@ -65,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java index cf184221..304474a3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java @@ -81,6 +81,7 @@ import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic; * * @author Lisa Reichert * @author Erich Schubert + * @since 0.3 * * @apiviz.composedOf ReferencePointsHeuristic */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java index 60ebcd8b..3de88864 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; * Needs the k nearest neighbors as input, for example from {@link KNNProcessor} * * @author Erich Schubert + * @since 0.7.0 */ public class KNNWeightProcessor extends AbstractDoubleProcessor { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java index 3b4c17f1..96c70b60 100644 --- 
a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java @@ -76,6 +76,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf KNNProcessor * @apiviz.composedOf KDistanceProcessor diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java index 571c2038..7f86e48f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java @@ -76,6 +76,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.composedOf KNNWeightProcessor * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IDOS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IDOS.java index b462b836..442a96a1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IDOS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IDOS.java @@ -70,6 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p>
*
* @author Jonathan von Brünken
* @author Erich Schubert
+ * @since 0.7.0
*
* @param <O> Object type
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IntrinsicDimensionalityOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IntrinsicDimensionalityOutlier.java index 1b2d25ef..04679930 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IntrinsicDimensionalityOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/intrinsic/IntrinsicDimensionalityOutlier.java @@ -63,6 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Please see their upcoming publications for an improved solution. * * @author Erich Schubert + * @since 0.3 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java index f7edae0f..e0fefe2d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java @@ -55,6 +55,7 @@ import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -67,27 +68,31 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; /** * Fast Outlier Detection Using the "approximate Local Correlation Integral". - * + * * Outlier detection using multiple epsilon neighborhoods. - * + * * Reference: * <p> - * S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos:<br /> - * LOCI: Fast Outlier Detection Using the Local Correlation Integral.<br /> - * In: Proc. 19th IEEE Int. Conf. 
on Data Engineering (ICDE '03), Bangalore, - * India, 2003. + * S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos: <br /> + * LOCI: Fast Outlier Detection Using the Local Correlation Integral. <br /> + * In: Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03) * </p> - * + * * @author Jonathan von Brünken * @author Erich Schubert - * + * @since 0.5.0 + * * @apiviz.composedOf ALOCIQuadTree - * + * * @param <O> Object type */ @Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral") @Description("Algorithm to compute outliers based on the Local Correlation Integral") -@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802") +@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", // +title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", // +booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03)", // +url = "http://dx.doi.org/10.1109/ICDE.2003.1260802") +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.ALOCI") public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. @@ -121,7 +126,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Constructor. 
- * + * * @param distanceFunction Distance function * @param nmin Minimum neighborhood size * @param alpha Alpha value @@ -246,10 +251,10 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Method for the MDEF calculation - * + * * @param sn Sampling Neighborhood * @param cg Counting Neighborhood - * + * * @return MDEF norm */ private static double calculate_MDEF_norm(Node sn, Node cg) { @@ -294,13 +299,13 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Simple quadtree for ALOCI. Not storing the actual objects, just the counts. - * + * * Furthermore, the quadtree can be shifted by a specified vector, wrapping * around min/max - * + * * @author Jonathan von Brünken * @author Erich Schubert - * + * * @apiviz.composedOf Node */ static class ALOCIQuadTree { @@ -326,7 +331,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Constructor. - * + * * @param min Minimum coordinates * @param max Maximum coordinates * @param shift Tree shift offset @@ -364,7 +369,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Bulk load the tree - * + * * @param lmin Subtree minimum (unshifted, will be modified) * @param lmax Subtree maximum (unshifted, will be modified) * @param children List of children for current parent @@ -461,7 +466,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Shift and wrap a single dimension. - * + * * @param obj Object * @param dim Dimension * @param level Level (controls scaling/wraping!) @@ -476,7 +481,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Find the closest node (of depth tlevel or above, if there is no node at * this depth) for the given vector. 
- * + * * @param vec Query vector * @param tlevel Target level * @return Node @@ -511,7 +516,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Node of the ALOCI Quadtree - * + * * @author Erich Schubert */ static class Node { @@ -547,7 +552,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Constructor. - * + * * @param code Node code * @param center Center vector * @param count Element count @@ -569,7 +574,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Get level of node. - * + * * @return Level of node */ public int getLevel() { @@ -578,7 +583,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Get count of subtree - * + * * @return subtree count */ public int getCount() { @@ -587,7 +592,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Return center vector - * + * * @return center vector */ public Vector getCenter() { @@ -596,7 +601,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Get sum of squares, recursively - * + * * @param levels Depth to collect * @return Sum of squares */ @@ -613,7 +618,7 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Get cubic sum. - * + * * @param levels Level to collect * @return sum of cubes */ @@ -631,9 +636,9 @@ public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResu /** * Parameterization class. 
- * + * * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java index 87019ae6..61cb915f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java @@ -67,6 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.2 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java index e38e8da2..eae6f530 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java @@ -107,6 +107,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * @author Peer Kröger * @author Erich Schubert * @author Elke Achtert + * @since 0.2 * * @apiviz.has LOFResult oneway - - computes * @apiviz.has KNNQuery diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java index 1e77cab5..375031f9 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import 
de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -72,6 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Ahmed Hettab * @author Erich Schubert + * @since 0.3 * * @apiviz.has KNNQuery * @@ -83,6 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; title = "Ranking outliers using symmetric neighborhood relationship", // booktitle = "Proc. 10th Pacific-Asia conference on Advances in Knowledge Discovery and Data Mining", // url = "http://dx.doi.org/10.1007/11731139_68") +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.INFLO") public class INFLO<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java index 1583ac99..92f0d7a9 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java @@ -93,6 +93,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has KNNQuery * @apiviz.has KernelDensityFunction @@ -203,7 +204,7 @@ public class KDEOS<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult>im densities.put(iter, new double[knum]); } // Distribute densities: - FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing densities.", ids.size(), LOG) : null; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing densities", ids.size(), LOG) : null; double iminbw = (minBandwidth > 0.) ? 1. 
/ (minBandwidth * scale) : Double.POSITIVE_INFINITY; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { KNNList neighbors = knnq.getKNNForDBID(iter, kmax + 1); @@ -269,7 +270,7 @@ public class KDEOS<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult>im */ protected void computeOutlierScores(KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities, WritableDoubleDataStore kdeos, DoubleMinMax minmax) { final int knum = kmax + 1 - kmin; - FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing KDEOS scores.", ids.size(), LOG) : null; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing KDEOS scores", ids.size(), LOG) : null; double[][] scratch = new double[knum][kmax + 5]; MeanVariance mv = new MeanVariance(); @@ -339,32 +340,32 @@ public class KDEOS<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult>im /** * Parameter to specify the kernel density function. */ - private static final OptionID KERNEL_ID = new OptionID("kdeos.kernel", "Kernel density function to use."); + public static final OptionID KERNEL_ID = new OptionID("kdeos.kernel", "Kernel density function to use."); /** * Parameter to specify the minimum bandwidth. */ - private static final OptionID KERNEL_MIN_ID = new OptionID("kdeos.kernel.minbw", "Minimum bandwidth for kernel density estimation."); + public static final OptionID KERNEL_MIN_ID = new OptionID("kdeos.kernel.minbw", "Minimum bandwidth for kernel density estimation."); /** * Parameter to specify the kernel scaling factor. */ - private static final OptionID KERNEL_SCALE_ID = new OptionID("kdeos.kernel.scale", "Scaling factor for the kernel function."); + public static final OptionID KERNEL_SCALE_ID = new OptionID("kdeos.kernel.scale", "Scaling factor for the kernel function."); /** * Minimum value of k to analyze. 
*/ - private static final OptionID KMIN_ID = new OptionID("kdeos.k.min", "Minimum value of k to analyze."); + public static final OptionID KMIN_ID = new OptionID("kdeos.k.min", "Minimum value of k to analyze."); /** * Maximum value of k to analyze. */ - private static final OptionID KMAX_ID = new OptionID("kdeos.k.max", "Maximum value of k to analyze."); + public static final OptionID KMAX_ID = new OptionID("kdeos.k.max", "Maximum value of k to analyze."); /** * Intrinsic dimensionality. */ - private static final OptionID IDIM_ID = new OptionID("kdeos.idim", "Intrinsic dimensionality of this data set. Use -1 for using the true data dimensionality, but values such as 0-2 often offer better performance."); + public static final OptionID IDIM_ID = new OptionID("kdeos.idim", "Intrinsic dimensionality of this data set. Use -1 for using the true data dimensionality, but values such as 0-2 often offer better performance."); /** * Kernel function to use for density estimation. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java index 056a6ba3..c4067504 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java @@ -54,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -77,6 +78,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.5.5 * * @apiviz.has KNNQuery * 
@apiviz.has KernelDensityFunction @@ -87,6 +89,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; title = "Outlier Detection with Kernel Density Functions", // booktitle = "Machine Learning and Data Mining in Pattern Recognition", // url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6") +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.LDF") public class LDF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java index b41af3bd..9dadf0e3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java @@ -71,6 +71,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Arthur Zimek + * @since 0.3 * * @apiviz.has KNNQuery * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java index 7b332639..f8623bcc 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java @@ -63,26 +63,33 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; /** * Fast Outlier Detection Using the "Local Correlation Integral". - * + * * Exact implementation only, not aLOCI. See {@link ALOCI}. - * + * * Outlier detection using multiple epsilon neighborhoods. - * + * * This implementation has O(n<sup>3</sup> log n) runtime complexity! - * - * Based on: S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos: - * LOCI: Fast Outlier Detection Using the Local Correlation Integral. In: Proc. - * 19th IEEE Int. Conf. 
on Data Engineering (ICDE '03), Bangalore, India, 2003. - * + * + * Reference: + * <p> + * S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos: <br /> + * LOCI: Fast Outlier Detection Using the Local Correlation Integral. <br /> + * In: Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03) + * </p> + * * @author Erich Schubert - * + * @since 0.2 + * * @apiviz.has RangeQuery - * + * * @param <O> Object type */ @Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral") @Description("Algorithm to compute outliers based on the Local Correlation Integral") -@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802") +@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", // +title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", // +booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03)", // +url = "http://dx.doi.org/10.1109/ICDE.2003.1260802") @Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI" }) public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** @@ -91,39 +98,23 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im private static final Logging LOG = Logging.getLogger(LOCI.class); /** - * Parameter to specify the maximum radius of the neighborhood to be - * considered, must be suitable to the distance function specified. 
- */ - public static final OptionID RMAX_ID = new OptionID("loci.rmax", "The maximum radius of the neighborhood to be considered."); - - /** - * Parameter to specify the minimum neighborhood size - */ - public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered."); - - /** - * Parameter to specify the averaging neighborhood scaling. - */ - public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood"); - - /** - * Holds the value of {@link #RMAX_ID}. + * Maximum radius. */ private double rmax; /** - * Holds the value of {@link #NMIN_ID}. + * Minimum neighborhood size. */ - private int nmin; + private int nmin = 0; /** - * Holds the value of {@link #ALPHA_ID}. + * Scaling of averaging neighborhood. */ - private double alpha; + private double alpha = 0.5; /** * Constructor. - * + * * @param distanceFunction Distance function * @param rmax Maximum radius * @param nmin Minimum neighborhood size @@ -138,7 +129,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Run the algorithm - * + * * @param database Database to process * @param relation Relation to process * @return Outlier result @@ -228,7 +219,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Preprocessing step: determine the radii of interest for each point. - * + * * @param ids IDs to process * @param rangeQuery Range query * @param interestingDistances Distances of interest @@ -284,7 +275,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Array of double-int values. - * + * * @author Erich Schubert * * @apiviz.exclude @@ -307,7 +298,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Constructor. - * + * * @param alloc Initial allocation. 
*/ public DoubleIntArrayList(int alloc) { @@ -318,7 +309,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Collection size. - * + * * @return Size */ public int size() { @@ -327,7 +318,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Get the key at the given position. - * + * * @param i Position * @return Key */ @@ -337,7 +328,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Get the value at the given position. - * + * * @param i Position * @return Value */ @@ -347,7 +338,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Get the value at the given position. - * + * * @param i Position * @param val New value */ @@ -357,7 +348,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Append a key-value pair. - * + * * @param key Key to append * @param val Value to append. */ @@ -373,7 +364,7 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Find the last position with a smaller or equal key. - * + * * @param search Key * @return Position */ @@ -412,18 +403,43 @@ public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> im /** * Parameterization class. - * + * * @author Erich Schubert - * + * * @apiviz.exclude - * + * * @param <O> Object type */ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * Parameter to specify the maximum radius of the neighborhood to be + * considered, must be suitable to the distance function specified. 
+ */ + public static final OptionID RMAX_ID = new OptionID("loci.rmax", "The maximum radius of the neighborhood to be considered."); + + /** + * Parameter to specify the minimum neighborhood size + */ + public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered."); + + /** + * Parameter to specify the averaging neighborhood scaling. + */ + public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood"); + + /** + * Maximum radius. + */ protected double rmax; + /** + * Minimum neighborhood size. + */ protected int nmin = 0; + /** + * Scaling of averaging neighborhood. + */ protected double alpha = 0.5; @Override diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java index 87b1be8c..52fbeafc 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java @@ -87,6 +87,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @author Erich Schubert * @author Elke Achtert + * @since 0.2 * * @apiviz.has KNNQuery * @@ -98,7 +99,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; title = "LOF: Identifying Density-Based Local Outliers", // booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", // url = "http://dx.doi.org/10.1145/342009.335388") -@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "LOF" }) +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.LOF") public class LOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java index 48589698..16f46a33 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java @@ -90,6 +90,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * </ul> * * @author Erich Schubert + * @since 0.3 * * @apiviz.has KNNQuery * @@ -101,7 +102,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; title = "LoOP: Local Outlier Probabilities", // booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", // url = "http://dx.doi.org/10.1145/1645953.1646195") -@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP" }) +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP") public class LoOP<O> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java index 715d3c0e..6a9344b3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java @@ -53,6 +53,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; @@ -63,10 +64,12 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * BUG: this currently does not appear to work. 
* * @author Elke Achtert + * @since 0.2 * * @apiviz.has FlexibleLOF.LOFResult oneway - - updates */ // TODO: related to publication? +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.OnlineLOF") public class OnlineLOF<O> extends FlexibleLOF<O> { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java index af32988e..80c41e57 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java @@ -54,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; @@ -66,12 +67,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * estimation instead of the local reachability density. * * @author Erich Schubert + * @since 0.5.5 * * @apiviz.has KNNQuery * @apiviz.has KernelDensityFunction * * @param <O> the type of objects handled by this Algorithm */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleKernelDensityLOF") public class SimpleKernelDensityLOF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java index 17ebdabf..5705e7aa 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java @@ -68,6 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.5.5 * * @apiviz.has KNNQuery * @@ -77,7 +78,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // url = "http://dx.doi.org/10.1007/s10618-012-0300-z") -@Alias({ "SimplifiedLOF", "outlier.SimplifiedLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimplifiedLOF" }) +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF") public class SimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. @@ -172,7 +173,7 @@ public class SimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierR * @param lofminmax Minimum and maximum scores */ private void computeSimplifiedLOFs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore slrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { - FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simplified LOF scores.", ids.size(), LOG) : null; + FiniteProgress progressLOFs = LOG.isVerbose() ? 
new FiniteProgress("Simplified LOF scores", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double lof; final double lrdp = slrds.doubleValue(iter); diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/VarianceOfVolume.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/VarianceOfVolume.java index 3dffe213..93f62bea 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/VarianceOfVolume.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/VarianceOfVolume.java @@ -77,6 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has KNNQuery * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java index 095a1b5b..c6c7b315 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; * Processor for computing the LOF. * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java index e9c48cbf..c21a45af 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; * Processor for the "local reachability density" of LOF. 
* * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java index 59b69931..04b7f1da 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java @@ -71,6 +71,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has LRDProcessor * @apiviz.has LOFProcessor diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java index 9be38c34..d57972e3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java @@ -70,6 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has SimplifiedLRDProcessor * @apiviz.has LOFProcessor diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java index 0ee9a862..c075c674 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java @@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; * Note: we compute 1/lrd, the local reachability distance. 
* * @author Erich Schubert + * @since 0.7.0 * * @apiviz.has Instance */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java index 2791b800..63226bc9 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java @@ -73,6 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction; * score specified with an algorithm-specific prefix. * * @author Erich Schubert + * @since 0.4.0 * * @apiviz.has ScalingFunction * @apiviz.has File diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java index 6a7a41f3..1354b5c0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java @@ -79,6 +79,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * * @author Erich Schubert * @author Remigius Wojdanowski + * @since 0.4.0 */ @Title("Feature Bagging for Outlier Detection") @Reference(title = "Feature Bagging for Outlier Detection", authors = "A. Lazarevic, V. Kumar", booktitle = "Proc. 
of the 11th ACM SIGKDD international conference on Knowledge discovery in data mining", url = "http://dx.doi.org/10.1145/1081870.1081891") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java index 4dcf83ec..e38dd7aa 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java @@ -94,6 +94,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * * @author Jan Brusis * @author Erich Schubert + * @since 0.5.0 * * @apiviz.composedOf GoodnessOfFitTest * @apiviz.composedOf OutlierAlgorithm diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java index f6f3fabc..fa940477 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java @@ -56,6 +56,7 @@ import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; * Scale another outlier score using the given scaling function. * * @author Erich Schubert + * @since 0.4.0 * * @apiviz.composedOf OutlierAlgorithm */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java index 591ab776..16d07d05 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java @@ -64,6 +64,7 @@ import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; * Simple outlier ensemble method. 
* * @author Erich Schubert + * @since 0.5.5 * * @apiviz.composedOf EnsembleVoting * @apiviz.uses OutlierResult oneway - - reads diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java index 829c9010..3ee1d13e 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Abstract base class for distance-based spatial outlier detection methods. * * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Object type for neighborhood * @param <O> Non-spatial object type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java index 2be79793..0fed345b 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * neighborhood. 
* * @author Ahmed Hettab + * @since 0.4.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java index 70f0eddb..13499720 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java @@ -79,6 +79,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * that make the optimization problem convex. * * @author Ahmed Hettab + * @since 0.4.0 * * @param <V> Vector type to use for distances */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java index f0783700..2322205f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java @@ -69,6 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Spatial Vector * @param <O> Attribute Vector diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java index a5cb7423..cae611b8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java @@ -65,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * Outliers. 
* * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Neighborhood type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java index 1093b449..9af83939 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java @@ -70,6 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Spatial Vector * @param <O> Non Spatial Vector diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java index 5e8a7086..78e7fbd4 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java @@ -67,6 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * upper left or lower right are Spatial Outliers. 
* * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Neighborhood type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java index 6892200c..793b6977 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java @@ -75,6 +75,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Ahmed Hettab + * @since 0.4.0 * * @param <P> Spatial Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java index 16cd6625..b0a31a2c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java @@ -69,6 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * </p> * * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Neighborhood object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java index ba93c79c..13f7488d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java @@ -69,6 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * </p> * * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Neighborhood type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java 
index c18c69e0..ff2a671d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java @@ -62,6 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * still not be too useful then), which will result in divisions by zero. * * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> the type the spatial neighborhood is defined over * @param <O> the type of objects handled by the algorithm diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java index 31083439..32747bf3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java @@ -62,6 +62,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * A LOF variation simplified with reachDist(o,p) == dist(o,p). 
* * @author Ahmed Hettab + * @since 0.4.0 * * @param <N> Neighborhood object type * @param <O> Attribute object type diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java index 6581bb82..2e5b305b 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java @@ -74,6 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * </p> * * @author Ahmed Hettab + * @since 0.4.0 * @param <N> Neighborhood object type */ @Title("A Trimmed Mean Approach to Finding Spatial Outliers") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java index abf42e98..da5c3d5c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging; * Abstract base class for precomputed neighborhoods. 
* * @author Erich Schubert + * @since 0.4.0 */ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPredicate { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java index ba37400b..8284ace8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * neighborhood. * * @author Erich Schubert + * @since 0.4.0 */ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java index 7881633c..4d0dd24a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java @@ -59,6 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; * A precomputed neighborhood, loaded from an external file. 
* * @author Erich Schubert + * @since 0.4.0 */ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java index d953ece1..9ae428e6 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.result.Result; * Predicate to obtain the neighbors of a reference object as set. * * @author Erich Schubert + * @since 0.4.0 */ public interface NeighborSetPredicate extends Result { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java index 923cd39d..0a70ae85 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Neighborhoods based on k nearest neighbors. 
* * @author Ahmed Hettab + * @since 0.4.0 */ public class PrecomputedKNearestNeighborNeighborhood extends AbstractPrecomputedNeighborhood { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java index b45fd3cc..54753e17 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java @@ -53,6 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * TODO: make actual weighting parameterizable? * * @author Erich Schubert + * @since 0.4.0 */ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPredicate { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java index 5476791d..def719cf 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java @@ -45,6 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * weighted neighborhoods. 
* * @author Erich Schubert + * @since 0.4.0 */ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredicate { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java index aeb91741..3e9a301c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java @@ -35,6 +35,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; * Neighbor predicate with weight support. * * @author Erich Schubert + * @since 0.4.0 */ public interface WeightedNeighborSetPredicate { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java index 54fc9853..63e58864 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java @@ -63,6 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; * * @author Ahmed Hettab * @author Erich Schubert + * @since 0.4.0 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java index 829347dc..e856ce62 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java @@ -23,8 +23,6 @@ package 
de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import gnu.trove.iterator.TIntIterator; -import gnu.trove.list.array.TIntArrayList; import java.util.ArrayList; import java.util.Arrays; @@ -50,6 +48,7 @@ import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; @@ -61,6 +60,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; +import gnu.trove.iterator.TIntIterator; +import gnu.trove.list.array.TIntArrayList; /** * Evolutionary variant (EAFOD) of the high-dimensional outlier detection @@ -75,6 +76,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * * @author Ahmed Hettab * @author Erich Schubert + * @since 0.4.0 * * @apiviz.has EvolutionarySearch oneway - - runs * @apiviz.has Individuum oneway - - obtains @@ -85,6 +87,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; @Title("EAFOD: the evolutionary outlier detection algorithm") @Description("Outlier detection for high dimensional data") @Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. 
on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668") +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuEvolutionary") public class AggarwalYuEvolutionary<V extends NumberVector> extends AbstractAggarwalYuOutlier<V> { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java index e141ad48..3c601dda 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java @@ -40,6 +40,7 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -64,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; * * @author Ahmed Hettab * @author Erich Schubert + * @since 0.4.0 * * @param <V> Vector type */ @@ -71,6 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; @Title("BruteForce: Outlier detection for high dimensional data") @Description("Examines all possible sets of k dimensional projections") @Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. 
on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668") +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuNaive") public class AggarwalYuNaive<V extends NumberVector> extends AbstractAggarwalYuOutlier<V> { /** * The logger for this class. @@ -188,4 +191,4 @@ public class AggarwalYuNaive<V extends NumberVector> extends AbstractAggarwalYuO return new AggarwalYuNaive<>(k, phi); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java index 6f7ee1f4..66d8ef5f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java @@ -83,6 +83,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * * @author Viktoria Pleintinger * @author Erich Schubert + * @since 0.5.0 * * @apiviz.composedOf KernelDensityEstimator * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java index 54047472..4ba34c10 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java @@ -71,6 +71,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Erich Schubert + * @since 0.5.0 */ @Title("OutRank: ranking outliers in high dimensional data") @Description("Ranking outliers in high dimensional data - score 1") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java index db1a969c..8b0ac7c7 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java @@ -57,6 +57,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable; import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.BitsUtil; 
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap; @@ -85,6 +86,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * * @author Arthur Zimek + * @since 0.2 * * @apiviz.has SODModel oneway - - computes * @apiviz.has SharedNearestNeighborSimilarityFunction @@ -94,6 +96,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @Title("SOD: Subspace outlier degree") @Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data") @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2") +@Alias("de.lmu.ifi.dbs.elki.algorithm.outlier.SOD") public class SOD<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java index dfd35c78..6c2fc6c2 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java @@ -53,6 +53,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter; * reference algorithm in comparisons. 
* * @author Erich Schubert + * @since 0.4.0 */ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java index 5ef77e12..3a8d1528 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java @@ -44,6 +44,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; * reference algorithm in comparisons. * * @author Erich Schubert + * @since 0.4.0 */ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java index 4fe722a2..06da97e3 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java @@ -48,6 +48,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; * convert a 1d data set into a score, e.g. in geodata. 
* * @author Erich Schubert + * @since 0.4.0 */ public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java index c837e246..82155c2b 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java @@ -64,6 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * generator model information is still available. * * @author Erich Schubert + * @since 0.5.0 */ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java index e25c35f5..d5df4573 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java @@ -44,6 +44,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; * algorithm in comparisons. 
* * @author Erich Schubert + * @since 0.4.0 */ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java index 6f243996..9e15127e 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AddSingleScale.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParamet * FIXME: this should become part of relation metadata. * * @author Erich Schubert + * @since 0.5.0 */ @Description("Setup a scaling so that all dimensions are scaled equally in visualization.") public class AddSingleScale implements Algorithm { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java index a5a9291e..8a31f956 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/AveragePrecisionAtK.java @@ -59,6 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * at k, when ranking the objects by distance. 
* * @author Erich Schubert + * @since 0.5.0 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceQuantileSampler.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceQuantileSampler.java index 5f0ae42a..fe7b98f0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceQuantileSampler.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceQuantileSampler.java @@ -58,6 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * algorithms. * * @author Erich Schubert + * @since 0.7.0 * * @param <O> Object type */ @@ -164,6 +165,8 @@ public class DistanceQuantileSampler<O> extends AbstractDistanceBasedAlgorithm<O * @author Erich Schubert * * @param <O> Object type + * + * @apiviz.exclude */ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java index 4cc4a35c..bb3808b8 100755 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/DistanceStatisticsWithClasses.java @@ -73,6 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Parameter; * set. 
* * @author Erich Schubert + * @since 0.2 * * @param <O> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EstimateIntrinsicDimensionality.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EstimateIntrinsicDimensionality.java index 8bc0cfe8..f78ff82c 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EstimateIntrinsicDimensionality.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EstimateIntrinsicDimensionality.java @@ -55,6 +55,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Note: this algorithm does not produce a result, but only logs statistics. * * @author Erich Schubert + * @since 0.7.0 * * @param <O> Data type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java index 27c9d1ed..8c1ff8e3 100755 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRankingQuality.java @@ -82,6 +82,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * TODO: Add sampling * * @author Erich Schubert + * @since 0.2 * @param <V> Vector type */ @Title("Evaluate Ranking Quality") diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRetrievalPerformance.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRetrievalPerformance.java index 6ca75b0c..ec70970a 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRetrievalPerformance.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/EvaluateRetrievalPerformance.java @@ -64,6 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * distance. 
* * @author Erich Schubert + * @since 0.5.0 * * @apiviz.has RetrievalPerformanceResult * @apiviz.composedOf KNNEvaluator diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/HopkinsStatisticClusteringTendency.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/HopkinsStatisticClusteringTendency.java index 7ce4e428..e41b77df 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/HopkinsStatisticClusteringTendency.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/HopkinsStatisticClusteringTendency.java @@ -78,6 +78,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * * @author Lisa Reichert * @author Erich Schubert + * @since 0.7.0 */ // TODO: allow using more than one k @Reference(authors = "B. Hopkins and J. G. Skellam", // diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RangeQuerySelectivity.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RangeQuerySelectivity.java index 5fb7d785..cf6294d0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RangeQuerySelectivity.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RangeQuerySelectivity.java @@ -54,6 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * TODO: Add sampling * * @author Erich Schubert + * @since 0.7.0 * @param <V> Vector type */ public class RangeQuerySelectivity<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, Result> { diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java index bfdf190a..d009628f 100755 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/statistics/RankingQualityHistogram.java @@ -65,6 +65,7 @@ import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * TODO: Add sampling * * @author Erich Schubert + * @since 0.2 * @param <O> Object type */ @Title("Ranking Quality Histogram") |