path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering')
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java  4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java  30
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java  236
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java  3
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java  20
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java  29
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java  54
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java  28
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java  38
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java  33
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java  818
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java  14
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java  57
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java  22
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java  178
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java  4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java  4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java  12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java  34
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java  10
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java  20
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java  173
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java  84
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java  70
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java  854
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java  82
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java  51
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java  56
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java  80
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java  303
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java  97
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java  368
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java  80
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java  86
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java  84
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java  182
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java  219
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java  186
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java  9
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java  46
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java  231
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java  12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java  86
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java  88
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java  16
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java  76
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java  22
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java  22
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java  12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java  11
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java  9
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java  160
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java  54
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java  89
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java  83
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java  4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java  30
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java  74
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java  4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java  214
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java  4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java  140
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java  12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java  12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java  2
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java  12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java  65
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java  6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java  10
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java  6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java  6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java  2
87 files changed, 4579 insertions(+), 1783 deletions(-)
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
index 05cc2b4f..0c4eb5fc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
index f8b73f48..ee3b234c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -41,14 +41,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.IndexBasedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LocallyWeightedDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -179,7 +179,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
public Clustering<Model> run(Database database, Relation<V> relation) {
FiniteProgress objprog = getLogger().isVerbose() ? new FiniteProgress("Processing objects", relation.size(), getLogger()) : null;
IndefiniteProgress clusprog = getLogger().isVerbose() ? new IndefiniteProgress("Number of clusters", getLogger()) : null;
- resultList = new ArrayList<ModifiableDBIDs>();
+ resultList = new ArrayList<>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(relation.size());
@@ -215,14 +215,14 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
clusprog.setProcessed(resultList.size(), getLogger());
}
- Clustering<Model> result = new Clustering<Model>(getLongResultName(), getShortResultName());
+ Clustering<Model> result = new Clustering<>(getLongResultName(), getShortResultName());
for(Iterator<ModifiableDBIDs> resultListIter = resultList.iterator(); resultListIter.hasNext();) {
Cluster<Model> c = new Cluster<Model>(resultListIter.next(), ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
Cluster<Model> n = new Cluster<Model>(noise, true, ClusterModel.CLUSTER);
- result.addCluster(n);
+ result.addToplevelCluster(n);
if(objprog != null && clusprog != null) {
objprog.setProcessed(processedIDs.size(), getLogger());
@@ -279,7 +279,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
// compute weighted epsilon neighborhood
- DistanceDBIDResult<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
+ DistanceDBIDList<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// neighbors < minPts -> noise
if(neighbors.size() < minpts) {
noise.add(startObjectID);
@@ -294,7 +294,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
// try to expand the cluster
ModifiableDBIDs currentCluster = DBIDUtil.newArray();
ModifiableDBIDs seeds = DBIDUtil.newHashSet();
- for (DistanceDBIDResultIter<DoubleDistance> seed = neighbors.iter(); seed.valid(); seed.advance()) {
+ for (DistanceDBIDListIter<DoubleDistance> seed = neighbors.iter(); seed.valid(); seed.advance()) {
int nextID_corrDim = distFunc.getIndex().getLocalProjection(seed).getCorrelationDimension();
// nextID is not reachable from start object
if(nextID_corrDim > lambda) {
@@ -320,11 +320,11 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
continue;
}
- DistanceDBIDResult<DoubleDistance> reachables = rangeQuery.getRangeForDBID(iter, epsilon);
+ DistanceDBIDList<DoubleDistance> reachables = rangeQuery.getRangeForDBID(iter, epsilon);
iter.remove();
if(reachables.size() > minpts) {
- for (DistanceDBIDResultIter<DoubleDistance> r = reachables.iter(); r.valid(); r.advance()) {
+ for (DistanceDBIDListIter<DoubleDistance> r = reachables.iter(); r.valid(); r.advance()) {
int corrDim_r = distFunc.getIndex().getLocalProjection(r).getCorrelationDimension();
// r is not reachable from q
if(corrDim_r > lambda) {
@@ -395,7 +395,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
protected Integer lambda;
protected void configInnerDistance(Parameterization config) {
- ObjectParameter<DistanceFunction<V, D>> innerdistP = new ObjectParameter<DistanceFunction<V, D>>(AbstractProjectedDBSCAN.INNER_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ ObjectParameter<DistanceFunction<V, D>> innerdistP = new ObjectParameter<>(AbstractProjectedDBSCAN.INNER_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
if(config.grab(innerdistP)) {
innerdist = innerdistP.instantiateClass(config);
}
@@ -403,7 +403,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
protected void configEpsilon(Parameterization config, DistanceFunction<V, D> innerdist) {
D distanceParser = innerdist != null ? innerdist.getDistanceFactory() : null;
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(EPSILON_ID, distanceParser);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(EPSILON_ID, distanceParser);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -418,7 +418,7 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
}
protected void configOuterDistance(Parameterization config, D epsilon, int minpts, Class<?> preprocessorClass, DistanceFunction<V, D> innerdist) {
- ObjectParameter<LocallyWeightedDistanceFunction<V>> outerdistP = new ObjectParameter<LocallyWeightedDistanceFunction<V>>(OUTER_DISTANCE_FUNCTION_ID, LocallyWeightedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
+ ObjectParameter<LocallyWeightedDistanceFunction<V>> outerdistP = new ObjectParameter<>(OUTER_DISTANCE_FUNCTION_ID, LocallyWeightedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
if(config.grab(outerdistP)) {
// parameters for the distance function
ListParameterization distanceFunctionParameters = new ListParameterization();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java
new file mode 100644
index 00000000..2dff7554
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/CanopyPreClustering.java
@@ -0,0 +1,236 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDMIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+
+/**
+ * Canopy pre-clustering is a simple preprocessing step for clustering.
+ *
+ * <p>
+ * Reference:<br>
+ * A. McCallum, K. Nigam, L.H. Ungar<br />
+ * Efficient Clustering of High Dimensional Data Sets with Application to
+ * Reference Matching<br />
+ * Proc. 6th ACM SIGKDD international conference on Knowledge discovery and data
+ * mining
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+@Reference(authors = "A. McCallum, K. Nigam, L.H. Ungar", title = "Efficient Clustering of High Dimensional Data Sets with Application to Reference Matching", booktitle = "Proc. 6th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145%2F347090.347123")
+public class CanopyPreClustering<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, Clustering<ClusterModel>> implements ClusteringAlgorithm<Clustering<ClusterModel>> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(CanopyPreClustering.class);
+
+ /**
+ * Threshold for inclusion
+ */
+ private D t1;
+
+ /**
+ * Threshold for removal
+ */
+ private D t2;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param t1 Inclusion threshold
+ * @param t2 Exclusion threshold
+ */
+ public CanopyPreClustering(DistanceFunction<? super O, D> distanceFunction, D t1, D t2) {
+ super(distanceFunction);
+ this.t1 = t1;
+ this.t2 = t2;
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param database Database
+ * @param relation Relation to process
+ */
+ public Clustering<ClusterModel> run(Database database, Relation<O> relation) {
+ DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ ModifiableDBIDs ids = DBIDUtil.newHashSet(relation.getDBIDs());
+ ArrayList<Cluster<ClusterModel>> clusters = new ArrayList<>();
+ final int size = relation.size();
+
+ if(t1.compareTo(t2) <= 0) {
+ LOG.warning(Parameterizer.T1_ID.getName() + " must be larger than " + Parameterizer.T2_ID.getName());
+ }
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Canopy clustering", size, LOG) : null;
+
+ DBIDVar first = DBIDUtil.newVar();
+ while(!ids.isEmpty()) {
+ // Remove first element:
+ DBIDMIter iter = ids.iter();
+ first.set(iter);
+ iter.remove();
+ iter.advance();
+
+ // Start a new cluster:
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ cids.add(first);
+
+ // Compare to remaining objects:
+ for(; iter.valid(); iter.advance()) {
+ D dist = dq.distance(first, iter);
+ // Inclusion threshold:
+ if(t1.compareTo(dist) >= 0) {
+ cids.add(iter);
+ }
+ // Removal threshold:
+ if(t2.compareTo(dist) >= 0) {
+ iter.remove();
+ }
+ }
+ // TODO: remember the central object using a CanopyModel?
+ // Construct cluster:
+ clusters.add(new Cluster<>(cids, ClusterModel.CLUSTER));
+
+ if(prog != null) {
+ prog.setProcessed(size - ids.size(), LOG);
+ }
+ }
+ if(prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ return new Clustering<>("Canopy clustering", "canopy-clustering", clusters);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter for the inclusion threshold of canopy clustering.
+ *
+ * Note: t1 > t2
+ *
+ * Syntax:
+ *
+ * <pre>
+ * -canopy.t1 &lt;value&gt;
+ * </pre>
+ */
+ public static final OptionID T1_ID = new OptionID("canopy.t1", "Inclusion threshold for canopy clustering. t1 > t2!");
+
+ /**
+ * Parameter for the removal threshold of canopy clustering.
+ *
+ * Note: t1 > t2
+ *
+ * Syntax:
+ *
+ * <pre>
+ * -canopy.t2 &lt;value&gt;
+ * </pre>
+ */
+ public static final OptionID T2_ID = new OptionID("canopy.t2", "Removal threshold for canopy clustering. t1 > t2!");
+
+ /**
+ * Threshold for inclusion
+ */
+ private D t1;
+
+ /**
+ * Threshold for removal
+ */
+ private D t2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ DistanceParameter<D> t1P = new DistanceParameter<>(T1_ID, distanceFunction);
+ if(config.grab(t1P)) {
+ t1 = t1P.getValue();
+ }
+
+ DistanceParameter<D> t2P = new DistanceParameter<>(T2_ID, distanceFunction);
+ // TODO: add distance constraint t1 > t2
+ if(config.grab(t2P)) {
+ t2 = t2P.getValue();
+ if(t1.compareTo(t2) <= 0) {
+ config.reportError(new WrongParameterValueException(t2P, T1_ID.getName() + " must be larger than " + T2_ID.getName()));
+ }
+ }
+ }
+
+ @Override
+ protected CanopyPreClustering<O, D> makeInstance() {
+ return new CanopyPreClustering<>(distanceFunction, t1, t2);
+ }
+
+ }
+}
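
Aside: the new CanopyPreClustering class follows McCallum et al.'s two-threshold scheme — every remaining point within the loose threshold t1 of the current center joins the canopy, and every point within the tight threshold t2 is additionally removed so it can never seed another canopy. Below is a minimal standalone sketch of that loop, using plain Java doubles instead of ELKI's Distance/DBID API; the class and method names (CanopySketch, dist, canopy) are illustrative, not part of ELKI.

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.LinkedList;
    import java.util.List;

    public class CanopySketch {
      // Euclidean distance between two points.
      static double dist(double[] a, double[] b) {
        double s = 0;
        for (int i = 0; i < a.length; i++) {
          final double d = a[i] - b[i];
          s += d * d;
        }
        return Math.sqrt(s);
      }

      // t1 = inclusion threshold, t2 = removal threshold; require t1 > t2.
      static List<List<double[]>> canopy(List<double[]> data, double t1, double t2) {
        LinkedList<double[]> remaining = new LinkedList<>(data);
        List<List<double[]>> canopies = new ArrayList<>();
        while (!remaining.isEmpty()) {
          double[] center = remaining.removeFirst(); // next canopy center
          List<double[]> members = new ArrayList<>();
          members.add(center);
          for (Iterator<double[]> it = remaining.iterator(); it.hasNext();) {
            double[] p = it.next();
            double d = dist(center, p);
            if (d <= t1) { // inclusion threshold: join this canopy
              members.add(p);
            }
            if (d <= t2) { // removal threshold: never becomes a center
              it.remove();
            }
          }
          canopies.add(members);
        }
        return canopies;
      }
    }

Because only points inside t2 are removed, points in the (t2, t1] ring may end up in several canopies — that overlap is the intended pre-clustering behavior.
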
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
index 8f637460..249dc313 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
*
* @apiviz.has Clustering
* @apiviz.has Model
+ * @apiviz.excludeSubtypes
*
* @param <C> Clustering type
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
index fcf81faa..57dcb435 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -40,10 +40,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -140,7 +140,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("Processing objects", size, LOG) : null;
IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
- resultList = new ArrayList<ModifiableDBIDs>();
+ resultList = new ArrayList<>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(size);
if(size < minpts) {
@@ -170,14 +170,14 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
clusprog.setCompleted(LOG);
}
- Clustering<Model> result = new Clustering<Model>("DBSCAN Clustering", "dbscan-clustering");
+ Clustering<Model> result = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
for(ModifiableDBIDs res : resultList) {
Cluster<Model> c = new Cluster<Model>(res, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
Cluster<Model> n = new Cluster<Model>(noise, true, ClusterModel.CLUSTER);
- result.addCluster(n);
+ result.addToplevelCluster(n);
return result;
}
@@ -193,7 +193,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* @param objprog the progress object for logging the current status
*/
protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
- DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
+ DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
// startObject is no core-object
if(neighbors.size() < minpts) {
@@ -224,7 +224,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
while(seeds.size() > 0) {
DBIDMIter o = seeds.iter();
- DistanceDBIDResult<D> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
+ DistanceDBIDList<D> neighborhood = rangeQuery.getRangeForDBID(o, epsilon);
o.remove();
if(neighborhood.size() >= minpts) {
@@ -289,7 +289,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(EPSILON_ID, distanceFunction);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(EPSILON_ID, distanceFunction);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -303,7 +303,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected DBSCAN<O, D> makeInstance() {
- return new DBSCAN<O, D>(distanceFunction, epsilon, minpts);
+ return new DBSCAN<>(distanceFunction, epsilon, minpts);
}
}
}
\ No newline at end of file
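
The expandCluster hunks above only rename the result types (DistanceDBIDResult to DistanceDBIDList); the seed-set expansion itself is unchanged. As a rough sketch of that pattern with plain Java collections — neighbors is an assumed stand-in for the ELKI range query, and the noise/border bookkeeping of the real implementation is simplified away:

    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Set;
    import java.util.function.Function;

    public class DBSCANSketch {
      static <P> List<P> expandCluster(P start, Function<P, List<P>> neighbors,
          int minpts, Set<P> processed) {
        List<P> cluster = new ArrayList<>();
        List<P> n = neighbors.apply(start);
        if (n.size() < minpts) {
          return cluster; // start is no core object; caller treats it as noise
        }
        processed.add(start);
        cluster.add(start);
        ArrayDeque<P> seeds = new ArrayDeque<>(n);
        while (!seeds.isEmpty()) {
          P o = seeds.poll();
          if (!processed.add(o)) {
            continue; // already claimed earlier
          }
          cluster.add(o);
          List<P> nn = neighbors.apply(o);
          if (nn.size() >= minpts) { // o is a core object: expand further
            seeds.addAll(nn);
          }
        }
        return cluster;
      }
    }

A caller would mark points whose cluster comes back empty as noise, mirroring the noise set maintained in the ELKI code.
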
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
index 22875715..3c2e0278 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,9 +23,10 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.set.TIntSet;
+
import java.util.Collection;
import java.util.List;
-import java.util.Set;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.KNNJoin;
@@ -37,11 +38,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.TreeIndexPathComponent;
@@ -119,14 +120,14 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
*/
public DeLiClu(DistanceFunction<? super NV, D> distanceFunction, int minpts) {
super(distanceFunction);
- this.knnJoin = new KNNJoin<NV, D, DeLiCluNode, DeLiCluEntry>(distanceFunction, minpts);
+ this.knnJoin = new KNNJoin<>(distanceFunction, minpts);
this.minpts = minpts;
}
public ClusterOrderResult<D> run(Database database, Relation<NV> relation) {
Collection<DeLiCluTreeIndex<NV>> indexes = ResultUtil.filterResults(database, DeLiCluTreeIndex.class);
if(indexes.size() != 1) {
- throw new AbortException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes, expected exactly one.");
+ throw new AbortException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
}
DeLiCluTreeIndex<NV> index = indexes.iterator().next();
// FIXME: check that the index matches the relation!
@@ -141,13 +142,13 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
if(LOG.isVerbose()) {
LOG.verbose("knnJoin...");
}
- DataStore<KNNResult<D>> knns = knnJoin.run(database, relation);
+ DataStore<KNNList<D>> knns = knnJoin.run(database, relation);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DeLiClu", relation.size(), LOG) : null;
final int size = relation.size();
- ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<D>("DeLiClu Clustering", "deliclu-clustering");
- heap = new UpdatableHeap<SpatialObjectPair>();
+ ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<>("DeLiClu Clustering", "deliclu-clustering");
+ heap = new UpdatableHeap<>();
// add start object to cluster order and (root, root) to priority queue
DBID startID = getStartObject(relation);
@@ -217,7 +218,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
* @param nodePair the pair of nodes to be expanded
* @param knns the knn list
*/
- private void expandNodes(DeLiCluTree index, SpatialPrimitiveDistanceFunction<NV, D> distFunction, SpatialObjectPair nodePair, DataStore<KNNResult<D>> knns) {
+ private void expandNodes(DeLiCluTree index, SpatialPrimitiveDistanceFunction<NV, D> distFunction, SpatialObjectPair nodePair, DataStore<KNNList<D>> knns) {
DeLiCluNode node1 = index.getNode(((SpatialDirectoryEntry) nodePair.entry1).getPageID());
DeLiCluNode node2 = index.getNode(((SpatialDirectoryEntry) nodePair.entry2).getPageID());
@@ -274,7 +275,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
* @param node2 the second node
* @param knns the knn list
*/
- private void expandLeafNodes(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluNode node1, DeLiCluNode node2, DataStore<KNNResult<D>> knns) {
+ private void expandLeafNodes(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluNode node1, DeLiCluNode node2, DataStore<KNNList<D>> knns) {
if(LOG.isDebuggingFinest()) {
LOG.debugFinest("ExpandLeafNodes: " + node1.getPageID() + " + " + node2.getPageID());
}
@@ -310,12 +311,12 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
* @param path the path of the object inserted last
* @param knns the knn list
*/
- private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, DataStore<KNNResult<D>> knns) {
+ private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, DataStore<KNNList<D>> knns) {
SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) path.remove(0).getEntry();
reinsertExpanded(distFunction, index, path, 0, rootEntry, knns);
}
- private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, int pos, SpatialDirectoryEntry parentEntry, DataStore<KNNResult<D>> knns) {
+ private void reinsertExpanded(SpatialPrimitiveDistanceFunction<NV, D> distFunction, DeLiCluTree index, List<TreeIndexPathComponent<DeLiCluEntry>> path, int pos, SpatialDirectoryEntry parentEntry, DataStore<KNNList<D>> knns) {
DeLiCluNode parentNode = index.getNode(parentEntry.getPageID());
SpatialEntry entry2 = path.get(pos).getEntry();
@@ -332,7 +333,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
}
}
else {
- Set<Integer> expanded = index.getExpanded(entry2);
+ TIntSet expanded = index.getExpanded(entry2);
for(int i = 0; i < parentNode.getNumEntries(); i++) {
SpatialDirectoryEntry entry1 = (SpatialDirectoryEntry) parentNode.getEntry(i);
@@ -503,7 +504,7 @@ public class DeLiClu<NV extends NumberVector<?>, D extends Distance<D>> extends
@Override
protected DeLiClu<NV, D> makeInstance() {
- return new DeLiClu<NV, D>(distanceFunction, minpts);
+ return new DeLiClu<>(distanceFunction, minpts);
}
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
index 514e63bd..c66442a1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -179,13 +179,13 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
if (LOG.isVerbose()) {
LOG.verbose("initializing " + k + " models");
}
- List<Vector> means = new ArrayList<Vector>();
- for (NumberVector<?> nv : initializer.chooseInitialMeans(relation, k, EuclideanDistanceFunction.STATIC)) {
+ List<Vector> means = new ArrayList<>();
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(database, relation, k, EuclideanDistanceFunction.STATIC)) {
means.add(nv.getColumnVector());
}
- List<Matrix> covarianceMatrices = new ArrayList<Matrix>(k);
+ List<Matrix> covarianceMatrices = new ArrayList<>(k);
double[] normDistrFactor = new double[k];
- List<Matrix> invCovMatr = new ArrayList<Matrix>(k);
+ List<Matrix> invCovMatr = new ArrayList<>(k);
double[] clusterWeights = new double[k];
probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
@@ -193,7 +193,13 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
for (int i = 0; i < k; i++) {
Matrix m = Matrix.identity(dimensionality, dimensionality);
covarianceMatrices.add(m);
- normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * m.det());
+ final double det = m.det();
+ if (det > 0.) {
+ normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * det);
+ } else {
+ LOG.warning("Encountered matrix with 0 determinant - degenerated.");
+ normDistrFactor[i] = 1.0; // Not really well defined
+ }
invCovMatr.add(m.inverse());
clusterWeights[i] = 1.0 / k;
if (LOG.isDebuggingFinest()) {
@@ -201,7 +207,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
msg.append(" model ").append(i).append(":\n");
msg.append(" mean: ").append(means.get(i)).append('\n');
msg.append(" m:\n").append(FormatUtil.format(m, " ")).append('\n');
- msg.append(" m.det(): ").append(m.det()).append('\n');
+ msg.append(" m.det(): ").append(det).append('\n');
msg.append(" cluster weight: ").append(clusterWeights[i]).append('\n');
msg.append(" normDistFact: ").append(normDistrFactor[i]).append('\n');
LOG.debugFine(msg.toString());
@@ -222,7 +228,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
em = emNew;
// recompute models
- List<Vector> meanSums = new ArrayList<Vector>(k);
+ List<Vector> meanSums = new ArrayList<>(k);
double[] sumOfClusterProbabilities = new double[k];
for (int i = 0; i < k; i++) {
@@ -260,7 +266,13 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
covarianceMatrices.set(i, covarianceMatrices.get(i).times(1 / sumOfClusterProbabilities[i]).cheatToAvoidSingularity(SINGULARITY_CHEAT));
}
for (int i = 0; i < k; i++) {
- normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * covarianceMatrices.get(i).det());
+ final double det = covarianceMatrices.get(i).det();
+ if (det > 0.) {
+ normDistrFactor[i] = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, dimensionality) * det);
+ } else {
+ LOG.warning("Encountered matrix with 0 determinant - degenerated.");
+ normDistrFactor[i] = 1.0; // Not really well defined
+ }
invCovMatr.set(i, covarianceMatrices.get(i).inverse());
}
// reassign probabilities
@@ -279,7 +291,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
}
// fill result with clusters and models
- List<ModifiableDBIDs> hardClusters = new ArrayList<ModifiableDBIDs>(k);
+ List<ModifiableDBIDs> hardClusters = new ArrayList<>(k);
for (int i = 0; i < k; i++) {
hardClusters.add(DBIDUtil.newHashSet());
}
@@ -298,14 +310,14 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
hardClusters.get(maxIndex).add(iditer);
}
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<EMModel<V>> result = new Clustering<EMModel<V>>("EM Clustering", "em-clustering");
+ Clustering<EMModel<V>> result = new Clustering<>("EM Clustering", "em-clustering");
// provide models within the result
for (int i = 0; i < k; i++) {
// TODO: re-do labeling.
// SimpleClassLabel label = new SimpleClassLabel();
// label.init(result.canonicalClusterLabel(i));
- Cluster<EMModel<V>> model = new Cluster<EMModel<V>>(hardClusters.get(i), new EMModel<V>(factory.newNumberVector(means.get(i).getArrayRef()), covarianceMatrices.get(i)));
- result.addCluster(model);
+ Cluster<EMModel<V>> model = new Cluster<>(hardClusters.get(i), new EMModel<>(factory.newNumberVector(means.get(i).getArrayRef()), covarianceMatrices.get(i)));
+ result.addToplevelCluster(model);
}
return result;
}
@@ -339,6 +351,9 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
if (LOG.isDebuggingFinest()) {
LOG.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " difference:\n" + FormatUtil.format(difference, " ") + "\n" + " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), " "));
}
+ if (!(prob >= 0.)) {
+ LOG.warning("Invalid probability: " + prob + " power: " + power + " factor: " + normDistrFactor[i]);
+ }
probabilities[i] = prob;
}
double priorProbability = 0.0;
@@ -352,13 +367,12 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
double[] clusterProbabilities = new double[k];
for (int i = 0; i < k; i++) {
- assert (priorProbability >= 0.0);
assert (clusterWeights[i] >= 0.0);
// do not divide by zero!
- if (priorProbability == 0.0) {
- clusterProbabilities[i] = 0.0;
- } else {
+ if (priorProbability > 0.0) {
clusterProbabilities[i] = probabilities[i] / priorProbability * clusterWeights[i];
+ } else {
+ clusterProbabilities[i] = 0.0;
}
}
probClusterIGivenX.put(iditer, clusterProbabilities);
@@ -412,7 +426,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
k = kP.getValue();
}
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+ ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
@@ -433,7 +447,7 @@ public class EM<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<
@Override
protected EM<V> makeInstance() {
- return new EM<V>(k, delta, initializer, maxiter);
+ return new EM<>(k, delta, initializer, maxiter);
}
}
}
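
The new det > 0 guard in EM protects the normalization constant stored in normDistrFactor[i]. For a d-dimensional Gaussian with covariance \(\Sigma_i\), the density is

\[ \mathcal{N}(x \mid \mu_i, \Sigma_i) = \frac{1}{\sqrt{(2\pi)^d \det \Sigma_i}} \exp\!\Big(-\tfrac{1}{2}(x-\mu_i)^\top \Sigma_i^{-1} (x-\mu_i)\Big) \]

so a singular covariance matrix (\(\det \Sigma_i = 0\)) leaves both the prefactor and \(\Sigma_i^{-1}\) undefined; the changed code logs a warning and falls back to 1.0 instead of dividing by zero, and the new probability check catches the resulting non-finite values downstream.
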
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
index 8429d8ac..a4d6e307 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/NaiveMeanShiftClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,19 +36,19 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
-import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
-import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -135,7 +135,7 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
final double threshold = bandwidth * 1E-10;
// Result store:
- ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<Pair<V, ModifiableDBIDs>>();
+ ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
ModifiableDBIDs noise = DBIDUtil.newArray();
@@ -148,11 +148,11 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
// Compute new position:
V newvec = null;
{
- DistanceDBIDResult<D> neigh = rangeq.getRangeForObject(position, range);
+ DistanceDBIDList<D> neigh = rangeq.getRangeForObject(position, range);
boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
if (okay) {
Centroid newpos = new Centroid(dim);
- for (DistanceDBIDResultIter<D> niter = neigh.iter(); niter.valid(); niter.advance()) {
+ for (DistanceDBIDListIter<D> niter = neigh.iter(); niter.valid(); niter.advance()) {
final double weight = kernel.density(niter.getDistance().doubleValue() / bandwidth);
newpos.put(relation.get(niter), weight);
}
@@ -206,14 +206,14 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
prog.ensureCompleted(LOG);
}
- ArrayList<Cluster<MeanModel<V>>> cs = new ArrayList<Cluster<MeanModel<V>>>(clusters.size());
+ ArrayList<Cluster<MeanModel<V>>> cs = new ArrayList<>(clusters.size());
for (Pair<V, ModifiableDBIDs> pair : clusters) {
- cs.add(new Cluster<MeanModel<V>>(pair.second, new MeanModel<V>(pair.first)));
+ cs.add(new Cluster<>(pair.second, new MeanModel<>(pair.first)));
}
if (noise.size() > 0) {
cs.add(new Cluster<MeanModel<V>>(noise, true));
}
- Clustering<MeanModel<V>> c = new Clustering<MeanModel<V>>("Mean-shift Clustering", "mean-shift-clustering", cs);
+ Clustering<MeanModel<V>> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
return c;
}
@@ -261,11 +261,11 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
if (config.grab(kernelP)) {
kernel = kernelP.instantiateClass(config);
}
- DistanceParameter<D> rangeP = new DistanceParameter<D>(RANGE_ID, distanceFunction);
+ DistanceParameter<D> rangeP = new DistanceParameter<>(RANGE_ID, distanceFunction);
if (config.grab(rangeP)) {
range = rangeP.getValue();
}
@@ -273,7 +273,7 @@ public class NaiveMeanShiftClustering<V extends NumberVector<?>, D extends Numbe
@Override
protected NaiveMeanShiftClustering<V, D> makeInstance() {
- return new NaiveMeanShiftClustering<V, D>(distanceFunction, kernel, range);
+ return new NaiveMeanShiftClustering<>(distanceFunction, kernel, range);
}
}
}
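
For reference, the Centroid accumulation in the loop above computes the standard kernel-weighted mean-shift update: with bandwidth \(h\) and kernel \(K\) (Epanechnikov by default here),

\[ x^{(t+1)} = \frac{\sum_{y \in N(x^{(t)})} K\!\big(\lVert y - x^{(t)} \rVert / h\big)\, y}{\sum_{y \in N(x^{(t)})} K\!\big(\lVert y - x^{(t)} \rVert / h\big)} \]

where \(N(x)\) are the neighbors returned by the range query; iteration stops once the shift drops below the threshold of bandwidth * 1E-10 used in the code.
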
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
index 2c098dc0..e928d041 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,17 +31,17 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -139,7 +139,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OPTICS", size, LOG) : null;
processedIDs = DBIDUtil.newHashSet(size);
- ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<D>("OPTICS Clusterorder", "optics-clusterorder");
+ ClusterOrderResult<D> clusterOrder = new ClusterOrderResult<>("OPTICS Clusterorder", "optics-clusterorder");
if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && DoubleDistance.class.isInstance(epsilon)) {
// Optimized codepath for double-based distances. Avoids Java
@@ -182,25 +182,25 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* the algorithm
*/
protected void expandClusterOrder(ClusterOrderResult<D> clusterOrder, Database database, RangeQuery<O, D> rangeQuery, DBID objectID, D epsilon, FiniteProgress progress) {
- UpdatableHeap<ClusterOrderEntry<D>> heap = new UpdatableHeap<ClusterOrderEntry<D>>();
- heap.add(new GenericClusterOrderEntry<D>(objectID, null, getDistanceFunction().getDistanceFactory().infiniteDistance()));
+ UpdatableHeap<ClusterOrderEntry<D>> heap = new UpdatableHeap<>();
+ heap.add(new GenericClusterOrderEntry<>(objectID, null, getDistanceFunction().getDistanceFactory().infiniteDistance()));
while(!heap.isEmpty()) {
final ClusterOrderEntry<D> current = heap.poll();
clusterOrder.add(current);
processedIDs.add(current.getID());
- DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
+ DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
if(neighbors.size() >= minpts) {
final DistanceDBIDPair<D> last = neighbors.get(minpts - 1);
D coreDistance = last.getDistance();
- for(DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
D reachability = DistanceUtil.max(neighbor.getDistance(), coreDistance);
- heap.add(new GenericClusterOrderEntry<D>(DBIDUtil.deref(neighbor), current.getID(), reachability));
+ heap.add(new GenericClusterOrderEntry<>(DBIDUtil.deref(neighbor), current.getID(), reachability));
}
}
if(progress != null) {
@@ -221,7 +221,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
* the algorithm
*/
protected void expandClusterOrderDouble(ClusterOrderResult<DoubleDistance> clusterOrder, Database database, RangeQuery<O, DoubleDistance> rangeQuery, DBID objectID, DoubleDistance epsilon, FiniteProgress progress) {
- UpdatableHeap<DoubleDistanceClusterOrderEntry> heap = new UpdatableHeap<DoubleDistanceClusterOrderEntry>();
+ UpdatableHeap<DoubleDistanceClusterOrderEntry> heap = new UpdatableHeap<>();
heap.add(new DoubleDistanceClusterOrderEntry(objectID, null, Double.POSITIVE_INFINITY));
while(!heap.isEmpty()) {
@@ -229,17 +229,17 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
clusterOrder.add(current);
processedIDs.add(current.getID());
- DistanceDBIDResult<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
+ DistanceDBIDList<DoubleDistance> neighbors = rangeQuery.getRangeForDBID(current.getID(), epsilon);
if(neighbors.size() >= minpts) {
final DistanceDBIDPair<DoubleDistance> last = neighbors.get(minpts - 1);
if(last instanceof DoubleDistanceDBIDPair) {
double coreDistance = ((DoubleDistanceDBIDPair) last).doubleDistance();
- for(DistanceDBIDResultIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
- double reachability = Math.max(((DoubleDistanceDBIDResultIter) neighbor).doubleDistance(), coreDistance);
+ double reachability = Math.max(((DoubleDistanceDBIDListIter) neighbor).doubleDistance(), coreDistance);
heap.add(new DoubleDistanceClusterOrderEntry(DBIDUtil.deref(neighbor), current.getID(), reachability));
}
}
@@ -248,7 +248,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
// Only if we got an optimized result before.
double coreDistance = last.getDistance().doubleValue();
- for(DistanceDBIDResultIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<DoubleDistance> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(processedIDs.contains(neighbor)) {
continue;
}
@@ -298,7 +298,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(EPSILON_ID, distanceFunction, true);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(EPSILON_ID, distanceFunction, true);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -312,7 +312,7 @@ public class OPTICS<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
@Override
protected OPTICS<O, D> makeInstance() {
- return new OPTICS<O, D>(distanceFunction, epsilon, minpts);
+ return new OPTICS<>(distanceFunction, epsilon, minpts);
}
}
}
\ No newline at end of file
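
Both expansion methods above implement the same OPTICS definitions, once generically and once specialized to double distances: for a point \(p\) with at least minpts neighbors within \(\varepsilon\),

\[ \text{core-dist}_{\varepsilon,\mathit{minpts}}(p) = \text{dist}(p, \text{its } \mathit{minpts}\text{-th nearest neighbor}), \qquad \text{reach}(o, p) = \max\big(\text{core-dist}(p), \text{dist}(p, o)\big) \]

which corresponds to neighbors.get(minpts - 1) for the core distance and the DistanceUtil.max / Math.max calls for the reachability in the two code paths.
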
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
index 3ead6f3e..82d7ec88 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
index 39a0ebd6..583d402b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,8 +46,6 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.IterableResult;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyHashmapList;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.ModifiableHierarchy;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
@@ -135,13 +133,13 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
double mib = 0.0;
// TODO: make it configurable to keep this list; this is mostly useful for
// visualization
- List<SteepArea> salist = new ArrayList<SteepArea>();
- List<SteepDownArea> sdaset = new ArrayList<SteepDownArea>();
- ModifiableHierarchy<Cluster<OPTICSModel>> hier = new HierarchyHashmapList<Cluster<OPTICSModel>>();
- HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<Cluster<OPTICSModel>>();
+ List<SteepArea> salist = new ArrayList<>();
+ List<SteepDownArea> sdaset = new ArrayList<>();
+ final Clustering<OPTICSModel> clustering = new Clustering<>("OPTICS Xi-Clusters", "optics");
+ HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<>();
HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());
- SteepScanPosition<N> scan = new SteepScanPosition<N>(clusterOrder);
+ SteepScanPosition<N> scan = new SteepScanPosition<>(clusterOrder);
while(scan.hasNext()) {
final int curpos = scan.index;
// Update maximum-inbetween
@@ -285,7 +283,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
LOG.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cstart - cend + 1));
}
OPTICSModel model = new OPTICSModel(cstart, cend);
- Cluster<OPTICSModel> cluster = new Cluster<OPTICSModel>("Cluster_" + cstart + "_" + cend, dbids, model, hier);
+ Cluster<OPTICSModel> cluster = new Cluster<>("Cluster_" + cstart + "_" + cend, dbids, model);
// Build the hierarchy
{
Iterator<Cluster<OPTICSModel>> iter = curclusters.iterator();
@@ -293,7 +291,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
Cluster<OPTICSModel> clus = iter.next();
OPTICSModel omodel = clus.getModel();
if(model.getStartIndex() <= omodel.getStartIndex() && omodel.getEndIndex() <= model.getEndIndex()) {
- hier.add(cluster, clus);
+ clustering.addChildCluster(cluster, clus);
iter.remove();
}
}
@@ -308,23 +306,22 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
}
}
if(curclusters.size() > 0 || unclaimedids.size() > 0) {
- final Clustering<OPTICSModel> clustering = new Clustering<OPTICSModel>("OPTICS Xi-Clusters", "optics");
if(unclaimedids.size() > 0) {
final Cluster<OPTICSModel> allcluster;
if(clusterOrder.get(clusterOrder.size() - 1).getReachability().isInfiniteDistance()) {
- allcluster = new Cluster<OPTICSModel>("Noise", unclaimedids, true, new OPTICSModel(0, clusterOrder.size() - 1), hier);
+ allcluster = new Cluster<>("Noise", unclaimedids, true, new OPTICSModel(0, clusterOrder.size() - 1));
}
else {
- allcluster = new Cluster<OPTICSModel>("Cluster", unclaimedids, new OPTICSModel(0, clusterOrder.size() - 1), hier);
+ allcluster = new Cluster<>("Cluster", unclaimedids, new OPTICSModel(0, clusterOrder.size() - 1));
}
for(Cluster<OPTICSModel> cluster : curclusters) {
- hier.add(allcluster, cluster);
+ clustering.addChildCluster(allcluster, cluster);
}
- clustering.addCluster(allcluster);
+ clustering.addToplevelCluster(allcluster);
}
else {
for(Cluster<OPTICSModel> cluster : curclusters) {
- clustering.addCluster(cluster);
+ clustering.addToplevelCluster(cluster);
}
}
clustering.addChildResult(clusterOrderResult);
@@ -663,7 +660,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
xi = xiP.doubleValue();
}
- ClassParameter<OPTICSTypeAlgorithm<D>> opticsP = new ClassParameter<OPTICSTypeAlgorithm<D>>(XIALG_ID, OPTICSTypeAlgorithm.class, OPTICS.class);
+ ClassParameter<OPTICSTypeAlgorithm<D>> opticsP = new ClassParameter<>(XIALG_ID, OPTICSTypeAlgorithm.class, OPTICS.class);
if(config.grab(opticsP)) {
optics = opticsP.instantiateClass(config);
}
@@ -671,7 +668,7 @@ public class OPTICSXi<N extends NumberDistance<N, ?>> extends AbstractAlgorithm<
@Override
protected OPTICSXi<D> makeInstance() {
- return new OPTICSXi<D>(optics, xi);
+ return new OPTICSXi<>(optics, xi);
}
}
}
\ No newline at end of file
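The substantive change in OPTICSXi above is the hierarchy API: the separate ModifiableHierarchy object is gone, and parent/child links are now stored in the Clustering itself. A minimal before/after sketch, assuming pre-built clusters (the wrapper method is hypothetical; imports omitted):

static Clustering<OPTICSModel> link(Cluster<OPTICSModel> parent, Cluster<OPTICSModel> child) {
  Clustering<OPTICSModel> clustering = new Clustering<>("OPTICS Xi-Clusters", "optics");
  // Old API: hier.add(parent, child); clustering.addCluster(parent);
  clustering.addChildCluster(parent, child); // hierarchy now lives inside the Clustering
  clustering.addToplevelCluster(parent);     // renamed from addCluster()
  return clustering;
}

Accordingly, the Cluster constructor loses its trailing hierarchy argument, as seen in the new Cluster<>(...) calls above.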
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
deleted file mode 100644
index 3e1f0650..00000000
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
+++ /dev/null
@@ -1,818 +0,0 @@
-package de.lmu.ifi.dbs.elki.algorithm.clustering;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2012
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import gnu.trove.list.array.TDoubleArrayList;
-
-import java.util.ArrayList;
-import java.util.Comparator;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.Cluster;
-import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.data.model.DendrogramModel;
-import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.DoubleDistanceDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.result.BasicResult;
-import de.lmu.ifi.dbs.elki.result.OrderingFromDataStore;
-import de.lmu.ifi.dbs.elki.result.Result;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyHashmapList;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.ModifiableHierarchy;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-
-/**
- * Implementation of the efficient Single-Link Algorithm SLINK of R. Sibson.
- * <p>
- * Reference: R. Sibson: SLINK: An optimally efficient algorithm for the
- * single-link cluster method. <br>
- * In: The Computer Journal 16 (1973), No. 1, p. 30-34.
- * </p>
- *
- * @author Elke Achtert
- * @param <O> the type of DatabaseObject the algorithm is applied on
- * @param <D> the type of Distance used
- */
-@Title("SLINK: Single Link Clustering")
-@Description("Hierarchical clustering algorithm based on single-link connectivity.")
-@Reference(authors = "R. Sibson", title = "SLINK: An optimally efficient algorithm for the single-link cluster method", booktitle = "The Computer Journal 16 (1973), No. 1, p. 30-34.", url = "http://dx.doi.org/10.1093/comjnl/16.1.30")
-public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
- /**
- * The logger for this class.
- */
- private static final Logging LOG = Logging.getLogger(SLINK.class);
-
- /**
- * Minimum number of clusters to extract
- */
- private int minclusters = -1;
-
- /**
- * Constructor.
- *
- * @param distanceFunction Distance function
- * @param minclusters Minimum clusters to extract. Can be {@code -1}.
- */
- public SLINK(DistanceFunction<? super O, D> distanceFunction, int minclusters) {
- super(distanceFunction);
- this.minclusters = minclusters;
- }
-
- /**
- * Performs the SLINK algorithm on the given database.
- */
- public Result run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
- @SuppressWarnings("unchecked")
- Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
- WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- WritableDataStore<D> lambda = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, distCls);
- // Temporary storage for m.
- WritableDataStore<D> m = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
-
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running SLINK", relation.size(), LOG) : null;
- // has to be an array for monotonicity reasons!
- ModifiableDBIDs processedIDs = DBIDUtil.newArray(relation.size());
-
- // Optimized code path for double distances
- if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && lambda instanceof WritableDoubleDistanceDataStore && m instanceof WritableDoubleDistanceDataStore) {
- @SuppressWarnings("unchecked")
- PrimitiveDoubleDistanceFunction<? super O> dist = (PrimitiveDoubleDistanceFunction<? super O>) getDistanceFunction();
- WritableDoubleDistanceDataStore lambdad = (WritableDoubleDistanceDataStore) lambda;
- WritableDoubleDistanceDataStore md = (WritableDoubleDistanceDataStore) m;
- // apply the algorithm
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- step1double(id, pi, lambdad);
- step2double(id, processedIDs, distQuery.getRelation(), dist, md);
- step3double(id, pi, lambdad, processedIDs, md);
- step4double(id, pi, lambdad, processedIDs);
-
- processedIDs.add(id);
-
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- } else {
- // apply the algorithm
- for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- step1(id, pi, lambda);
- step2(id, processedIDs, distQuery, m);
- step3(id, pi, lambda, processedIDs, m);
- step4(id, pi, lambda, processedIDs);
-
- processedIDs.add(id);
-
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // We don't need m anymore.
- m.destroy();
- m = null;
-
- // Build dendrogram clusters identified by their target object
- if (LOG.isVerbose()) {
- LOG.verbose("Extracting clusters.");
- }
- final BasicResult result;
- if (lambda instanceof DoubleDistanceDataStore) {
- result = extractClustersDouble(relation.getDBIDs(), pi, (DoubleDistanceDataStore) lambda, minclusters);
- } else {
- result = extractClusters(relation.getDBIDs(), pi, lambda, minclusters);
- }
-
- result.addChildResult(new MaterializedRelation<DBID>("SLINK pi", "slink-order", TypeUtil.DBID, pi, processedIDs));
- result.addChildResult(new MaterializedRelation<D>("SLINK lambda", "slink-order", new SimpleTypeInformation<D>(distCls), lambda, processedIDs));
- result.addChildResult(new OrderingFromDataStore<D>("SLINK order", "slink-order", processedIDs, lambda));
- return result;
- }
-
- /**
- * First step: Initialize P(id) = id, L(id) = infinity.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- */
- private void step1(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda) {
- // P(n+1) = n+1:
- pi.put(id, id);
- // L(n+1) = infinity
- lambda.put(id, getDistanceFunction().getDistanceFactory().infiniteDistance());
- }
-
- /**
- * Second step: Determine the pairwise distances from all objects in the
- * pointer representation to the new object with the specified id.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param processedIDs the already processed ids
- * @param m Data store
- * @param distFunc Distance function to use
- */
- private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<O, D> distFunc, WritableDataStore<D> m) {
- O newObj = distFunc.getRelation().get(id);
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- // M(i) = dist(i, n+1)
- m.put(it, distFunc.distance(it, newObj));
- }
- }
-
- /**
- * Third step: Determine the values for P and L
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- * @param m Data store
- */
- private void step3(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs, WritableDataStore<D> m) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- D l_i = lambda.get(it);
- D m_i = m.get(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- D mp_i = m.get(p_i);
-
- // if L(i) >= M(i)
- if (l_i.compareTo(m_i) >= 0) {
- // M(P(i)) = min { M(P(i)), L(i) }
- m.put(p_i, DistanceUtil.min(mp_i, l_i));
-
- // L(i) = M(i)
- lambda.put(it, m_i);
-
- // P(i) = n+1;
- pi.put(it, id);
- } else {
- // M(P(i)) = min { M(P(i)), M(i) }
- m.put(p_i, DistanceUtil.min(mp_i, m_i));
- }
- }
- }
-
- /**
- * Fourth step: Update the clusters if necessary
- *
- * @param id the id of the current object
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- */
- private void step4(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- D l_i = lambda.get(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- D lp_i = lambda.get(p_i);
-
- // if L(i) >= L(P(i))
- if (l_i.compareTo(lp_i) >= 0) {
- // P(i) = n+1
- pi.put(it, id);
- }
- }
- }
-
- /**
- * First step: Initialize P(id) = id, L(id) = infinity.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- */
- private void step1double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda) {
- // P(n+1) = n+1:
- pi.put(id, id);
- // L(n+1) = infinity
- lambda.putDouble(id, Double.POSITIVE_INFINITY);
- }
-
- /**
- * Second step: Determine the pairwise distances from all objects in the
- * pointer representation to the new object with the specified id.
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param processedIDs the already processed ids
- * @param m Data store
- * @param relation Data relation
- * @param distFunc Distance function to use
- */
- private void step2double(DBIDRef id, DBIDs processedIDs, Relation<? extends O> relation, PrimitiveDoubleDistanceFunction<? super O> distFunc, WritableDoubleDistanceDataStore m) {
- O newObj = relation.get(id);
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- // M(i) = dist(i, n+1)
- m.putDouble(it, distFunc.doubleDistance(relation.get(it), newObj));
- }
- }
-
- /**
- * Third step: Determine the values for P and L
- *
- * @param id the id of the object to be inserted into the pointer
- * representation
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- * @param m Data store
- */
- private void step3double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs, WritableDoubleDistanceDataStore m) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- double l_i = lambda.doubleValue(it);
- double m_i = m.doubleValue(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- double mp_i = m.doubleValue(p_i);
-
- // if L(i) >= M(i)
- if (l_i >= m_i) {
- // M(P(i)) = min { M(P(i)), L(i) }
- m.putDouble(p_i, Math.min(mp_i, l_i));
-
- // L(i) = M(i)
- lambda.putDouble(it, m_i);
-
- // P(i) = n+1;
- pi.put(it, id);
- } else {
- // M(P(i)) = min { M(P(i)), M(i) }
- m.putDouble(p_i, Math.min(mp_i, m_i));
- }
- }
- }
-
- /**
- * Fourth step: Update the clusters if necessary
- *
- * @param id the id of the current object
- * @param pi Pi data store
- * @param lambda Lambda data store
- * @param processedIDs the already processed ids
- */
- private void step4double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs) {
- DBIDVar p_i = DBIDUtil.newVar();
- // for i = 1..n
- for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
- double l_i = lambda.doubleValue(it);
- pi.assignVar(it, p_i); // p_i = pi(it)
- double lp_i = lambda.doubleValue(p_i);
-
- // if L(i) >= L(P(i))
- if (l_i >= lp_i) {
- // P(i) = n+1
- pi.put(it, id);
- }
- }
- }
-
- /**
- * Extract all clusters from the pi-lambda-representation.
- *
- * @param ids Object ids to process
- * @param pi Pi store
- * @param lambda Lambda store
- * @param minclusters Minimum number of clusters to extract
- *
- * @return Hierarchical clustering
- */
- private Clustering<DendrogramModel<D>> extractClusters(DBIDs ids, final DBIDDataStore pi, final DataStore<D> lambda, int minclusters) {
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
- D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
-
- // Sort DBIDs by lambda. We need this for two things:
- // a) to determine the stop distance from the "minclusters" parameter
- // b) to process arrows in decreasing / increasing order
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- order.sort(new CompareByLambda<D>(lambda));
-
- // Stop distance:
- final D stopdist = (minclusters > 0) ? lambda.get(order.get(ids.size() - minclusters)) : null;
-
- // The initial pass is top-down.
- DBIDArrayIter it = order.iter();
- int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
- // Tie handling: decrement split.
- if (stopdist != null) {
- while (split > 0) {
- it.seek(split - 1);
- if (stopdist.compareTo(lambda.get(it)) == 0) {
- split--;
- minclusters++;
- } else {
- break;
- }
- }
- }
-
- // Extract the child clusters
- int cnum = 0;
- int expcnum = Math.max(0, minclusters);
- WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
- ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
- ArrayList<D> cluster_dist = new ArrayList<D>(expcnum);
- ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
-
- DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
- // Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
- D dist = lambda.get(it); // Distance to successor
- pi.assignVar(it, succ); // succ = pi(it)
- int clusterid = cluster_map.intValue(succ);
- // Successor cluster has already been created:
- if (clusterid >= 0) {
- cluster_dbids.get(clusterid).add(it);
- cluster_map.putInt(it, clusterid);
- // Update distance to maximum encountered:
- if (cluster_dist.get(clusterid).compareTo(dist) < 0) {
- cluster_dist.set(clusterid, dist);
- }
- } else {
- // Need to start a new cluster:
- clusterid = cnum; // next cluster number.
- ModifiableDBIDs cids = DBIDUtil.newArray();
- // Add element and successor as initial members:
- cids.add(succ);
- cluster_map.putInt(succ, clusterid);
- cids.add(it);
- cluster_map.putInt(it, clusterid);
- // Store new cluster.
- cluster_dbids.add(cids);
- cluster_leads.add(succ);
- cluster_dist.add(dist);
- cnum++;
- }
-
- // Increment the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- // Build a hierarchy out of these clusters.
- Cluster<DendrogramModel<D>> root = null;
- ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
- ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
- // Convert initial clusters to cluster objects
- {
- int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
- clusters.add(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i), hier));
- }
- cluster_dist = null; // Invalidate
- cluster_dbids = null; // Invalidate
- }
- // Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
- int clusterid = cluster_map.intValue(it);
- // The current cluster:
- final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
- clus = clusters.get(clusterid);
- } else {
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(it);
- clus = makeCluster(it, nulldist, cids, hier);
- // No need to store in clusters: cannot have another incoming pi
- // pointer!
- }
- // The successor to join:
- pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
- assert (root == null);
- root = clus;
- } else {
- // Parent cluster:
- int parentid = cluster_map.intValue(succ);
- D depth = lambda.get(it);
- // Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
- hier.add(pclus, clusters.get(parentid));
- hier.add(pclus, clus);
- clusters.set(parentid, pclus); // Replace existing parent cluster
- } else {
- // Create a new, one-element, parent cluster.
- parentid = cnum;
- cnum++;
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(succ);
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
- hier.add(pclus, clus);
- assert (clusters.size() == parentid);
- clusters.add(pclus); // Remember parent cluster
- cluster_map.putInt(succ, parentid); // Reference
- }
- }
-
- // Increment the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // build hierarchy
- final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
- dendrogram.addCluster(root);
-
- return dendrogram;
- }
-
- /**
- * Extract all clusters from the pi-lambda-representation.
- *
- * @param ids Object ids to process
- * @param pi Pi store
- * @param lambda Lambda store
- * @param minclusters Minimum number of clusters to extract
- *
- * @return Hierarchical clustering
- */
- private Clustering<DendrogramModel<D>> extractClustersDouble(DBIDs ids, final DBIDDataStore pi, final DoubleDistanceDataStore lambda, int minclusters) {
- FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
- D nulldist = getDistanceFunction().getDistanceFactory().nullDistance();
-
- // Sort DBIDs by lambda. We need this for two things:
- // a) to determine the stop distance from the "minclusters" parameter
- // b) to process arrows in decreasing / increasing order
- ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
- order.sort(new CompareByDoubleLambda(lambda));
-
- // Stop distance:
- final double stopdist = (minclusters > 0) ? lambda.doubleValue(order.get(ids.size() - minclusters)) : Double.POSITIVE_INFINITY;
-
- // The initial pass is top-down.
- DBIDArrayIter it = order.iter();
- int split = (minclusters > 0) ? Math.max(ids.size() - minclusters, 0) : 0;
- // Tie handling: decrement split.
- if (minclusters > 0) {
- while (split > 0) {
- it.seek(split - 1);
- if (stopdist <= lambda.doubleValue(it)) {
- split--;
- minclusters++;
- } else {
- break;
- }
- }
- }
-
- // Extract the child clusters
- int cnum = 0;
- int expcnum = Math.max(0, minclusters);
- WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
- ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<ModifiableDBIDs>(expcnum);
- TDoubleArrayList cluster_dist = new TDoubleArrayList(expcnum);
- ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
-
- DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
- // Go backwards on the lower part.
- for (it.seek(split - 1); it.valid(); it.retract()) {
- double dist = lambda.doubleValue(it); // Distance to successor
- pi.assignVar(it, succ); // succ = pi(it)
- int clusterid = cluster_map.intValue(succ);
- // Successor cluster has already been created:
- if (clusterid >= 0) {
- cluster_dbids.get(clusterid).add(it);
- cluster_map.putInt(it, clusterid);
- // Update distance to maximum encountered:
- if (cluster_dist.get(clusterid) < dist) {
- cluster_dist.set(clusterid, dist);
- }
- } else {
- // Need to start a new cluster:
- clusterid = cnum; // next cluster number.
- ModifiableDBIDs cids = DBIDUtil.newArray();
- // Add element and successor as initial members:
- cids.add(succ);
- cluster_map.putInt(succ, clusterid);
- cids.add(it);
- cluster_map.putInt(it, clusterid);
- // Store new cluster.
- cluster_dbids.add(cids);
- cluster_leads.add(succ);
- cluster_dist.add(dist);
- cnum++;
- }
-
- // Increment the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- // Build a hierarchy out of these clusters.
- Cluster<DendrogramModel<D>> root = null;
- ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier = new HierarchyHashmapList<Cluster<DendrogramModel<D>>>();
- ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<Cluster<DendrogramModel<D>>>(ids.size() + expcnum - split);
- // Convert initial clusters to cluster objects
- {
- int i = 0;
- for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
- @SuppressWarnings("unchecked")
- D depth = (D) new DoubleDistance(cluster_dist.get(i));
- clusters.add(makeCluster(it2, depth, cluster_dbids.get(i), hier));
- }
- cluster_dist = null; // Invalidate
- cluster_dbids = null; // Invalidate
- }
- // Process the upper part, bottom-up.
- for (it.seek(split); it.valid(); it.advance()) {
- int clusterid = cluster_map.intValue(it);
- // The current cluster:
- final Cluster<DendrogramModel<D>> clus;
- if (clusterid >= 0) {
- clus = clusters.get(clusterid);
- } else {
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(it);
- clus = makeCluster(it, nulldist, cids, hier);
- // No need to store in clusters: cannot have another incoming pi
- // pointer!
- }
- // The successor to join:
- pi.assignVar(it, succ); // succ = pi(it)
- if (DBIDUtil.equal(it, succ)) {
- assert (root == null);
- root = clus;
- } else {
- // Parent cluster:
- int parentid = cluster_map.intValue(succ);
- @SuppressWarnings("unchecked")
- D depth = (D) new DoubleDistance(lambda.doubleValue(it));
- // Parent cluster exists - merge as a new cluster:
- if (parentid >= 0) {
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS, hier);
- hier.add(pclus, clusters.get(parentid));
- hier.add(pclus, clus);
- clusters.set(parentid, pclus); // Replace existing parent cluster
- } else {
- // Create a new, one-element, parent cluster.
- parentid = cnum;
- cnum++;
- ArrayModifiableDBIDs cids = DBIDUtil.newArray(1);
- cids.add(succ);
- Cluster<DendrogramModel<D>> pclus = makeCluster(succ, depth, cids, hier);
- hier.add(pclus, clus);
- assert (clusters.size() == parentid);
- clusters.add(pclus); // Remember parent cluster
- cluster_map.putInt(succ, parentid); // Reference
- }
- }
-
- // Increment the progress counter
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
-
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
- // build hierarchy
- final Clustering<DendrogramModel<D>> dendrogram = new Clustering<DendrogramModel<D>>("Single-Link-Dendrogram", "slink-dendrogram");
- dendrogram.addCluster(root);
-
- return dendrogram;
- }
-
- /**
- * Make the cluster for the given object
- *
- * @param lead Leading object
- * @param depth Linkage depth
- * @param members Member objects
- * @param hier Cluster hierarchy
- * @return Cluster
- */
- private Cluster<DendrogramModel<D>> makeCluster(DBIDRef lead, D depth, DBIDs members, ModifiableHierarchy<Cluster<DendrogramModel<D>>> hier) {
- final String name;
- if (members.size() == 0) {
- name = "merge_" + lead + "_" + depth;
- } else if (depth.isInfiniteDistance()) {
- assert (members.contains(lead));
- name = "object_" + lead;
- } else {
- name = "cluster_" + lead + "_" + depth;
- }
- Cluster<DendrogramModel<D>> cluster = new Cluster<DendrogramModel<D>>(name, members, new DendrogramModel<D>(depth), hier);
- return cluster;
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
- }
-
- @Override
- protected Logging getLogger() {
- return LOG;
- }
-
- /**
- * Order a DBID collection by the lambda value.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- *
- * @param <D> Distance type
- */
- private static final class CompareByLambda<D extends Distance<D>> implements Comparator<DBIDRef> {
- /**
- * Lambda storage
- */
- private final DataStore<D> lambda;
-
- /**
- * Constructor.
- *
- * @param lambda Lambda storage
- */
- protected CompareByLambda(DataStore<D> lambda) {
- this.lambda = lambda;
- }
-
- @Override
- public int compare(DBIDRef id1, DBIDRef id2) {
- D k1 = lambda.get(id1);
- D k2 = lambda.get(id2);
- assert (k1 != null);
- assert (k2 != null);
- return k1.compareTo(k2);
- }
- }
-
- /**
- * Order a DBID collection by the lambda value.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- private static final class CompareByDoubleLambda implements Comparator<DBIDRef> {
- /**
- * Lambda storage
- */
- private final DoubleDistanceDataStore lambda;
-
- /**
- * Constructor.
- *
- * @param lambda Lambda storage
- */
- protected CompareByDoubleLambda(DoubleDistanceDataStore lambda) {
- this.lambda = lambda;
- }
-
- @Override
- public int compare(DBIDRef id1, DBIDRef id2) {
- double k1 = lambda.doubleValue(id1);
- double k2 = lambda.doubleValue(id2);
- return Double.compare(k1, k2);
- }
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- /**
- * The minimum number of clusters to extract
- */
- public static final OptionID SLINK_MINCLUSTERS_ID = new OptionID("slink.minclusters", "The minimum number of clusters to extract.");
-
- protected int minclusters = -1;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter minclustersP = new IntParameter(SLINK_MINCLUSTERS_ID);
- minclustersP.addConstraint(new GreaterEqualConstraint(1));
- minclustersP.setOptional(true);
- if (config.grab(minclustersP)) {
- minclusters = minclustersP.intValue();
- }
- }
-
- @Override
- protected SLINK<O, D> makeInstance() {
- return new SLINK<O, D>(distanceFunction, minclusters);
- }
- }
-}
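For reference, the pointer representation maintained by the deleted SLINK code can be written compactly with plain arrays: pi[i] points to the "merge partner" of object i, and lambda[i] is the distance at which that merge happens. A self-contained sketch of Sibson's steps 1-4, assuming a precomputed distance matrix instead of the ELKI data stores:

// Insert object n into the pointer representation of objects 0..n-1.
static void slinkInsert(int n, double[][] dist, int[] pi, double[] lambda, double[] m) {
  pi[n] = n;                            // step 1: P(n+1) = n+1
  lambda[n] = Double.POSITIVE_INFINITY; //         L(n+1) = infinity
  for(int i = 0; i < n; i++) {
    m[i] = dist[i][n];                  // step 2: M(i) = dist(i, n+1)
  }
  for(int i = 0; i < n; i++) {          // step 3: determine P and L
    if(lambda[i] >= m[i]) {
      m[pi[i]] = Math.min(m[pi[i]], lambda[i]);
      lambda[i] = m[i];
      pi[i] = n;
    } else {
      m[pi[i]] = Math.min(m[pi[i]], m[i]);
    }
  }
  for(int i = 0; i < n; i++) {          // step 4: re-point stale entries
    if(lambda[i] >= lambda[pi[i]]) {
      pi[i] = n;
    }
  }
}

Calling slinkInsert for n = 0..N-1 runs in O(N^2) time with O(N) extra memory, and yields exactly the pi/lambda stores that extractClusters and extractClustersDouble above turn into a dendrogram.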
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
index f3b59c42..95d9f23c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -151,7 +151,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
FiniteProgress objprog = LOG.isVerbose() ? new FiniteProgress("SNNClustering", relation.size(), LOG) : null;
IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null;
- resultList = new ArrayList<ModifiableDBIDs>();
+ resultList = new ArrayList<>();
noise = DBIDUtil.newHashSet();
processedIDs = DBIDUtil.newHashSet(relation.size());
if(relation.size() >= minpts) {
@@ -183,11 +183,11 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
clusprog.setCompleted(LOG);
}
- Clustering<Model> result = new Clustering<Model>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
+ Clustering<Model> result = new Clustering<>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
for(Iterator<ModifiableDBIDs> resultListIter = resultList.iterator(); resultListIter.hasNext();) {
- result.addCluster(new Cluster<Model>(resultListIter.next(), ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(resultListIter.next(), ClusterModel.CLUSTER));
}
- result.addCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
return result;
}
@@ -322,7 +322,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
Class<SharedNearestNeighborSimilarityFunction<O>> cls = ClassGenericsUtil.uglyCastIntoSubclass(SharedNearestNeighborSimilarityFunction.class);
similarityFunction = config.tryInstantiate(cls);
- DistanceParameter<IntegerDistance> epsilonP = new DistanceParameter<IntegerDistance>(EPSILON_ID, IntegerDistance.FACTORY);
+ DistanceParameter<IntegerDistance> epsilonP = new DistanceParameter<>(EPSILON_ID, IntegerDistance.FACTORY);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -336,7 +336,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
@Override
protected SNNClustering<O> makeInstance() {
- return new SNNClustering<O>(similarityFunction, epsilon, minpts);
+ return new SNNClustering<>(similarityFunction, epsilon, minpts);
}
}
}
\ No newline at end of file
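Apart from the copyright bump, the SNNClustering hunks apply the same renames as above (diamond operator, addCluster to addToplevelCluster). The result assembly pattern they touch, as a small sketch using the types from the hunk (the helper method is illustrative; imports omitted):

static Clustering<Model> assembleResult(List<ModifiableDBIDs> resultList, ModifiableDBIDs noise) {
  Clustering<Model> result = new Clustering<>("Shared-Nearest-Neighbor Clustering", "snn-clustering");
  for(ModifiableDBIDs ids : resultList) {
    result.addToplevelCluster(new Cluster<Model>(ids, ClusterModel.CLUSTER));
  }
  // The boolean flag marks the cluster as noise.
  result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
  return result;
}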
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
index 1cb1eb0d..0d82add9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -66,8 +66,9 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.FirstNEigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.IntegerPriorityObject;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -263,8 +264,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
*/
private Relation<ParameterizationFunction> preprocess(Database db, Relation<V> vrel) {
DBIDs ids = vrel.getDBIDs();
- SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<ParameterizationFunction>(ParameterizationFunction.class);
- MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<ParameterizationFunction>(db, type, ids);
+ SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<>(ParameterizationFunction.class);
+ MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<>(db, type, ids);
// Project
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -284,12 +285,12 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @return a mapping of subspace dimensionalities to clusters
*/
private Clustering<Model> doRun(Relation<ParameterizationFunction> relation, FiniteProgress progress) {
- Clustering<Model> res = new Clustering<Model>("CASH clustering", "cash-clustering");
+ Clustering<Model> res = new Clustering<>("CASH clustering", "cash-clustering");
final int dim = dimensionality(relation);
// init heap
- Heap<IntegerPriorityObject<CASHInterval>> heap = new Heap<IntegerPriorityObject<CASHInterval>>();
+ ObjectHeap<IntegerPriorityObject<CASHInterval>> heap = new ComparableMinHeap<>();
ModifiableDBIDs noiseIDs = DBIDUtil.newHashSet(relation.getDBIDs());
initHeap(heap, relation, dim, noiseIDs);
@@ -338,7 +339,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// add result of dim-1 to this result
Clustering<Model> res_dim_minus_1 = doRun(db, progress);
for (Cluster<Model> cluster : res_dim_minus_1.getAllClusters()) {
- res.addCluster(cluster);
+ res.addToplevelCluster(cluster);
noiseIDs.removeDBIDs(cluster.getIDs());
clusterIDs.addDBIDs(cluster.getIDs());
processedIDs.addDBIDs(cluster.getIDs());
@@ -349,23 +350,23 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
else {
LinearEquationSystem les = runDerivator(relation, dim - 1, interval.getIDs());
Cluster<Model> c = new Cluster<Model>(interval.getIDs(), new LinearEquationModel(les));
- res.addCluster(c);
+ res.addToplevelCluster(c);
noiseIDs.removeDBIDs(interval.getIDs());
clusterIDs.addDBIDs(interval.getIDs());
processedIDs.addDBIDs(interval.getIDs());
}
// Rebuild heap
- ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<IntegerPriorityObject<CASHInterval>>(heap.size());
- for (IntegerPriorityObject<CASHInterval> obj : heap) {
- heapVector.add(obj);
+ ArrayList<IntegerPriorityObject<CASHInterval>> heapVector = new ArrayList<>(heap.size());
+ for (ObjectHeap.UnsortedIter<IntegerPriorityObject<CASHInterval>> iter = heap.unsortedIter(); iter.valid(); iter.advance()) {
+ heapVector.add(iter.get());
}
heap.clear();
for (IntegerPriorityObject<CASHInterval> pair : heapVector) {
CASHInterval currentInterval = pair.getObject();
currentInterval.removeIDs(clusterIDs);
if (currentInterval.getIDs().size() >= minPts) {
- heap.add(new IntegerPriorityObject<CASHInterval>(currentInterval.priority(), currentInterval));
+ heap.add(new IntegerPriorityObject<>(currentInterval.priority(), currentInterval));
}
}
@@ -378,12 +379,12 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
if (!noiseIDs.isEmpty()) {
if (dim == noiseDim) {
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER);
- res.addCluster(c);
+ res.addToplevelCluster(c);
processedIDs.addDBIDs(noiseIDs);
} else if (noiseIDs.size() >= minPts) {
LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
Cluster<Model> c = new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les));
- res.addCluster(c);
+ res.addToplevelCluster(c);
processedIDs.addDBIDs(noiseIDs);
}
}
@@ -427,7 +428,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param dim the dimensionality of the database
* @param ids the ids of the database
*/
- private void initHeap(Heap<IntegerPriorityObject<CASHInterval>> heap, Relation<ParameterizationFunction> relation, int dim, DBIDs ids) {
+ private void initHeap(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap, Relation<ParameterizationFunction> relation, int dim, DBIDs ids) {
CASHIntervalSplit split = new CASHIntervalSplit(relation, minPts);
// determine minimum and maximum function value of all functions
@@ -479,7 +480,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
ModifiableDBIDs intervalIDs = split.determineIDs(ids, alphaInterval, d_mins[i], d_maxs[i]);
if (intervalIDs != null && intervalIDs.size() >= minPts) {
CASHInterval rootInterval = new CASHInterval(alphaMin, alphaMax, split, intervalIDs, -1, 0, d_mins[i], d_maxs[i]);
- heap.add(new IntegerPriorityObject<CASHInterval>(rootInterval.priority(), rootInterval));
+ heap.add(new IntegerPriorityObject<>(rootInterval.priority(), rootInterval));
}
}
@@ -503,8 +504,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
*/
private MaterializedRelation<ParameterizationFunction> buildDB(int dim, Matrix basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
ProxyDatabase proxy = new ProxyDatabase(ids);
- SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<ParameterizationFunction>(ParameterizationFunction.class);
- MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<ParameterizationFunction>(proxy, type, ids);
+ SimpleTypeInformation<ParameterizationFunction> type = new SimpleTypeInformation<>(ParameterizationFunction.class);
+ MaterializedRelation<ParameterizationFunction> prep = new MaterializedRelation<>(proxy, type, ids);
proxy.addRelation(prep);
// Project
@@ -566,7 +567,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private double sinusProduct(int start, int end, double[] alpha) {
double result = 1;
for (int j = start; j < end; j++) {
- result *= StrictMath.sin(alpha[j]);
+ result *= Math.sin(alpha[j]);
}
return result;
}
@@ -578,7 +579,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param heap the heap storing the intervals
* @return the next ''best'' interval at maximum level
*/
- private CASHInterval determineNextIntervalAtMaxLevel(Heap<IntegerPriorityObject<CASHInterval>> heap) {
+ private CASHInterval determineNextIntervalAtMaxLevel(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap) {
CASHInterval next = doDetermineNextIntervalAtMaxLevel(heap);
// noise path was chosen
while (next == null) {
@@ -598,7 +599,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param heap the heap storing the intervals
* @return the next ''best'' interval at maximum level
*/
- private CASHInterval doDetermineNextIntervalAtMaxLevel(Heap<IntegerPriorityObject<CASHInterval>> heap) {
+ private CASHInterval doDetermineNextIntervalAtMaxLevel(ObjectHeap<IntegerPriorityObject<CASHInterval>> heap) {
CASHInterval interval = heap.poll().getObject();
int dim = interval.getDimensionality();
while (true) {
@@ -632,10 +633,10 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
int comp = interval.getLeftChild().compareTo(interval.getRightChild());
if (comp < 0) {
bestInterval = interval.getRightChild();
- heap.add(new IntegerPriorityObject<CASHInterval>(interval.getLeftChild().priority(), interval.getLeftChild()));
+ heap.add(new IntegerPriorityObject<>(interval.getLeftChild().priority(), interval.getLeftChild()));
} else {
bestInterval = interval.getLeftChild();
- heap.add(new IntegerPriorityObject<CASHInterval>(interval.getRightChild().priority(), interval.getRightChild()));
+ heap.add(new IntegerPriorityObject<>(interval.getRightChild().priority(), interval.getRightChild()));
}
} else if (interval.getLeftChild() == null) {
bestInterval = interval.getRightChild();
@@ -733,8 +734,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
DBIDs ids = interval.getIDs();
ProxyDatabase proxy = new ProxyDatabase(ids);
int dim = dimensionality(relation);
- SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dim);
- MaterializedRelation<DoubleVector> prep = new MaterializedRelation<DoubleVector>(proxy, type, ids);
+ SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
+ MaterializedRelation<DoubleVector> prep = new MaterializedRelation<>(proxy, type, ids);
proxy.addRelation(prep);
// Project
@@ -792,8 +793,8 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private Database buildDerivatorDB(Relation<ParameterizationFunction> relation, DBIDs ids) {
ProxyDatabase proxy = new ProxyDatabase(ids);
int dim = dimensionality(relation);
- SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<DoubleVector>(DoubleVector.FACTORY, dim);
- MaterializedRelation<DoubleVector> prep = new MaterializedRelation<DoubleVector>(proxy, type, ids);
+ SimpleTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
+ MaterializedRelation<DoubleVector> prep = new MaterializedRelation<>(proxy, type, ids);
proxy.addRelation(prep);
// Project
@@ -864,7 +865,7 @@ public class CASH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
@Override
protected CASH<NumberVector<?>> makeInstance() {
- return new CASH<NumberVector<?>>(minpts, maxlevel, mindim, jitter, adjust);
+ return new CASH<>(minpts, maxlevel, mindim, jitter, adjust);
}
}
}
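In CASH above, the untyped Heap is replaced by the ObjectHeap interface (here backed by ComparableMinHeap), which is no longer Iterable: bulk access goes through an explicit unsorted iterator, as in the heap-rebuild step of the hunk. A minimal sketch of that drain pattern (the generic helper is illustrative; imports omitted):

static <T> ArrayList<T> drainUnsorted(ObjectHeap<T> heap) {
  ArrayList<T> copy = new ArrayList<>(heap.size());
  // UnsortedIter visits every element, but in no particular order.
  for(ObjectHeap.UnsortedIter<T> it = heap.unsortedIter(); it.valid(); it.advance()) {
    copy.add(it.get());
  }
  heap.clear();
  return copy;
}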
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
index ac50559e..9a4b8512 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -185,7 +185,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
LocalProjectionIndex<V, ?> preprocin = partitionDistanceQuery.getIndex();
// partitioning
- Map<Integer, ModifiableDBIDs> partitionMap = new HashMap<Integer, ModifiableDBIDs>();
+ Map<Integer, ModifiableDBIDs> partitionMap = new HashMap<>();
FiniteProgress partitionProgress = LOG.isVerbose() ? new FiniteProgress("Partitioning", relation.size(), LOG) : null;
int processed = 1;
@@ -214,7 +214,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
// convert for partition algorithm.
// TODO: do this with DynamicDBIDs instead
- Map<Integer, DBIDs> pmap = new HashMap<Integer, DBIDs>();
+ Map<Integer, DBIDs> pmap = new HashMap<>();
for(Entry<Integer, ModifiableDBIDs> ent : partitionMap.entrySet()) {
pmap.put(ent.getKey(), ent.getValue());
}
@@ -230,14 +230,14 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
* @param query The preprocessor based query function
*/
private Clustering<Model> runPartitionAlgorithm(Relation<V> relation, Map<Integer, DBIDs> partitionMap, DistanceQuery<V, D> query) {
- Clustering<Model> result = new Clustering<Model>("COPAC clustering", "copac-clustering");
+ Clustering<Model> result = new Clustering<>("COPAC clustering", "copac-clustering");
// TODO: use an extra finite progress for the partitions?
for(Entry<Integer, DBIDs> pair : partitionMap.entrySet()) {
// noise partition
if(pair.getKey() == RelationUtil.dimensionality(relation)) {
// Make a Noise cluster
- result.addCluster(new Cluster<Model>(pair.getValue(), true, ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(pair.getValue(), true, ClusterModel.CLUSTER));
}
else {
DBIDs partids = pair.getValue();
@@ -251,10 +251,10 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
// Re-Wrap resulting Clusters as DimensionModel clusters.
for(Cluster<Model> clus : p.getAllClusters()) {
if(clus.isNoise()) {
- result.addCluster(new Cluster<Model>(clus.getIDs(), true, ClusterModel.CLUSTER));
+ result.addToplevelCluster(new Cluster<Model>(clus.getIDs(), true, ClusterModel.CLUSTER));
}
else {
- result.addCluster(new Cluster<Model>(clus.getIDs(), new DimensionModel(pair.getKey())));
+ result.addToplevelCluster(new Cluster<Model>(clus.getIDs(), new DimensionModel(pair.getKey())));
}
}
}
@@ -316,12 +316,12 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ClassParameter<Factory<V, ?>> indexP = new ClassParameter<LocalProjectionIndex.Factory<V, ?>>(PREPROCESSOR_ID, LocalProjectionIndex.Factory.class);
+ ClassParameter<Factory<V, ?>> indexP = new ClassParameter<>(PREPROCESSOR_ID, LocalProjectionIndex.Factory.class);
if(config.grab(indexP)) {
indexI = indexP.instantiateClass(config);
}
- ObjectParameter<FilteredLocalPCABasedDistanceFunction<V, ?, D>> pdistP = new ObjectParameter<FilteredLocalPCABasedDistanceFunction<V, ?, D>>(PARTITION_DISTANCE_ID, FilteredLocalPCABasedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
+ ObjectParameter<FilteredLocalPCABasedDistanceFunction<V, ?, D>> pdistP = new ObjectParameter<>(PARTITION_DISTANCE_ID, FilteredLocalPCABasedDistanceFunction.class, LocallyWeightedDistanceFunction.class);
if(config.grab(pdistP)) {
ListParameterization predefinedDist = new ListParameterization();
predefinedDist.addParameter(IndexBasedDistanceFunction.INDEX_ID, indexI);
@@ -332,7 +332,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
}
// Parameterize algorithm:
- ClassParameter<ClusteringAlgorithm<Clustering<Model>>> algP = new ClassParameter<ClusteringAlgorithm<Clustering<Model>>>(PARTITION_ALGORITHM_ID, ClusteringAlgorithm.class);
+ ClassParameter<ClusteringAlgorithm<Clustering<Model>>> algP = new ClassParameter<>(PARTITION_ALGORITHM_ID, ClusteringAlgorithm.class);
if(config.grab(algP)) {
ListParameterization predefined = new ListParameterization();
predefined.addParameter(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, pdistI);
@@ -348,7 +348,7 @@ public class COPAC<V extends NumberVector<?>, D extends Distance<D>> extends Abs
@Override
protected COPAC<V, D> makeInstance() {
- return new COPAC<V, D>(pdistI, algC, algO);
+ return new COPAC<>(pdistI, algC, algO);
}
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
index 7e7314b4..d535e136 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,8 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
import java.util.ArrayList;
import java.util.List;
-import java.util.SortedMap;
-import java.util.TreeMap;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
@@ -58,6 +56,8 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.FirstNEigenPairFilter;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult;
import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -125,7 +125,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// run COPAC
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Preprocessing local correlation dimensionalities and partitioning data", LOG);
}
Clustering<Model> copacResult = copacAlgorithm.run(relation);
@@ -133,16 +133,16 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
DistanceQuery<V, IntegerDistance> query = copacAlgorithm.getPartitionDistanceQuery();
// extract correlation clusters
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Extract correlation clusters", LOG);
}
- SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality);
- if(LOG.isDebugging()) {
+ List<List<Cluster<CorrelationModel<V>>>> clusterMap = extractCorrelationClusters(copacResult, relation, dimensionality);
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder("Step 2: Extract correlation clusters...");
- for(Integer corrDim : clusterMap.keySet()) {
+ for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
msg.append("\n\ncorrDim ").append(corrDim);
- for(Cluster<CorrelationModel<V>> cluster : correlationClusters) {
+ for (Cluster<CorrelationModel<V>> cluster : correlationClusters) {
msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
// .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
// msg.append("\n basis " +
@@ -152,45 +152,45 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
LOG.debugFine(msg.toString());
}
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
int clusters = 0;
- for(List<Cluster<CorrelationModel<V>>> correlationClusters : clusterMap.values()) {
+ for (List<Cluster<CorrelationModel<V>>> correlationClusters : clusterMap) {
clusters += correlationClusters.size();
}
LOG.verbose(clusters + " clusters extracted.");
}
// build hierarchy
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(3, "Building hierarchy", LOG);
}
- buildHierarchy(clusterMap, query);
- if(LOG.isDebugging()) {
+ Clustering<CorrelationModel<V>> clustering = new Clustering<>("ERiC clustering", "eric-clustering");
+ buildHierarchy(clustering, clusterMap, query);
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder("Step 3: Build hierarchy");
- for(Integer corrDim : clusterMap.keySet()) {
+ for (int corrDim = 0; corrDim < clusterMap.size(); corrDim++) {
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(corrDim);
- for(Cluster<CorrelationModel<V>> cluster : correlationClusters) {
+ for (Cluster<CorrelationModel<V>> cluster : correlationClusters) {
msg.append("\n cluster ").append(cluster).append(", ids: ").append(cluster.getIDs().size());
// .append(", level: ").append(cluster.getLevel()).append(", index: ").append(cluster.getLevelIndex());
- for(int i = 0; i < cluster.getParents().size(); i++) {
- msg.append("\n parent ").append(cluster.getParents().get(i));
+ for (Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterParents(cluster); iter.valid(); iter.advance()) {
+ msg.append("\n parent ").append(iter.get());
}
- for(int i = 0; i < cluster.numChildren(); i++) {
- msg.append("\n child ").append(cluster.getChildren().get(i));
+ for (Iter<Cluster<CorrelationModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(cluster); iter.valid(); iter.advance()) {
+ msg.append("\n child ").append(iter.get());
}
}
}
LOG.debugFine(msg.toString());
}
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
- Clustering<CorrelationModel<V>> result = new Clustering<CorrelationModel<V>>("ERiC clustering", "eric-clustering");
- for(Cluster<CorrelationModel<V>> rc : clusterMap.get(clusterMap.lastKey())) {
- result.addCluster(rc);
+ for (Cluster<CorrelationModel<V>> rc : clusterMap.get(clusterMap.size() - 1)) {
+ clustering.addToplevelCluster(rc);
}
- return result;
+ return clustering;
}
/**
@@ -203,77 +203,75 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
*
* @param database the database containing the objects
* @param dimensionality the dimensionality of the feature space
- * @return a mapping of correlation dimension to maps of clusters
+ * @return a list of clusters for each dimensionality
*/
- private SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> extractCorrelationClusters(Clustering<Model> copacResult, Relation<V> database, int dimensionality) {
+ private List<List<Cluster<CorrelationModel<V>>>> extractCorrelationClusters(Clustering<Model> copacResult, Relation<V> database, int dimensionality) {
// result
- SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap = new TreeMap<Integer, List<Cluster<CorrelationModel<V>>>>();
+ List<List<Cluster<CorrelationModel<V>>>> clusterMap = new ArrayList<>();
+ for (int i = 0; i <= dimensionality; i++) {
+ clusterMap.add(new ArrayList<Cluster<CorrelationModel<V>>>());
+ }
// noise cluster containing all noise objects over all partitions
Cluster<Model> noise = null;
// iterate over correlation dimensions
- for(Cluster<Model> clus : copacResult.getAllClusters()) {
+ for (Cluster<Model> clus : copacResult.getAllClusters()) {
DBIDs group = clus.getIDs();
- if(clus.getModel() != null && clus.getModel() instanceof DimensionModel) {
+ if (clus.getModel() != null && clus.getModel() instanceof DimensionModel) {
int correlationDimension = ((DimensionModel) clus.getModel()).getDimension();
ListParameterization parameters = pcaParameters(correlationDimension);
Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
- for(ParameterException e : parameters.getErrors()) {
- LOG.warning("Error in internal parameterization: " + e.getMessage());
- }
+ parameters.failOnErrors();
// get cluster list for this dimension.
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(correlationDimension);
- if(correlationClusters == null) {
- correlationClusters = new ArrayList<Cluster<CorrelationModel<V>>>();
- clusterMap.put(correlationDimension, correlationClusters);
- }
-
PCAFilteredResult pcares = pca.processIds(group, database);
V centroid = Centroid.make(database, group).toVector(database);
- Cluster<CorrelationModel<V>> correlationCluster = new Cluster<CorrelationModel<V>>("[" + correlationDimension + "_" + correlationClusters.size() + "]", group, new CorrelationModel<V>(pcares, centroid), new ArrayList<Cluster<CorrelationModel<V>>>(), new ArrayList<Cluster<CorrelationModel<V>>>());
+ Cluster<CorrelationModel<V>> correlationCluster = new Cluster<>("[" + correlationDimension + "_" + correlationClusters.size() + "]", group, new CorrelationModel<>(pcares, centroid));
correlationClusters.add(correlationCluster);
}
// partition containing noise
- else if(clus.getModel() != null && clus.isNoise()) {
- if(noise == null) {
+ else if (clus.getModel() != null && clus.isNoise()) {
+ if (noise == null) {
noise = clus;
- }
- else {
+ } else {
ModifiableDBIDs merged = DBIDUtil.newHashSet(noise.getIDs());
merged.addDBIDs(clus.getIDs());
noise.setIDs(merged);
}
- }
- else {
+ } else {
throw new IllegalStateException("Unexpected group returned: " + clus.getClass().getName());
}
}
- if(noise != null && noise.size() > 0) {
+ if (noise != null && noise.size() > 0) {
// get cluster list for this dimension.
List<Cluster<CorrelationModel<V>>> correlationClusters = clusterMap.get(dimensionality);
- if(correlationClusters == null) {
- correlationClusters = new ArrayList<Cluster<CorrelationModel<V>>>();
- clusterMap.put(dimensionality, correlationClusters);
- }
ListParameterization parameters = pcaParameters(dimensionality);
Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class);
PCAFilteredRunner<V> pca = parameters.tryInstantiate(cls);
- for(ParameterException e : parameters.getErrors()) {
+ for (ParameterException e : parameters.getErrors()) {
LOG.warning("Error in internal parameterization: " + e.getMessage());
}
PCAFilteredResult pcares = pca.processIds(noise.getIDs(), database);
V centroid = Centroid.make(database, noise.getIDs()).toVector(database);
- Cluster<CorrelationModel<V>> correlationCluster = new Cluster<CorrelationModel<V>>("[noise]", noise.getIDs(), new CorrelationModel<V>(pcares, centroid), new ArrayList<Cluster<CorrelationModel<V>>>(), new ArrayList<Cluster<CorrelationModel<V>>>());
+ Cluster<CorrelationModel<V>> correlationCluster = new Cluster<>("[noise]", noise.getIDs(), new CorrelationModel<>(pcares, centroid));
correlationClusters.add(correlationCluster);
}
+ // Remove trailing dimensionalities for which no clusters were found.
+ for (int i = dimensionality; i > 0; i--) {
+ if (clusterMap.get(i).size() > 0) {
+ break;
+ }
+ clusterMap.remove(i);
+ }
+
return clusterMap;
}
@@ -292,48 +290,48 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
return parameters;
}
- private void buildHierarchy(SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> clusterMap, DistanceQuery<V, IntegerDistance> query) {
- StringBuilder msg = new StringBuilder();
+ private void buildHierarchy(Clustering<CorrelationModel<V>> clustering, List<List<Cluster<CorrelationModel<V>>>> clusterMap, DistanceQuery<V, IntegerDistance> query) {
+ StringBuilder msg = LOG.isDebuggingFine() ? new StringBuilder() : null;
+ Hierarchy<Cluster<CorrelationModel<V>>> hier = clustering.getClusterHierarchy();
DBSCAN<V, DoubleDistance> dbscan = ClassGenericsUtil.castWithGenericsOrNull(DBSCAN.class, copacAlgorithm.getPartitionAlgorithm(query));
- if(dbscan == null) {
+ if (dbscan == null) {
// TODO: appropriate exception class?
throw new IllegalArgumentException("ERiC was run without DBSCAN as COPAC algorithm!");
}
DistanceFunction<? super V, ?> dfun = ProxyDistanceFunction.unwrapDistance(dbscan.getDistanceFunction());
ERiCDistanceFunction distanceFunction = ClassGenericsUtil.castWithGenericsOrNull(ERiCDistanceFunction.class, dfun);
- if(distanceFunction == null) {
+ if (distanceFunction == null) {
// TODO: appropriate exception class?
throw new IllegalArgumentException("ERiC was run without ERiCDistanceFunction as distance function: got " + dfun.getClass());
}
- Integer lambda_max = clusterMap.lastKey();
+ // Find maximum dimensionality found:
+ int lambda_max = clusterMap.size() - 1;
- for(Integer childCorrDim : clusterMap.keySet()) {
+ for (int childCorrDim = 0; childCorrDim < lambda_max; childCorrDim++) {
List<Cluster<CorrelationModel<V>>> children = clusterMap.get(childCorrDim);
- SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> parentMap = clusterMap.tailMap(childCorrDim + 1);
- if(LOG.isDebugging()) {
+ // SortedMap<Integer, List<Cluster<CorrelationModel<V>>>> parentMap =
+ // clusterMap.tailMap(childCorrDim + 1);
+ if (msg != null) {
msg.append("\ncorrdim ").append(childCorrDim);
- msg.append("\nparents ").append(parentMap.keySet());
+ // msg.append("\nparents ").append(parentMap.keySet());
}
- for(Cluster<CorrelationModel<V>> child : children) {
- for(Integer parentCorrDim : parentMap.keySet()) {
- List<Cluster<CorrelationModel<V>>> parents = parentMap.get(parentCorrDim);
- for(Cluster<CorrelationModel<V>> parent : parents) {
+ for (Cluster<CorrelationModel<V>> child : children) {
+ for (int parentCorrDim = childCorrDim + 1; parentCorrDim <= lambda_max; parentCorrDim++) {
+ List<Cluster<CorrelationModel<V>>> parents = clusterMap.get(parentCorrDim);
+ for (Cluster<CorrelationModel<V>> parent : parents) {
int subspaceDim_parent = parent.getModel().getPCAResult().getCorrelationDimension();
- if(subspaceDim_parent == lambda_max && child.getParents().isEmpty()) {
- parent.getChildren().add(child);
- child.getParents().add(parent);
- if(LOG.isDebugging()) {
+ if (subspaceDim_parent == lambda_max && hier.numParents(child) == 0) {
+ clustering.addChildCluster(parent, child);
+ if (msg != null) {
msg.append('\n').append(parent).append(" is parent of ").append(child);
}
- }
- else {
+ } else {
BitDistance dist = distanceFunction.distance(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
- if(!dist.bitValue() && (child.getParents().isEmpty() || !isParent(distanceFunction, parent, child.getParents()))) {
- parent.getChildren().add(child);
- child.getParents().add(parent);
- if(LOG.isDebugging()) {
+ if (!dist.bitValue() && (hier.numParents(child) == 0 || !isParent(distanceFunction, parent, hier.iterParents(child)))) {
+ clustering.addChildCluster(parent, child);
+ if (msg != null) {
msg.append('\n').append(parent).append(" is parent of ").append(child);
}
}
@@ -342,7 +340,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
}
- if(LOG.isDebugging()) {
+ if (msg != null) {
LOG.debugFine(msg.toString());
}
@@ -355,32 +353,32 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param distanceFunction the distance function for distance computation
* between the clusters
* @param parent the parent to be tested
- * @param children the list of children to be tested
+ * @param iter the iterator over the children to be tested
* @return true, if the specified parent cluster is a parent of one child of
* the children clusters, false otherwise
*/
- private boolean isParent(ERiCDistanceFunction distanceFunction, Cluster<CorrelationModel<V>> parent, List<Cluster<CorrelationModel<V>>> children) {
-
- StringBuilder msg = new StringBuilder();
+ private boolean isParent(ERiCDistanceFunction distanceFunction, Cluster<CorrelationModel<V>> parent, Iter<Cluster<CorrelationModel<V>>> iter) {
+ StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null;
- for(Cluster<CorrelationModel<V>> child : children) {
- if(parent.getModel().getPCAResult().getCorrelationDimension() == child.getModel().getPCAResult().getCorrelationDimension()) {
+ for (; iter.valid(); iter.advance()) {
+ Cluster<CorrelationModel<V>> child = iter.get();
+ if (parent.getModel().getPCAResult().getCorrelationDimension() == child.getModel().getPCAResult().getCorrelationDimension()) {
return false;
}
BitDistance dist = distanceFunction.distance(parent.getModel().getCentroid(), child.getModel().getCentroid(), parent.getModel().getPCAResult(), child.getModel().getPCAResult());
- if(LOG.isDebugging()) {
+ if (msg != null) {
msg.append("\ndist(").append(child).append(" - ").append(parent).append(") = ").append(dist);
}
- if(!dist.bitValue()) {
- if(LOG.isDebugging()) {
- LOG.debugFine(msg.toString());
+ if (!dist.bitValue()) {
+ if (msg != null) {
+ LOG.debugFine(msg);
}
return true;
}
}
- if(LOG.isDebugging()) {
+ if (msg != null) {
LOG.debugFine(msg.toString());
}
return false;
@@ -395,7 +393,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
protected Logging getLogger() {
return LOG;
}
-
+
/**
* Parameterization class.
*
@@ -418,7 +416,7 @@ public class ERiC<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
@Override
protected ERiC<V> makeInstance() {
- return new ERiC<V>(copac);
+ return new ERiC<>(copac);
}
}
-}
\ No newline at end of file
+}
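
The ERiC changes above replace a SortedMap keyed by correlation dimension with an ArrayList indexed by dimension, and move parent/child bookkeeping from per-cluster lists into the Hierarchy owned by the Clustering. A minimal stand-alone sketch of the map-to-list pattern, using plain Java with String placeholders instead of ELKI cluster types (all names hypothetical):

import java.util.ArrayList;
import java.util.List;

public class DimensionIndexedLists {
  public static void main(String[] args) {
    int dimensionality = 3;
    // One pre-allocated bucket per dimensionality 0..dimensionality,
    // so get(dim) never returns null (unlike the former TreeMap).
    List<List<String>> clusterMap = new ArrayList<>();
    for (int i = 0; i <= dimensionality; i++) {
      clusterMap.add(new ArrayList<String>());
    }
    clusterMap.get(1).add("cluster-1a");
    clusterMap.get(3).add("noise");
    // Trim trailing empty dimensionalities, as in extractCorrelationClusters:
    for (int i = dimensionality; i > 0; i--) {
      if (!clusterMap.get(i).isEmpty()) {
        break;
      }
      clusterMap.remove(i);
    }
    // The former clusterMap.lastKey() becomes clusterMap.size() - 1:
    System.out.println("lambda_max = " + (clusterMap.size() - 1));
  }
}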
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
index f56342e0..5235273c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -115,7 +115,7 @@ public class FourC<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Cl
@Override
protected FourC<O> makeInstance() {
- return new FourC<O>(epsilon, minpts, outerdist, lambda);
+ return new FourC<>(epsilon, minpts, outerdist, lambda);
}
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
index 759e8f59..d1b714bf 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
@@ -64,7 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("Mining Hierarchies of Correlation Clusters")
@Description("Algorithm for detecting hierarchies of correlation clusters.")
-@Reference(authors = "E. Achtert, C. Böhm, P. Kröger, A. Zimek", title = "Mining Hierarchies of Correlation Clusterse", booktitle = "Proc. Int. Conf. on Scientific and Statistical Database Management (SSDBM'06), Vienna, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.35")
+@Reference(authors = "E. Achtert, C. Böhm, P. Kröger, A. Zimek", title = "Mining Hierarchies of Correlation Clusters", booktitle = "Proc. Int. Conf. on Scientific and Statistical Database Management (SSDBM'06), Vienna, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.35")
public class HiCO<V extends NumberVector<?>> extends OPTICS<V, PCACorrelationDistance> {
/**
* The logger for this class.
@@ -207,7 +207,7 @@ public class HiCO<V extends NumberVector<?>> extends OPTICS<V, PCACorrelationDis
@Override
protected HiCO<V> makeInstance() {
- return new HiCO<V>(distance, mu);
+ return new HiCO<>(distance, mu);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
index fdea8b35..f9531be0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -159,7 +159,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
* @return Clustering result
*/
public Clustering<Model> run(Database database, Relation<NumberVector<?>> relation) {
- Clustering<Model> ret = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
+ Clustering<Model> ret = new Clustering<>("LMCLUS Clustering", "lmclus-clustering");
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
@@ -204,10 +204,10 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
// New cluster found
// TODO: annotate cluster with dimensionality
- final Cluster<Model> cluster = new Cluster<Model>(current);
+ final Cluster<Model> cluster = new Cluster<>(current);
cluster.setName("Cluster_" + lmDim + "d_" + cnum);
cnum++;
- ret.addCluster(cluster);
+ ret.addToplevelCluster(cluster);
// Remove from main working set.
unclustered.removeDBIDs(current);
if (progress != null) {
@@ -219,7 +219,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
}
// Remaining objects are noise
if (unclustered.size() > 0) {
- ret.addCluster(new Cluster<Model>(unclustered, true));
+ ret.addToplevelCluster(new Cluster<>(unclustered, true));
}
if (progress != null) {
progress.setProcessed(relation.size(), LOG);
@@ -281,7 +281,7 @@ public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
// Build orthogonal basis from remainder
Matrix basis;
{
- List<Vector> vectors = new ArrayList<Vector>(sample.size() - 1);
+ List<Vector> vectors = new ArrayList<>(sample.size() - 1);
for (; iter.valid(); iter.advance()) {
Vector vec = relation.get(iter).getColumnVector();
vectors.add(vec.minusEquals(originV));
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
index f567098b..a9c67a58 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -42,10 +42,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -178,9 +178,9 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
}
// get the result
- Clustering<Model> r = new Clustering<Model>("ORCLUS clustering", "orclus-clustering");
+ Clustering<Model> r = new Clustering<>("ORCLUS clustering", "orclus-clustering");
for (ORCLUSCluster c : clusters) {
- r.addCluster(new Cluster<Model>(c.objectIDs, ClusterModel.CLUSTER));
+ r.addToplevelCluster(new Cluster<Model>(c.objectIDs, ClusterModel.CLUSTER));
}
return r;
} catch (Exception e) {
@@ -198,7 +198,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
private List<ORCLUSCluster> initialSeeds(Relation<V> database, int k) {
DBIDs randomSample = DBIDUtil.randomSample(database.getDBIDs(), k, rnd);
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
- List<ORCLUSCluster> seeds = new ArrayList<ORCLUSCluster>();
+ List<ORCLUSCluster> seeds = new ArrayList<>();
for (DBIDIter iter = randomSample.iter(); iter.valid(); iter.advance()) {
seeds.add(new ORCLUSCluster(database.get(iter), iter, factory));
}
@@ -222,7 +222,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
}
// projected centroids of the clusters
- List<V> projectedCentroids = new ArrayList<V>(clusters.size());
+ List<V> projectedCentroids = new ArrayList<>(clusters.size());
for (ORCLUSCluster c : clusters) {
projectedCentroids.add(projection(c, c.centroid, factory));
}
@@ -270,7 +270,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
private Matrix findBasis(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster cluster, int dim) {
// covariance matrix of cluster
// Matrix covariance = Util.covarianceMatrix(database, cluster.objectIDs);
- GenericDistanceDBIDList<DoubleDistance> results = new GenericDistanceDBIDList<DoubleDistance>(cluster.objectIDs.size());
+ GenericDistanceDBIDList<DoubleDistance> results = new GenericDistanceDBIDList<>(cluster.objectIDs.size());
for (DBIDIter it = cluster.objectIDs.iter(); it.valid(); it.advance()) {
DoubleDistance distance = distFunc.distance(cluster.centroid, database.get(it));
results.add(distance, it);
@@ -303,7 +303,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
* @param d_new the new dimensionality of the subspaces for each seed
*/
private void merge(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, List<ORCLUSCluster> clusters, int k_new, int d_new, IndefiniteProgress cprogress) {
- ArrayList<ProjectedEnergy> projectedEnergies = new ArrayList<ProjectedEnergy>();
+ ArrayList<ProjectedEnergy> projectedEnergies = new ArrayList<>();
for (int i = 0; i < clusters.size(); i++) {
for (int j = 0; j < clusters.size(); j++) {
if (i >= j) {
@@ -387,16 +387,16 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
ORCLUSCluster c_ij = union(database, distFunc, c_i, c_j, dim);
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database);
- DoubleDistance sum = getDistanceFunction().getDistanceFactory().nullDistance();
+ double sum = 0.;
V c_proj = projection(c_ij, c_ij.centroid, factory);
for (DBIDIter iter = c_ij.objectIDs.iter(); iter.valid(); iter.advance()) {
V o_proj = projection(c_ij, database.get(iter), factory);
- DoubleDistance dist = distFunc.distance(o_proj, c_proj);
- sum = sum.plus(dist.times(dist));
+ double dist = distFunc.distance(o_proj, c_proj).doubleValue();
+ sum += dist * dist;
}
- DoubleDistance projectedEnergy = sum.times(1.0 / c_ij.objectIDs.size());
+ sum /= c_ij.objectIDs.size();
- return new ProjectedEnergy(i, j, c_ij, projectedEnergy);
+ return new ProjectedEnergy(i, j, c_ij, sum);
}
/**
@@ -520,9 +520,9 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
ORCLUSCluster cluster;
- DoubleDistance projectedEnergy;
+ double projectedEnergy;
- ProjectedEnergy(int i, int j, ORCLUSCluster cluster, DoubleDistance projectedEnergy) {
+ ProjectedEnergy(int i, int j, ORCLUSCluster cluster, double projectedEnergy) {
this.i = i;
this.j = j;
this.cluster = cluster;
@@ -538,7 +538,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
*/
@Override
public int compareTo(ProjectedEnergy o) {
- return this.projectedEnergy.compareTo(o.projectedEnergy);
+ return Double.compare(projectedEnergy, o.projectedEnergy);
}
}
@@ -606,7 +606,7 @@ public class ORCLUS<V extends NumberVector<?>> extends AbstractProjectedClusteri
@Override
protected ORCLUS<V> makeInstance() {
- return new ORCLUS<V>(k, k_i, l, alpha, rnd, pca);
+ return new ORCLUS<>(k, k_i, l, alpha, rnd, pca);
}
}
}
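
The ProjectedEnergy rework above drops boxed DoubleDistance arithmetic in favor of a primitive double holding the mean squared projected distance. A stand-alone sketch of that quantity on raw double[] data (plain Java, hypothetical values; not the ELKI projection code):

public class ProjectedEnergyDemo {
  // Mean squared Euclidean distance of projected points to their projected
  // centroid, matching sum += dist * dist; sum /= size; in the patched code.
  static double projectedEnergy(double[][] projected, double[] centroid) {
    double sum = 0.;
    for (double[] p : projected) {
      double sqd = 0.;
      for (int d = 0; d < p.length; d++) {
        double delta = p[d] - centroid[d];
        sqd += delta * delta;
      }
      sum += sqd;
    }
    return sum / projected.length;
  }

  public static void main(String[] args) {
    double[][] pts = { { 0., 0. }, { 2., 0. }, { 1., 1. } };
    double[] cen = { 1., 0. };
    System.out.println(projectedEnergy(pts, cen)); // 1.0
  }
}

With primitive doubles, the comparator then reduces to Double.compare, as in the patched compareTo.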
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
index 0153ddc3..95cb2e58 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
index 12f10725..328fe3b3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -81,8 +81,8 @@ public class CASHIntervalSplit {
this.database = database;
this.minPts = minPts;
- this.f_minima = new HashMap<HyperBoundingBox, Map<DBID, Double>>();
- this.f_maxima = new HashMap<HyperBoundingBox, Map<DBID, Double>>();
+ this.f_minima = new HashMap<>();
+ this.f_maxima = new HashMap<>();
}
/**
@@ -108,9 +108,9 @@ public class CASHIntervalSplit {
Map<DBID, Double> minima = f_minima.get(interval);
Map<DBID, Double> maxima = f_maxima.get(interval);
if(minima == null || maxima == null) {
- minima = new HashMap<DBID, Double>();
+ minima = new HashMap<>();
f_minima.put(interval, minima);
- maxima = new HashMap<DBID, Double>();
+ maxima = new HashMap<>();
f_maxima.put(interval, maxima);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
index 56e68bfe..5c690feb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/ParameterizationFunction.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
index 8b6d104c..bfc272fd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
index 665de632..89d3c930 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
index a4440a29..27cc48d6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/CorePredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
index 2b946f1c..545a8171 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/EpsilonNeighborPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,11 +32,11 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -91,7 +91,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
public <T> NeighborPredicate.Instance<T> instantiate(Database database, SimpleTypeInformation<?> type) {
DistanceQuery<O, D> dq = QueryUtil.getDistanceQuery(database, distFunc);
RangeQuery<O, D> rq = database.getRangeQuery(dq);
- return (NeighborPredicate.Instance<T>) new Instance<D>(epsilon, rq, dq.getRelation().getDBIDs());
+ return (NeighborPredicate.Instance<T>) new Instance<>(epsilon, rq, dq.getRelation().getDBIDs());
}
@Override
@@ -109,7 +109,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
*
* @author Erich Schubert
*/
- public static class Instance<D extends Distance<D>> implements NeighborPredicate.Instance<DistanceDBIDResult<D>> {
+ public static class Instance<D extends Distance<D>> implements NeighborPredicate.Instance<DistanceDBIDList<D>> {
/**
* Range to query with
*/
@@ -145,12 +145,12 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
}
@Override
- public DistanceDBIDResult<D> getNeighbors(DBIDRef reference) {
+ public DistanceDBIDList<D> getNeighbors(DBIDRef reference) {
return rq.getRangeForDBID(reference, epsilon);
}
@Override
- public void addDBIDs(ModifiableDBIDs ids, DistanceDBIDResult<D> neighbors) {
+ public void addDBIDs(ModifiableDBIDs ids, DistanceDBIDList<D> neighbors) {
ids.addDBIDs(neighbors);
}
}
@@ -177,14 +177,14 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
// Get a distance function.
- ObjectParameter<DistanceFunction<O, D>> distanceP = new ObjectParameter<DistanceFunction<O, D>>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ ObjectParameter<DistanceFunction<O, D>> distanceP = new ObjectParameter<>(AbstractDistanceBasedAlgorithm.DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
D distanceFactory = null;
if(config.grab(distanceP)) {
distfun = distanceP.instantiateClass(config);
distanceFactory = distfun.getDistanceFactory();
}
// Get the epsilon parameter
- DistanceParameter<D> epsilonP = new DistanceParameter<D>(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN.EPSILON_ID, distanceFactory);
+ DistanceParameter<D> epsilonP = new DistanceParameter<>(de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN.EPSILON_ID, distanceFactory);
if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
@@ -192,7 +192,7 @@ public class EpsilonNeighborPredicate<O, D extends Distance<D>> implements Neigh
@Override
protected EpsilonNeighborPredicate<O, D> makeInstance() {
- return new EpsilonNeighborPredicate<O, D>(epsilon, distfun);
+ return new EpsilonNeighborPredicate<>(epsilon, distfun);
}
}
}
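
EpsilonNeighborPredicate above wraps a range query: the neighbors of a point are all objects within distance epsilon, including the point itself. A minimal stand-alone sketch of that test on double[] data, using a linear scan instead of ELKI's RangeQuery (all names hypothetical):

import java.util.ArrayList;
import java.util.List;

public class EpsilonNeighborhoodDemo {
  // Indices of all points within Euclidean distance eps of the query point.
  static List<Integer> neighbors(double[][] data, int query, double eps) {
    List<Integer> result = new ArrayList<>();
    for (int i = 0; i < data.length; i++) {
      double sqd = 0.;
      for (int d = 0; d < data[i].length; d++) {
        double delta = data[i][d] - data[query][d];
        sqd += delta * delta;
      }
      if (sqd <= eps * eps) { // the query point itself qualifies, as in DBSCAN
        result.add(i);
      }
    }
    return result;
  }

  public static void main(String[] args) {
    double[][] data = { { 0., 0. }, { 0., 1. }, { 5., 5. } };
    System.out.println(neighbors(data, 0, 1.5)); // [0, 1]
  }
}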
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
index ef1cb0dc..1e0a8642 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/GeneralizedDBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
+import de.lmu.ifi.dbs.elki.data.model.CoreObjectsModel;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -53,6 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
@@ -67,7 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @author Erich Schubert
* @author Arthur Zimek
- *
+ *
* @apiviz.landmark
*
* @apiviz.has Instance
@@ -92,22 +94,29 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
CorePredicate corepred;
/**
+ * Track which objects are "core" objects.
+ */
+ boolean coremodel = false;
+
+ /**
* Constructor for parameterized algorithm.
*
- * @param npred Neighbor predicate
- * @param corepred Core point predicate
+ * @param npred Neighbor predicate.
+ * @param corepred Core point predicate.
+ * @param coremodel Keep track of core points.
*/
- public GeneralizedDBSCAN(NeighborPredicate npred, CorePredicate corepred) {
+ public GeneralizedDBSCAN(NeighborPredicate npred, CorePredicate corepred, boolean coremodel) {
super();
this.npred = npred;
this.corepred = corepred;
+ this.coremodel = coremodel;
}
@Override
public Clustering<Model> run(Database database) {
for (SimpleTypeInformation<?> t : npred.getOutputType()) {
if (corepred.acceptsType(t)) {
- return new Instance<Object>(npred.instantiate(database, t), corepred.instantiate(database, t)).run();
+ return new Instance<>(npred.instantiate(database, t), corepred.instantiate(database, t), coremodel).run();
}
}
throw new AbortException("No compatible types found.");
@@ -127,7 +136,7 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
* Instance for a particular data set.
*
* @author Erich Schubert
- *
+ *
* @apiviz.composedOf CorePredicate.Instance
* @apiviz.composedOf NeighborPredicate.Instance
*/
@@ -135,17 +144,12 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
/**
* Unprocessed IDs
*/
- private static final int UNPROCESSED = -2;
-
- /**
- * Noise IDs
- */
- private static final int NOISE = -1;
+ private static final int UNPROCESSED = 0;
/**
* Noise IDs
*/
- private static final int FIRST_CLUSTER = 0;
+ private static final int NOISE = 1;
/**
* The neighborhood predicate
@@ -158,15 +162,22 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
final CorePredicate.Instance<T> corepred;
/**
+ * Track which objects are "core" objects.
+ */
+ boolean coremodel = false;
+
+ /**
* Full Constructor
*
* @param npred Neighborhood predicate
* @param corepred Core object predicate
+ * @param coremodel Keep track of core points.
*/
- public Instance(NeighborPredicate.Instance<T> npred, CorePredicate.Instance<T> corepred) {
+ public Instance(NeighborPredicate.Instance<T> npred, CorePredicate.Instance<T> corepred, boolean coremodel) {
super();
this.npred = npred;
this.corepred = corepred;
+ this.coremodel = coremodel;
}
/**
@@ -177,78 +188,85 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
public Clustering<Model> run() {
final DBIDs ids = npred.getIDs();
// Setup progress logging
- final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustering", ids.size(), LOG) : null;
- final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters", LOG) : null;
+ final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Generalized DBSCAN Clustering", ids.size(), LOG) : null;
+ final IndefiniteProgress clusprogress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters found", LOG) : null;
// (Temporary) store the cluster ID assigned.
final WritableIntegerDataStore clusterids = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED);
- // Note: these are not exact!
+ // Note: these are not exact, as objects may be stolen from noise.
final TIntArrayList clustersizes = new TIntArrayList();
+ clustersizes.add(0); // Unprocessed dummy value.
+ clustersizes.add(0); // Noise counter.
// Implementation Note: using Integer objects should result in
// reduced memory use in the HashMap!
- int clusterid = FIRST_CLUSTER;
- int clustersize = 0;
- int noisesize = 0;
+ int clusterid = NOISE + 1;
// Iterate over all objects in the database.
- for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
// Skip already processed ids.
- if(clusterids.intValue(id) != UNPROCESSED) {
+ if (clusterids.intValue(id) != UNPROCESSED) {
continue;
}
// Evaluate Neighborhood predicate
final T neighbors = npred.getNeighbors(id);
// Evaluate Core-Point predicate:
- if(corepred.isCorePoint(id, neighbors)) {
+ if (corepred.isCorePoint(id, neighbors)) {
clusterids.putInt(id, clusterid);
- clustersize = 1 + setbasedExpandCluster(clusterid, clusterids, neighbors, progress);
+ clustersizes.add(expandCluster(clusterid, clusterids, neighbors, progress));
// start next cluster on next iteration.
- clustersizes.add(clustersize);
- clustersize = 0;
- clusterid += 1;
- if(clusprogress != null) {
+ ++clusterid;
+ if (clusprogress != null) {
clusprogress.setProcessed(clusterid, LOG);
}
- }
- else {
+ } else {
// otherwise, it's a noise point
clusterids.putInt(id, NOISE);
- noisesize += 1;
+ clustersizes.set(NOISE, clustersizes.get(NOISE) + 1);
}
// We've completed this element
- if(progress != null) {
+ if (progress != null) {
progress.incrementProcessed(LOG);
}
}
// Finish progress logging.
- if(progress != null) {
+ if (progress != null) {
progress.ensureCompleted(LOG);
}
- if(clusprogress != null) {
+ if (clusprogress != null) {
clusprogress.setCompleted(LOG);
}
// Transform cluster ID mapping into a clustering result:
- ArrayList<ArrayModifiableDBIDs> clusterlists = new ArrayList<ArrayModifiableDBIDs>(clusterid + 1);
- // add noise cluster storage
- clusterlists.add(DBIDUtil.newArray(noisesize));
+ ArrayList<ArrayModifiableDBIDs> clusterlists = new ArrayList<>(clusterid);
+ ArrayList<ArrayModifiableDBIDs> corelists = coremodel ? new ArrayList<ArrayModifiableDBIDs>(clusterid) : null;
// add storage containers for clusters
- for(int i = 0; i < clustersizes.size(); i++) {
+ for (int i = 0; i < clustersizes.size(); i++) {
clusterlists.add(DBIDUtil.newArray(clustersizes.get(i)));
+ if (corelists != null) {
+ corelists.add(DBIDUtil.newArray(clustersizes.get(i)));
+ }
}
// do the actual inversion
- for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
- int cluster = clusterids.intValue(id);
- clusterlists.get(cluster + 1).add(id);
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ // Negative values are non-core points:
+ int cid = clusterids.intValue(id);
+ int cluster = Math.abs(cid);
+ clusterlists.get(cluster).add(id);
+ if (corelists != null && cid > NOISE) {
+ corelists.get(cluster).add(id);
+ }
}
clusterids.destroy();
- Clustering<Model> result = new Clustering<Model>("GDBSCAN", "gdbscan-clustering");
- int cid = 0;
- for(ArrayModifiableDBIDs res : clusterlists) {
- boolean isNoise = (cid == 0);
- Cluster<Model> c = new Cluster<Model>(res, isNoise, ClusterModel.CLUSTER);
- result.addCluster(c);
- cid++;
+ Clustering<Model> result = new Clustering<>("GDBSCAN", "gdbscan-clustering");
+ for (int cid = NOISE; cid < clusterlists.size(); cid++) {
+ boolean isNoise = (cid == NOISE);
+ Cluster<Model> c;
+ if (corelists != null) {
+ c = new Cluster<Model>(clusterlists.get(cid), isNoise, new CoreObjectsModel(corelists.get(cid)));
+ } else {
+ c = new Cluster<Model>(clusterlists.get(cid), isNoise, ClusterModel.CLUSTER);
+ }
+ result.addToplevelCluster(c);
}
return result;
}
@@ -263,28 +281,36 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
*
* @return cluster size
*/
- protected int setbasedExpandCluster(final int clusterid, final WritableIntegerDataStore clusterids, final T neighbors, final FiniteProgress progress) {
- int clustersize = 0;
+ protected int expandCluster(final int clusterid, final WritableIntegerDataStore clusterids, final T neighbors, final FiniteProgress progress) {
+ int clustersize = 1; // initial seed!
final ArrayModifiableDBIDs activeSet = DBIDUtil.newArray();
npred.addDBIDs(activeSet, neighbors);
// run expandCluster as long as this set is non-empty (non-recursive
// implementation)
- while(!activeSet.isEmpty()) {
+ while (!activeSet.isEmpty()) {
final DBID id = activeSet.remove(activeSet.size() - 1);
- clustersize += 1;
// Assign object to cluster
- final int oldclus = clusterids.putInt(id, clusterid);
- if(oldclus == -2) {
+ final int oldclus = clusterids.intValue(id);
+ if (oldclus == NOISE) {
+ clustersize += 1;
+ // Non core point cluster member:
+ clusterids.putInt(id, -clusterid);
+ } else if (oldclus == UNPROCESSED) {
+ clustersize += 1;
// expandCluster again:
// Evaluate Neighborhood predicate
final T newneighbors = npred.getNeighbors(id);
// Evaluate Core-Point predicate
- if(corepred.isCorePoint(id, newneighbors)) {
+ if (corepred.isCorePoint(id, newneighbors)) {
// Note: the recursion is unrolled into iteration over the active
// set.
npred.addDBIDs(activeSet, newneighbors);
+ clusterids.putInt(id, clusterid);
+ } else {
+ // Non core point cluster member:
+ clusterids.putInt(id, -clusterid);
}
- if(progress != null) {
+ if (progress != null) {
progress.incrementProcessed(LOG);
}
}
@@ -302,43 +328,58 @@ public class GeneralizedDBSCAN extends AbstractAlgorithm<Clustering<Model>> impl
*/
public static class Parameterizer extends AbstractParameterizer {
/**
- * Neighborhood predicate
+ * Neighborhood predicate.
*/
NeighborPredicate npred = null;
/**
- * Core point predicate
+ * Core point predicate.
*/
CorePredicate corepred = null;
/**
- * Parameter for neighborhood predicate
+ * Track which objects are "core" objects.
+ */
+ boolean coremodel = false;
+
+ /**
+ * Parameter for neighborhood predicate.
*/
public static final OptionID NEIGHBORHOODPRED_ID = new OptionID("gdbscan.neighborhood", "Neighborhood predicate for GDBSCAN");
/**
- * Parameter for core predicate
+ * Parameter for core predicate.
*/
public static final OptionID COREPRED_ID = new OptionID("gdbscan.core", "Core point predicate for GDBSCAN");
+ /**
+ * Flag to keep track of core points.
+ */
+ public static final OptionID COREMODEL_ID = new OptionID("gdbscan.core-model", "Use a model that keeps track of core points. Needs more memory.");
+
@Override
protected void makeOptions(Parameterization config) {
// Neighborhood predicate
- ObjectParameter<NeighborPredicate> npredOpt = new ObjectParameter<NeighborPredicate>(NEIGHBORHOODPRED_ID, NeighborPredicate.class, EpsilonNeighborPredicate.class);
- if(config.grab(npredOpt)) {
+ ObjectParameter<NeighborPredicate> npredOpt = new ObjectParameter<>(NEIGHBORHOODPRED_ID, NeighborPredicate.class, EpsilonNeighborPredicate.class);
+ if (config.grab(npredOpt)) {
npred = npredOpt.instantiateClass(config);
}
// Core point predicate
- ObjectParameter<CorePredicate> corepredOpt = new ObjectParameter<CorePredicate>(COREPRED_ID, CorePredicate.class, MinPtsCorePredicate.class);
- if(config.grab(corepredOpt)) {
+ ObjectParameter<CorePredicate> corepredOpt = new ObjectParameter<>(COREPRED_ID, CorePredicate.class, MinPtsCorePredicate.class);
+ if (config.grab(corepredOpt)) {
corepred = corepredOpt.instantiateClass(config);
}
+
+ Flag coremodelOpt = new Flag(COREMODEL_ID);
+ if (config.grab(coremodelOpt)) {
+ coremodel = coremodelOpt.isTrue();
+ }
}
@Override
protected GeneralizedDBSCAN makeInstance() {
- return new GeneralizedDBSCAN(npred, corepred);
+ return new GeneralizedDBSCAN(npred, corepred, coremodel);
}
}
}
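
The rewritten Instance packs cluster membership into one int per object: UNPROCESSED = 0, NOISE = 1, core members carry their cluster id (starting at 2), and border members carry the negated cluster id. A small decoding sketch of that convention (plain Java, hypothetical ids; the real code stores these in a WritableIntegerDataStore):

public class GdbscanIdEncoding {
  static final int UNPROCESSED = 0;
  static final int NOISE = 1;

  static String describe(int cid) {
    if (cid == UNPROCESSED) {
      return "unprocessed";
    }
    if (cid == NOISE) {
      return "noise";
    }
    // Negative values mark non-core (border) members; Math.abs recovers
    // the cluster index, exactly as in the inversion loop of run().
    return (cid > 0 ? "core" : "border") + " member of cluster " + Math.abs(cid);
  }

  public static void main(String[] args) {
    for (int cid : new int[] { 0, 1, 2, -2, 5, -5 }) {
      System.out.println(cid + " -> " + describe(cid));
    }
  }
}

This encoding is what lets the optional CoreObjectsModel separate core from border points without a second data store.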
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
index 47097f9b..a6e62e2e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/MinPtsCorePredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
index ed927696..c3e1e8c9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/NeighborPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.gdbscan;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java
index 8be23c7d..7ea3c7e4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/gdbscan/package-info.java
@@ -22,7 +22,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java
new file mode 100644
index 00000000..72b6fb57
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CentroidLinkageMethod.java
@@ -0,0 +1,84 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Centroid linkage clustering method, aka UPGMC: Unweighted Pair-Group Method
+ * using Centroids.
+ *
+ * Reference:
+ * <p>
+ * A. K. Jain and R. C. Dubes<br />
+ * Algorithms for Clustering Data<br />
+ * Prentice-Hall
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Alias({ "centroid", "upgmc" })
+@Reference(authors = "A. K. Jain and R. C. Dubes", title = "Algorithms for Clustering Data", booktitle = "Algorithms for Clustering Data, Prentice-Hall")
+public class CentroidLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final CentroidLinkageMethod STATIC = new CentroidLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public CentroidLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ final double wx = sizex / (double) (sizex + sizey);
+ final double wy = sizey / (double) (sizex + sizey);
+ final double beta = (sizex * sizey) / (double) ((sizex + sizey) * (sizex + sizey));
+ return wx * dx + wy * dy - beta * dxy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected CentroidLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+} // Sokal and Michener (1958), Gower (1967)
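
combine() in CentroidLinkageMethod is the Lance-Williams update for centroid linkage (UPGMC) on squared distances: d(k, i∪j) = (|i| d(k,i) + |j| d(k,j)) / (|i|+|j|) - |i||j| d(i,j) / (|i|+|j|)². A stand-alone numeric check that duplicates the formula rather than calling the ELKI class (the unused sizej parameter is omitted here):

public class CentroidLinkageCheck {
  // Lance-Williams update for UPGMC on squared Euclidean distances.
  static double combine(int sizex, double dx, int sizey, double dy, double dxy) {
    final double wx = sizex / (double) (sizex + sizey);
    final double wy = sizey / (double) (sizex + sizey);
    final double beta = (sizex * sizey) / (double) ((sizex + sizey) * (sizex + sizey));
    return wx * dx + wy * dy - beta * dxy;
  }

  public static void main(String[] args) {
    // Singletons at 0 and 2 on a line merge into a centroid at 1. For a third
    // point at 3, the squared distances are dx = 9, dy = 1, dxy = 4, and the
    // update must yield (3 - 1)^2 = 4, the squared distance to the centroid.
    System.out.println(combine(1, 9., 1, 1., 4.)); // 4.0
  }
}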
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java
new file mode 100644
index 00000000..0cb47fa7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/CompleteLinkageMethod.java
@@ -0,0 +1,70 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Complete-linkage clustering method.
+ *
+ * @author Erich Schubert
+ */
+@Alias({ "complete", "clink", "complete-link", "farthest-neighbor" })
+public class CompleteLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final CompleteLinkageMethod STATIC = new CompleteLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public CompleteLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return Math.max(dx, dy);
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected CompleteLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java
new file mode 100644
index 00000000..ac5cb77c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/ExtractFlatClusteringFromHierarchy.java
@@ -0,0 +1,854 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.list.array.TDoubleArrayList;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.DendrogramModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep;
+
+/**
+ * Extract a flat clustering from a full hierarchy, represented in pointer form.
+ *
+ * FIXME: re-check tie handling!
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses HierarchicalClusteringAlgorithm
+ * @apiviz.uses PointerHierarchyRepresentationResult
+ * @apiviz.has Clustering
+ */
+public class ExtractFlatClusteringFromHierarchy<D extends Distance<D>> implements ClusteringAlgorithm<Clustering<DendrogramModel<D>>> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(ExtractFlatClusteringFromHierarchy.class);
+
+ /**
+ * Threshold mode.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static enum ThresholdMode {
+ /** Cut by minimum number of clusters */
+ BY_MINCLUSTERS,
+ /** Cut by threshold */
+ BY_THRESHOLD,
+ /** No thresholding */
+ NO_THRESHOLD,
+ }
+
+ /**
+ * Output mode.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static enum OutputMode {
+ /** Strict partitioning. */
+ STRICT_PARTITIONS,
+ /** Partial hierarchy. */
+ PARTIAL_HIERARCHY,
+ }
+
+ /**
+ * Minimum number of clusters to extract
+ */
+ private final int minclusters;
+
+ /**
+ * Clustering algorithm to run to obtain the hierarchy.
+ */
+ private HierarchicalClusteringAlgorithm<D> algorithm;
+
+ /**
+ * Output mode: truncated hierarchy or strict partitions.
+ */
+ private OutputMode outputmode = OutputMode.PARTIAL_HIERARCHY;
+
+ /**
+ * Threshold for extracting clusters.
+ */
+ private D threshold = null;
+
+ /**
+ * Produce singleton clusters instead of adding single points to their parent cluster.
+ */
+ private boolean singletons = false;
+
+ /**
+ * Constructor.
+ *
+ * @param algorithm Algorithm to run
+ * @param minclusters Minimum number of clusters
+ * @param outputmode Output mode: truncated hierarchy or strict partitions.
+ * @param singletons Allow producing singleton clusters.
+ */
+ public ExtractFlatClusteringFromHierarchy(HierarchicalClusteringAlgorithm<D> algorithm, int minclusters, OutputMode outputmode, boolean singletons) {
+ super();
+ this.algorithm = algorithm;
+ this.threshold = null;
+ this.minclusters = minclusters;
+ this.outputmode = outputmode;
+ this.singletons = singletons;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param algorithm Algorithm to run
+ * @param threshold Distance threshold
+ * @param outputmode Output mode: truncated hierarchy or strict partitions.
+ * @param singletons Allow producing singleton clusters.
+ */
+ public ExtractFlatClusteringFromHierarchy(HierarchicalClusteringAlgorithm<D> algorithm, D threshold, OutputMode outputmode, boolean singletons) {
+ super();
+ this.algorithm = algorithm;
+ this.threshold = threshold;
+ this.minclusters = -1;
+ this.outputmode = outputmode;
+ this.singletons = singletons;
+ }
+
+ @Override
+ public Clustering<DendrogramModel<D>> run(Database database) {
+ PointerHierarchyRepresentationResult<D> pointerresult = algorithm.run(database);
+ DBIDs ids = pointerresult.getDBIDs();
+ DBIDDataStore pi = pointerresult.getParentStore();
+ DataStore<D> lambda = pointerresult.getParentDistanceStore();
+
+ Clustering<DendrogramModel<D>> result;
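+ // Use the optimized primitive-double code path when available: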
+ if (lambda instanceof DoubleDistanceDataStore) {
+ result = extractClustersDouble(ids, pi, (DoubleDistanceDataStore) lambda);
+ } else {
+ result = extractClusters(ids, pi, lambda);
+ }
+ result.addChildResult(pointerresult);
+
+ return result;
+ }
+
+ /**
+ * Extract all clusters from the pi-lambda-representation.
+ *
+ * @param ids Object ids to process
+ * @param pi Pi store
+ * @param lambda Lambda store
+ *
+ * @return Hierarchical clustering
+ */
+ private Clustering<DendrogramModel<D>> extractClusters(DBIDs ids, final DBIDDataStore pi, final DataStore<D> lambda) {
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
+
+ // Sort DBIDs by lambda. We need this for two things:
+ // a) to determine the stop distance from "minclusters" parameter
+ // b) to process arrows in decreasing / increasing order
+ ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
+ order.sort(new CompareByLambda<>(lambda));
+ DBIDArrayIter it = order.iter(); // Used multiple times!
+
+ int split;
+ if (minclusters > 0) {
+ split = Math.max(ids.size() - minclusters, 0);
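+ // Example (illustrative): 10 objects with minclusters = 3 give split = 7,
+ // i.e. the 7 lowest-lambda arrows form the initial clusters; the tie
+ // handling below may decrement split further.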
+ // Stop distance:
+ final D stopdist = lambda.get(order.get(split));
+
+ // Tie handling: decrement split.
+ while (split > 0) {
+ it.seek(split - 1);
+ if (stopdist.compareTo(lambda.get(it)) <= 0) {
+ split--;
+ } else {
+ break;
+ }
+ }
+ } else if (threshold != null) {
+ split = ids.size();
+ it.seek(split - 1);
+ while (it.valid() && threshold.compareTo(lambda.get(it)) <= 0) {
+ split--;
+ it.retract();
+ }
+ } else { // full hierarchy
+ split = 0;
+ }
+
+ // Extract the child clusters
+ int expcnum = ids.size() - split;
+ WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
+ ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<>(expcnum);
+ ArrayList<D> cluster_dist = new ArrayList<>(expcnum);
+ ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
+
+ DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
+ // Go backwards on the lower part.
+ for (it.seek(split - 1); it.valid(); it.retract()) {
+ D dist = lambda.get(it); // Distance to successor
+ pi.assignVar(it, succ); // succ = pi(it)
+ int clusterid = cluster_map.intValue(succ);
+ // Successor cluster has already been created:
+ if (clusterid >= 0) {
+ cluster_dbids.get(clusterid).add(it);
+ cluster_map.putInt(it, clusterid);
+ // Update distance to maximum encountered:
+ if (cluster_dist.get(clusterid).compareTo(dist) < 0) {
+ cluster_dist.set(clusterid, dist);
+ }
+ } else {
+ // Need to start a new cluster:
+ clusterid = cluster_dbids.size(); // next cluster number.
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ // Add element and successor as initial members:
+ cids.add(succ);
+ cluster_map.putInt(succ, clusterid);
+ cids.add(it);
+ cluster_map.putInt(it, clusterid);
+ // Store new cluster.
+ cluster_dbids.add(cids);
+ cluster_leads.add(succ);
+ cluster_dist.add(dist);
+ }
+
+ // Report progress
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ final Clustering<DendrogramModel<D>> dendrogram;
+ switch(outputmode) {
+ case PARTIAL_HIERARCHY: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Hierarchical Clustering", "hierarchical-clustering");
+ Cluster<DendrogramModel<D>> root = null;
+ ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<>(expcnum);
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ clusters.add(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ // The current cluster led by the current element:
+ final Cluster<DendrogramModel<D>> clus;
+ if (clusterid >= 0) {
+ clus = clusters.get(clusterid);
+ } else if (!singletons && ids.size() != 1) {
+ clus = null;
+ } else {
+ clus = makeCluster(it, null, DBIDUtil.deref(it));
+ }
+ // The successor to join:
+ pi.assignVar(it, succ); // succ = pi(it)
+ if (DBIDUtil.equal(it, succ)) {
+ assert (root == null);
+ root = clus;
+ } else {
+ // Parent cluster:
+ int parentid = cluster_map.intValue(succ);
+ D depth = lambda.get(it);
+ // Parent cluster exists - merge as a new cluster:
+ if (parentid >= 0) {
+ final Cluster<DendrogramModel<D>> pclus = clusters.get(parentid);
+ if (pclus.getModel().getDistance().equals(depth)) {
+ if (clus == null) {
+ ((ModifiableDBIDs) pclus.getIDs()).add(it);
+ } else {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ } else {
+ // Merge at new depth:
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 1 : 0);
+ if (clus == null) {
+ cids.add(it);
+ }
+ Cluster<DendrogramModel<D>> npclus = makeCluster(succ, depth, cids);
+ if (clus != null) {
+ dendrogram.addChildCluster(npclus, clus);
+ }
+ dendrogram.addChildCluster(npclus, pclus);
+ // Replace existing parent cluster: new depth
+ clusters.set(parentid, npclus);
+ }
+ } else {
+ // Merge with parent at this depth:
+ final Cluster<DendrogramModel<D>> pclus;
+ if (!singletons) {
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 2 : 1);
+ cids.add(succ);
+ if (clus == null) {
+ cids.add(it);
+ }
+ // New cluster for parent and/or new point
+ pclus = makeCluster(succ, depth, cids);
+ } else {
+ // Create a new, one-element cluster for parent, and a merged
+ // cluster on top.
+ pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS);
+ dendrogram.addChildCluster(pclus, makeCluster(succ, null, DBIDUtil.deref(succ)));
+ }
+ if (clus != null) {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ // Store cluster:
+ parentid = clusters.size();
+ clusters.add(pclus); // Remember parent cluster
+ cluster_map.putInt(succ, parentid); // Reference
+ }
+ }
+
+ // Report progress
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ assert (root != null);
+ // attach root
+ dendrogram.addToplevelCluster(root);
+ break;
+ }
+ case STRICT_PARTITIONS: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Flattened Hierarchical Clustering", "flattened-hierarchical-clustering");
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ dendrogram.addToplevelCluster(makeCluster(it2, cluster_dist.get(i), cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ if (clusterid < 0) {
+ dendrogram.addToplevelCluster(makeCluster(it, null, DBIDUtil.deref(it)));
+ }
+
+ // Report progress
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ break;
+ }
+ default:
+ throw new AbortException("Unsupported output mode.");
+ }
+
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
+ }
+
+ return dendrogram;
+ }
+
+ /**
+ * Extract all clusters from the pi-lambda-representation (optimized code
+ * path for primitive double distances).
+ *
+ * @param ids Object ids to process
+ * @param pi Pi store
+ * @param lambda Lambda store
+ *
+ * @return Hierarchical clustering
+ */
+ private Clustering<DendrogramModel<D>> extractClustersDouble(DBIDs ids, final DBIDDataStore pi, final DoubleDistanceDataStore lambda) {
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extracting clusters", ids.size(), LOG) : null;
+
+ // Sort DBIDs by lambda. We need this for two things:
+ // a) to determine the stop distance from "minclusters" parameter
+ // b) to process arrows in decreasing / increasing order
+ ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
+ order.sort(new CompareByDoubleLambda(lambda));
+ DBIDArrayIter it = order.iter(); // Used multiple times!
+
+ int split;
+ if (minclusters > 0) {
+ split = Math.max(ids.size() - minclusters, 0);
+ // Stop distance:
+ final double stopdist = lambda.doubleValue(order.get(split));
+
+ // Tie handling: decrement split.
+ while (split > 0) {
+ it.seek(split - 1);
+ if (stopdist <= lambda.doubleValue(it)) {
+ split--;
+ } else {
+ break;
+ }
+ }
+ } else if (threshold != null) {
+ split = ids.size();
+ it.seek(split - 1);
+ double stopdist = ((DoubleDistance) threshold).doubleValue();
+ while (it.valid() && stopdist <= lambda.doubleValue(it)) {
+ split--;
+ it.retract();
+ }
+ } else { // full hierarchy
+ split = 0;
+ }
+
+ // Extract the child clusters
+ int expcnum = ids.size() - split;
+ WritableIntegerDataStore cluster_map = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, -1);
+ ArrayList<ModifiableDBIDs> cluster_dbids = new ArrayList<>(expcnum);
+ TDoubleArrayList cluster_dist = new TDoubleArrayList(expcnum);
+ ArrayModifiableDBIDs cluster_leads = DBIDUtil.newArray(expcnum);
+
+ DBIDVar succ = DBIDUtil.newVar(); // Variable for successor.
+ // Go backwards on the lower part.
+ for (it.seek(split - 1); it.valid(); it.retract()) {
+ double dist = lambda.doubleValue(it); // Distance to successor
+ pi.assignVar(it, succ); // succ = pi(it)
+ int clusterid = cluster_map.intValue(succ);
+ // Successor cluster has already been created:
+ if (clusterid >= 0) {
+ cluster_dbids.get(clusterid).add(it);
+ cluster_map.putInt(it, clusterid);
+ // Update distance to maximum encountered:
+ if (cluster_dist.get(clusterid) < dist) {
+ cluster_dist.set(clusterid, dist);
+ }
+ } else {
+ // Need to start a new cluster:
+ clusterid = cluster_dbids.size(); // next cluster number.
+ ModifiableDBIDs cids = DBIDUtil.newArray();
+ // Add element and successor as initial members:
+ cids.add(succ);
+ cluster_map.putInt(succ, clusterid);
+ cids.add(it);
+ cluster_map.putInt(it, clusterid);
+ // Store new cluster.
+ cluster_dbids.add(cids);
+ cluster_leads.add(succ);
+ cluster_dist.add(dist);
+ }
+
+ // Report progress
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ final Clustering<DendrogramModel<D>> dendrogram;
+ switch(outputmode) {
+ case PARTIAL_HIERARCHY: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Hierarchical Clustering", "hierarchical-clustering");
+ Cluster<DendrogramModel<D>> root = null;
+ ArrayList<Cluster<DendrogramModel<D>>> clusters = new ArrayList<>(expcnum);
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(cluster_dist.get(i));
+ clusters.add(makeCluster(it2, depth, cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ // The current cluster led by the current element:
+ final Cluster<DendrogramModel<D>> clus;
+ if (clusterid >= 0) {
+ clus = clusters.get(clusterid);
+ } else if (!singletons && ids.size() != 1) {
+ clus = null;
+ } else {
+ clus = makeCluster(it, null, DBIDUtil.deref(it));
+ }
+ // The successor to join:
+ pi.assignVar(it, succ); // succ = pi(it)
+ if (DBIDUtil.equal(it, succ)) {
+ assert (root == null);
+ root = clus;
+ } else {
+ // Parent cluster:
+ int parentid = cluster_map.intValue(succ);
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(lambda.doubleValue(it));
+ // Parent cluster exists - merge as a new cluster:
+ if (parentid >= 0) {
+ final Cluster<DendrogramModel<D>> pclus = clusters.get(parentid);
+ if (pclus.getModel().getDistance().equals(depth)) {
+ if (clus == null) {
+ ((ModifiableDBIDs) pclus.getIDs()).add(it);
+ } else {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ } else {
+ // Merge at new depth:
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 1 : 0);
+ if (clus == null) {
+ cids.add(it);
+ }
+ Cluster<DendrogramModel<D>> npclus = makeCluster(succ, depth, cids);
+ if (clus != null) {
+ dendrogram.addChildCluster(npclus, clus);
+ }
+ dendrogram.addChildCluster(npclus, pclus);
+ // Replace existing parent cluster: new depth
+ clusters.set(parentid, npclus);
+ }
+ } else {
+ // Merge with parent at this depth:
+ final Cluster<DendrogramModel<D>> pclus;
+ if (!singletons) {
+ ModifiableDBIDs cids = DBIDUtil.newArray(clus == null ? 2 : 1);
+ cids.add(succ);
+ if (clus == null) {
+ cids.add(it);
+ }
+ // New cluster for parent and/or new point
+ pclus = makeCluster(succ, depth, cids);
+ } else {
+ // Create a new, one-element cluster for parent, and a merged
+ // cluster on top.
+ pclus = makeCluster(succ, depth, DBIDUtil.EMPTYDBIDS);
+ dendrogram.addChildCluster(pclus, makeCluster(succ, null, DBIDUtil.deref(succ)));
+ }
+ if (clus != null) {
+ dendrogram.addChildCluster(pclus, clus);
+ }
+ // Store cluster:
+ parentid = clusters.size();
+ clusters.add(pclus); // Remember parent cluster
+ cluster_map.putInt(succ, parentid); // Reference
+ }
+ }
+
+ // Report progress
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ assert (root != null);
+ // attach root
+ dendrogram.addToplevelCluster(root);
+ break;
+ }
+ case STRICT_PARTITIONS: {
+ // Build a hierarchy out of these clusters.
+ dendrogram = new Clustering<>("Flattened Hierarchical Clustering", "flattened-hierarchical-clustering");
+ // Convert initial clusters to cluster objects
+ {
+ int i = 0;
+ for (DBIDIter it2 = cluster_leads.iter(); it2.valid(); it2.advance(), i++) {
+ @SuppressWarnings("unchecked")
+ D depth = (D) new DoubleDistance(cluster_dist.get(i));
+ dendrogram.addToplevelCluster(makeCluster(it2, depth, cluster_dbids.get(i)));
+ }
+ cluster_dist = null; // Invalidate
+ cluster_dbids = null; // Invalidate
+ }
+ // Process the upper part, bottom-up.
+ for (it.seek(split); it.valid(); it.advance()) {
+ int clusterid = cluster_map.intValue(it);
+ if (clusterid < 0) {
+ dendrogram.addToplevelCluster(makeCluster(it, null, DBIDUtil.deref(it)));
+ }
+
+ // Report progress
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ break;
+ }
+ default:
+ throw new AbortException("Unsupported output mode.");
+ }
+
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
+ }
+
+ return dendrogram;
+ }
+
+ /**
+ * Make the cluster for the given object
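+ *
+ * Cluster names encode the cluster's role: "mrg_" for merge-only (empty)
+ * clusters, "obj_" for singleton objects, and "clu_" for regular clusters.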
+ *
+ * @param lead Leading object
+ * @param depth Linkage depth
+ * @param members Member objects
+ * @return Cluster
+ */
+ private Cluster<DendrogramModel<D>> makeCluster(DBIDRef lead, D depth, DBIDs members) {
+ final String name;
+ if (members.size() == 0) {
+ name = "mrg_" + DBIDUtil.toString(lead) + "_" + depth;
+ } else if ((depth != null && depth.isInfiniteDistance()) || (members.size() == 1 && members.contains(lead))) {
+ name = "obj_" + DBIDUtil.toString(lead);
+ } else if (depth != null) {
+ name = "clu_" + DBIDUtil.toString(lead) + "_" + depth;
+ } else {
+ // Complete data set only?
+ name = "clu_" + DBIDUtil.toString(lead);
+ }
+ Cluster<DendrogramModel<D>> cluster = new Cluster<>(name, members, new DendrogramModel<>(depth));
+ return cluster;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return algorithm.getInputTypeRestriction();
+ }
+
+ /**
+ * Order a DBID collection by the lambda value.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <D> Distance type
+ */
+ private static final class CompareByLambda<D extends Distance<D>> implements Comparator<DBIDRef> {
+ /**
+ * Lambda storage
+ */
+ private final DataStore<D> lambda;
+
+ /**
+ * Constructor.
+ *
+ * @param lambda Lambda storage
+ */
+ protected CompareByLambda(DataStore<D> lambda) {
+ this.lambda = lambda;
+ }
+
+ @Override
+ public int compare(DBIDRef id1, DBIDRef id2) {
+ D k1 = lambda.get(id1);
+ D k2 = lambda.get(id2);
+ assert (k1 != null);
+ assert (k2 != null);
+ return k1.compareTo(k2);
+ }
+ }
+
+ /**
+ * Order a DBID collection by the lambda value.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private static final class CompareByDoubleLambda implements Comparator<DBIDRef> {
+ /**
+ * Lambda storage
+ */
+ private final DoubleDistanceDataStore lambda;
+
+ /**
+ * Constructor.
+ *
+ * @param lambda Lambda storage
+ */
+ protected CompareByDoubleLambda(DoubleDistanceDataStore lambda) {
+ this.lambda = lambda;
+ }
+
+ @Override
+ public int compare(DBIDRef id1, DBIDRef id2) {
+ double k1 = lambda.doubleValue(id1);
+ double k2 = lambda.doubleValue(id2);
+ return Double.compare(k1, k2);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<D extends Distance<D>> extends AbstractParameterizer {
+ /**
+ * Extraction mode to use.
+ */
+ public static final OptionID MODE_ID = new OptionID("hierarchical.threshold-mode", "The thresholding mode to use for extracting clusters: by desired number of clusters, or by distance threshold.");
+
+ /**
+ * The minimum number of clusters to extract.
+ */
+ public static final OptionID MINCLUSTERS_ID = new OptionID("hierarchical.minclusters", "The minimum number of clusters to extract (there may be more clusters when tied).");
+
+ /**
+ * The threshold level for which to extract the clustering.
+ */
+ public static final OptionID THRESHOLD_ID = new OptionID("hierarchical.threshold", "The threshold level for which to extract the clusters.");
+
+ /**
+ * Parameter to configure the output mode (nested or truncated clusters).
+ */
+ public static final OptionID OUTPUTMODE_ID = new OptionID("hierarchical.output-mode", "The output mode: a truncated cluster hierarchy, or a strict (flat) partitioning of the data set.");
+
+ /**
+ * Flag to produce singleton clusters.
+ */
+ public static final OptionID SINGLETONS_ID = new OptionID("hierarchical.singletons", "Do not avoid singleton clusters. This produces a more complex hierarchy.");
+
+ /**
+ * Number of clusters to extract.
+ */
+ int minclusters = -1;
+
+ /**
+ * Threshold level.
+ */
+ D threshold = null;
+
+ /**
+ * Flag to produce empty clusters to model the hierarchy above.
+ */
+ OutputMode outputmode = null;
+
+ /**
+ * The hierarchical clustering algorithm to run.
+ */
+ HierarchicalClusteringAlgorithm<D> algorithm;
+
+ /**
+ * Threshold mode.
+ */
+ ThresholdMode thresholdmode = null;
+
+ /**
+ * Also create singleton clusters.
+ */
+ boolean singletons = false;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<HierarchicalClusteringAlgorithm<D>> algorithmP = new ObjectParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, HierarchicalClusteringAlgorithm.class);
+ if (config.grab(algorithmP)) {
+ algorithm = algorithmP.instantiateClass(config);
+ }
+
+ EnumParameter<ThresholdMode> modeP = new EnumParameter<>(MODE_ID, ThresholdMode.class, ThresholdMode.BY_MINCLUSTERS);
+ if (config.grab(modeP)) {
+ thresholdmode = modeP.getValue();
+ }
+
+ if (thresholdmode == null || ThresholdMode.BY_MINCLUSTERS.equals(thresholdmode)) {
+ IntParameter minclustersP = new IntParameter(MINCLUSTERS_ID);
+ minclustersP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(minclustersP)) {
+ minclusters = minclustersP.intValue();
+ }
+ }
+
+ if (thresholdmode == null || ThresholdMode.BY_THRESHOLD.equals(thresholdmode)) {
+ // Fallback to double when no algorithm chosen yet:
+ @SuppressWarnings("unchecked")
+ final D factory = algorithm != null ? algorithm.getDistanceFactory() : (D) DoubleDistance.FACTORY;
+ DistanceParameter<D> distP = new DistanceParameter<>(THRESHOLD_ID, factory);
+ if (config.grab(distP)) {
+ threshold = distP.getValue();
+ }
+ }
+
+ if (thresholdmode == null || !ThresholdMode.NO_THRESHOLD.equals(thresholdmode)) {
+ EnumParameter<OutputMode> outputP = new EnumParameter<>(OUTPUTMODE_ID, OutputMode.class);
+ if (config.grab(outputP)) {
+ outputmode = outputP.getValue();
+ }
+ } else {
+ // This becomes full hierarchy:
+ minclusters = -1;
+ outputmode = OutputMode.PARTIAL_HIERARCHY;
+ }
+
+ Flag singletonsF = new Flag(SINGLETONS_ID);
+ if (config.grab(singletonsF)) {
+ singletons = singletonsF.isTrue();
+ }
+ }
+
+ @Override
+ protected ExtractFlatClusteringFromHierarchy<D> makeInstance() {
+ switch(thresholdmode) {
+ case NO_THRESHOLD:
+ case BY_MINCLUSTERS:
+ return new ExtractFlatClusteringFromHierarchy<>(algorithm, minclusters, outputmode, singletons);
+ case BY_THRESHOLD:
+ return new ExtractFlatClusteringFromHierarchy<>(algorithm, threshold, outputmode, singletons);
+ default:
+ throw new AbortException("Unknown extraction mode.");
+ }
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java
new file mode 100644
index 00000000..079fb69b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/GroupAverageLinkageMethod.java
@@ -0,0 +1,82 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Group-average linkage clustering method.
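+ *
+ * The distance of the merged cluster to another cluster is the size-weighted
+ * mean of the two previous distances, which equals the mean of all pairwise
+ * distances between the two clusters (UPGMA).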
+ *
+ * Reference:
+ * <p>
+ * A. K. Jain and R. C. Dubes<br />
+ * Algorithms for Clustering Data<br />
+ * Prentice-Hall
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Alias({ "upgma", "average", "average-link", "average-linkage", "UPGMA" })
+@Reference(authors = "A. K. Jain and R. C. Dubes", title = "Algorithms for Clustering Data", booktitle = "Algorithms for Clustering Data, Prentice-Hall")
+public class GroupAverageLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final GroupAverageLinkageMethod STATIC = new GroupAverageLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public GroupAverageLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ final double wx = sizex / (double) (sizex + sizey);
+ final double wy = sizey / (double) (sizex + sizey);
+ return wx * dx + wy * dy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected GroupAverageLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java
new file mode 100644
index 00000000..f3595d51
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/HierarchicalClusteringAlgorithm.java
@@ -0,0 +1,51 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.algorithm.Algorithm;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+
+/**
+ * Interface for hierarchical clustering algorithms.
+ *
+ * This interface allows the algorithms to be used by e.g.
+ * {@link ExtractFlatClusteringFromHierarchy}.
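+ *
+ * A minimal (hypothetical) usage sketch, assuming a suitable distance
+ * function has been configured elsewhere:
+ *
+ * <pre>
+ * HierarchicalClusteringAlgorithm&lt;DoubleDistance&gt; algo = new SLINK&lt;&gt;(distanceFunction);
+ * Clustering&lt;?&gt; flat = new ExtractFlatClusteringFromHierarchy&lt;&gt;(algo, 3, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS, false).run(database);
+ * </pre>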
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has PointerHierarchyRepresentationResult
+ *
+ * @param <D> Distance type
+ */
+public interface HierarchicalClusteringAlgorithm<D extends Distance<D>> extends Algorithm {
+ @Override
+ public PointerHierarchyRepresentationResult<D> run(Database db);
+
+ /**
+ * Return the distance type that will be used by the algorithm.
+ *
+ * @return Distance factory.
+ */
+ public D getDistanceFactory();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java
new file mode 100644
index 00000000..68d0b4d8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/LinkageMethod.java
@@ -0,0 +1,56 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Interface for implementing a new linkage method for hierarchical
+ * clustering.
+ *
+ * Reference:
+ * <p>
+ * G. N. Lance and W. T. Williams<br />
+ * A general theory of classificatory sorting strategies 1. Hierarchical systems
+ * <br/>
+ * The Computer Journal 9.4 (1967): 373-380.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "G. N. Lance and W. T. Williams", title = "A general theory of classificatory sorting strategies 1. Hierarchical systems", booktitle = "The computer journal 9.4", url = "http://dx.doi.org/ 10.1093/comjnl/9.4.373")
+public interface LinkageMethod {
+ /**
+ * Compute combined linkage for two clusters.
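+ *
+ * Implementations follow the Lance-Williams update: the distance of the
+ * merged cluster (x,y) to another cluster j takes the form
+ * alpha_x * d(x,j) + alpha_y * d(y,j) + beta * d(x,y) + gamma * |d(x,j) - d(y,j)|,
+ * where the coefficients may depend on the cluster sizes.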
+ *
+ * @param sizex Size of first cluster x before merging
+ * @param dx Distance of cluster x to j before merging
+ * @param sizey Size of second cluster y before merging
+ * @param dy Distance of cluster y to j before merging
+ * @param sizej Size of candidate cluster j
+ * @param dxy Distance between clusters x and y before merging
+ * @return Combined distance
+ */
+ double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java
new file mode 100644
index 00000000..fe167cec
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/MedianLinkageMethod.java
@@ -0,0 +1,80 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Median-linkage clustering method: Weighted pair group method using centroids
+ * (WPGMC).
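+ *
+ * With squared Euclidean distances, the update
+ * d((x,y), j) = (d(x,j) + d(y,j)) / 2 - d(x,y) / 4
+ * corresponds to placing the merged cluster at the midpoint between the two
+ * previous cluster representatives.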
+ *
+ * Reference:
+ * <p>
+ * J. C. Gower<br/>
+ * A comparison of some methods of cluster analysis<br/>
+ * Biometrics (1967): 623-637.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "J. C. Gower", title = "A comparison of some methods of cluster analysis", booktitle = "Biometrics (1967)", url = "http://www.jstor.org/stable/10.2307/2528417")
+@Alias({ "wpgmc", "WPGMC", "weighted-centroid" })
+public class MedianLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final MedianLinkageMethod STATIC = new MedianLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public MedianLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return .5 * (dx + dy) - .25 * dxy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected MedianLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java
new file mode 100644
index 00000000..ee3052a4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/NaiveAgglomerativeHierarchicalClustering.java
@@ -0,0 +1,303 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * This tutorial will step you through implementing a well-known clustering
+ * algorithm, agglomerative hierarchical clustering, in multiple steps.
+ *
+ * This is the third step, where we add support for different linkage
+ * strategies.
+ *
+ * This is the naive O(n^3) algorithm. See {@link SLINK} for a much faster
+ * algorithm (however, only for single-linkage).
+ *
+ * Reference for the unified concept:
+ * <p>
+ * G. N. Lance and W. T. Williams<br />
+ * A general theory of classificatory sorting strategies 1. Hierarchical systems
+ * <br/>
+ * The Computer Journal 9.4 (1967): 373-380.
+ * </p>
+ *
+ * See also:
+ * <p>
+ * R. M. Cormack<br />
+ * A Review of Classification<br />
+ * Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf LinkageMethod
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "G. N. Lance and W. T. Williams", title = "A general theory of classificatory sorting strategies 1. Hierarchical systems", booktitle = "The computer journal 9.4", url = "http://dx.doi.org/ 10.1093/comjnl/9.4.373")
+public class NaiveAgglomerativeHierarchicalClustering<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, PointerHierarchyRepresentationResult<DoubleDistance>> implements HierarchicalClusteringAlgorithm<DoubleDistance> {
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(NaiveAgglomerativeHierarchicalClustering.class);
+
+ /**
+ * Current linkage method in use.
+ */
+ LinkageMethod linkage = WardLinkageMethod.STATIC;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function to use
+ * @param linkage Linkage method
+ */
+ public NaiveAgglomerativeHierarchicalClustering(DistanceFunction<? super O, D> distanceFunction, LinkageMethod linkage) {
+ super(distanceFunction);
+ this.linkage = linkage;
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param db Database
+ * @param relation Relation
+ * @return Clustering hierarchy
+ */
+ public PointerHierarchyRepresentationResult<DoubleDistance> run(Database db, Relation<O> relation) {
+ DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+ final int size = ids.size();
+
+ if (size > 0x10000) {
+ throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), as the distance matrix index would overflow.");
+ }
+ if (SingleLinkageMethod.class.isInstance(linkage)) {
+ LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
+ }
+
+ // Compute the initial (lower triangular) distance matrix.
+ double[] scratch = new double[triangleSize(size)];
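+ // Layout: the entry for pair (i, j) with j < i is at triangleSize(i) + j.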
+ DBIDArrayIter ix = ids.iter(), iy = ids.iter(), ij = ids.iter();
+ // Position counter - must agree with computeOffset!
+ int pos = 0;
+ boolean square = WardLinkageMethod.class.isInstance(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction()));
+ for (ix.seek(0); ix.valid(); ix.advance()) {
+ for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
+ scratch[pos] = dq.distance(ix, iy).doubleValue();
+ // Ward uses variances -- i.e. squared values
+ if (square) {
+ scratch[pos] *= scratch[pos];
+ }
+ pos++;
+ }
+ }
+
+ // Initialize space for result:
+ WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDoubleDistanceDataStore lambda = DataStoreUtil.makeDoubleDistanceStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ pi.put(it, it);
+ lambda.put(it, Double.POSITIVE_INFINITY);
+ csize.put(it, 1);
+ }
+
+ // Repeat until everything merged into 1 cluster
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
+ for (int i = 1; i < size; i++) {
+ double mindist = Double.POSITIVE_INFINITY;
+ int x = -1, y = -1;
+ for (ix.seek(0); ix.valid(); ix.advance()) {
+ if (lambda.doubleValue(ix) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int xbase = triangleSize(ix.getOffset());
+ for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
+ if (lambda.doubleValue(iy) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int idx = xbase + iy.getOffset();
+ if (scratch[idx] <= mindist) {
+ mindist = scratch[idx];
+ x = ix.getOffset();
+ y = iy.getOffset();
+ }
+ }
+ }
+ assert (x >= 0 && y >= 0);
+ // Avoid allocating memory, by reusing existing iterators:
+ ix.seek(x);
+ iy.seek(y);
+ if (LOG.isDebuggingFine()) {
+ LOG.debugFine("Merging: " + DBIDUtil.toString(ix) + " -> " + DBIDUtil.toString(iy));
+ }
+ // Perform merge in data structure: x -> y
+ // Since y < x, prefer keeping y, dropping x.
+ lambda.put(ix, mindist);
+ pi.put(ix, iy);
+ // Merge into cluster
+ int sizex = csize.intValue(ix), sizey = csize.intValue(iy);
+ csize.put(iy, sizex + sizey);
+
+ // Update distance matrix. Note: y < x
+
+ // Implementation note: most linkage methods do not need sizej, and
+ // could skip the cluster size lookup.
+ final int xbase = triangleSize(x), ybase = triangleSize(y);
+
+ ij.seek(0);
+ // Write to (y, j), with j < y
+ for (; ij.getOffset() < y; ij.advance()) {
+ if (lambda.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int sizej = csize.intValue(ij);
+ scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, mindist);
+ }
+ ij.advance(); // Skip y
+ // Write to (j, y), with y < j < x
+ for (; ij.getOffset() < x; ij.advance()) {
+ if (lambda.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int jbase = triangleSize(ij.getOffset());
+ final int sizej = csize.intValue(ij);
+ scratch[jbase + y] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + y], sizej, mindist);
+ }
+ ij.advance(); // Skip x
+ // Write to (j, y), with y < x < j
+ for (; ij.valid(); ij.advance()) {
+ if (lambda.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ continue;
+ }
+ final int sizej = csize.intValue(ij);
+ final int jbase = triangleSize(ij.getOffset());
+ scratch[jbase + y] = linkage.combine(sizex, scratch[jbase + x], sizey, scratch[jbase + y], sizej, mindist);
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ return new PointerHierarchyRepresentationResult<>(ids, pi, lambda);
+ }
+
+ /**
+ * Compute the size of a complete x by x triangle (minus diagonal)
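+ *
+ * For example, triangleSize(4) = 6, the number of unordered pairs among 4
+ * objects; row x of the linearized triangular matrix starts at this offset.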
+ *
+ * @param x Offset
+ * @return Size of complete triangle
+ */
+ protected static int triangleSize(int x) {
+ return (x * (x - 1)) >>> 1;
+ }
+
+ @Override
+ public DoubleDistance getDistanceFactory() {
+ return DoubleDistance.FACTORY;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ // The input relation must match our distance function:
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for linkage parameter.
+ */
+ public static final OptionID LINKAGE_ID = new OptionID("hierarchical.linkage", "Linkage method to use (e.g. Ward, Single-Link)");
+
+ /**
+ * Current linkage in use.
+ */
+ protected LinkageMethod linkage;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ // We don't call super, because we want a different default distance.
+ ObjectParameter<DistanceFunction<O, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, DistanceFunction.class);
+ if (config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ }
+
+ ObjectParameter<LinkageMethod> linkageP = new ObjectParameter<>(LINKAGE_ID, LinkageMethod.class);
+ linkageP.setDefaultValue(WardLinkageMethod.class);
+ if (config.grab(linkageP)) {
+ linkage = linkageP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected NaiveAgglomerativeHierarchicalClustering<O, D> makeInstance() {
+ return new NaiveAgglomerativeHierarchicalClustering<>(distanceFunction, linkage);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java
new file mode 100644
index 00000000..c339fb09
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/PointerHierarchyRepresentationResult.java
@@ -0,0 +1,97 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.datastore.DBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.result.BasicResult;
+
+/**
+ * The pointer representation of a hierarchical clustering. Each object is
+ * represented by a parent object and the distance at which it joins the
+ * parent object's cluster.
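+ *
+ * For example (illustrative values): pi(b) = a with lambda(b) = 2.0 means
+ * that b joins the cluster of a at merge distance 2.0; a root object points
+ * to itself with infinite lambda.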
+ *
+ * @author Erich Schubert
+ *
+ * @param <D> Distance type
+ */
+public class PointerHierarchyRepresentationResult<D extends Distance<D>> extends BasicResult {
+ /**
+ * The DBIDs in this result.
+ */
+ DBIDs ids;
+
+ /**
+ * The parent DBID relation.
+ */
+ DBIDDataStore parent;
+
+ /**
+ * Distance to the parent object.
+ */
+ DataStore<D> parentDistance;
+
+ /**
+ * Constructor.
+ *
+ * @param ids IDs processed.
+ * @param parent Parent pointer.
+ * @param parentDistance Distance to parent.
+ */
+ public PointerHierarchyRepresentationResult(DBIDs ids, DBIDDataStore parent, DataStore<D> parentDistance) {
+ super("Pointer Representation", "pointer-representation");
+ this.ids = ids;
+ this.parent = parent;
+ this.parentDistance = parentDistance;
+ }
+
+ /**
+ * Get the clustered DBIDs.
+ *
+ * @return DBIDs
+ */
+ public DBIDs getDBIDs() {
+ return ids;
+ }
+
+ /**
+ * Get the parent DBID relation.
+ *
+ * @return Parent relation.
+ */
+ public DBIDDataStore getParentStore() {
+ return parent;
+ }
+
+ /**
+ * Get the distance to the parent.
+ *
+ * @return Parent distance.
+ */
+ public DataStore<D> getParentDistanceStore() {
+ return parentDistance;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java
new file mode 100644
index 00000000..f1b58868
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SLINK.java
@@ -0,0 +1,368 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+
+/**
+ * Implementation of the efficient Single-Link Algorithm SLINK of R. Sibson.
+ *
+ * <p>
+ * Reference:<br />
+ * R. Sibson: SLINK: An optimally efficient algorithm for the single-link
+ * cluster method. <br/>
+ * In: The Computer Journal 16 (1973), No. 1, p. 30-34.
+ * </p>
+ *
+ * @author Elke Achtert
+ * @author Erich Schubert
+ *
+ * @apiviz.has SingleLinkageMethod
+ *
+ * @param <O> the type of DatabaseObject the algorithm is applied on
+ * @param <D> the type of Distance used
+ */
+@Title("SLINK: Single Link Clustering")
+@Description("Hierarchical clustering algorithm based on single-link connectivity.")
+@Reference(authors = "R. Sibson", title = "SLINK: An optimally efficient algorithm for the single-link cluster method", booktitle = "The Computer Journal 16 (1973), No. 1, p. 30-34.", url = "http://dx.doi.org/10.1093/comjnl/16.1.30")
+@Alias(value = { "de.lmu.ifi.dbs.elki.algorithm.clustering.SLINK", "clustering.SLINK", "SLINK", "single-link", "single-linkage" })
+public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, PointerHierarchyRepresentationResult<D>> implements HierarchicalClusteringAlgorithm<D> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SLINK.class);
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ */
+ public SLINK(DistanceFunction<? super O, D> distanceFunction) {
+ super(distanceFunction);
+ }
+
+ /**
+ * Performs the SLINK algorithm on the given database.
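+ *
+ * Objects are inserted one at a time; steps 1 to 4 maintain the pointer
+ * representation (pi, lambda) using O(n) additional memory and O(n^2) total
+ * runtime.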
+ */
+ public PointerHierarchyRepresentationResult<D> run(Database database, Relation<O> relation) {
+ DBIDs ids = relation.getDBIDs();
+ DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ @SuppressWarnings("unchecked")
+ Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
+ WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDataStore<D> lambda = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, distCls);
+ // Temporary storage for m.
+ WritableDataStore<D> m = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
+
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Running SLINK", ids.size(), LOG) : null;
+ // has to be an array for monotonicity reasons!
+ ModifiableDBIDs processedIDs = DBIDUtil.newArray(ids.size());
+
+ // Optimized code path for double distances
+ if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction && lambda instanceof WritableDoubleDistanceDataStore && m instanceof WritableDoubleDistanceDataStore) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDoubleDistanceFunction<? super O> dist = (PrimitiveDoubleDistanceFunction<? super O>) getDistanceFunction();
+ WritableDoubleDistanceDataStore lambdad = (WritableDoubleDistanceDataStore) lambda;
+ WritableDoubleDistanceDataStore md = (WritableDoubleDistanceDataStore) m;
+ // apply the algorithm
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ step1double(id, pi, lambdad);
+ step2double(id, processedIDs, distQuery.getRelation(), dist, md);
+ step3double(id, pi, lambdad, processedIDs, md);
+ step4double(id, pi, lambdad, processedIDs);
+
+ processedIDs.add(id);
+
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ } else {
+ // apply the algorithm
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ step1(id, pi, lambda);
+ step2(id, processedIDs, distQuery, m);
+ step3(id, pi, lambda, processedIDs, m);
+ step4(id, pi, lambda, processedIDs);
+
+ processedIDs.add(id);
+
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
+ }
+ }
+ }
+
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
+ }
+ // We don't need m anymore.
+ m.destroy();
+ m = null;
+
+ return new PointerHierarchyRepresentationResult<>(ids, pi, lambda);
+ }
+
+ /**
+ * First step: Initialize P(id) = id, L(id) = infinity.
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ */
+ private void step1(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda) {
+ // P(n+1) = n+1:
+ pi.put(id, id);
+ // L(n+1) = infinity
+ lambda.put(id, getDistanceFunction().getDistanceFactory().infiniteDistance());
+ }
+
+ /**
+ * Second step: Determine the pairwise distances from all objects in the
+ * pointer representation to the new object with the specified id.
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param processedIDs the already processed ids
+ * @param distFunc Distance function to use
+ * @param m Data store
+ */
+ private void step2(DBIDRef id, DBIDs processedIDs, DistanceQuery<O, D> distFunc, WritableDataStore<D> m) {
+ O newObj = distFunc.getRelation().get(id);
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ // M(i) = dist(i, n+1)
+ m.put(it, distFunc.distance(it, newObj));
+ }
+ }
+
+ /**
+ * Third step: Determine the values for P and L
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ * @param processedIDs the already processed ids
+ * @param m Data store
+ */
+ private void step3(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs, WritableDataStore<D> m) {
+ DBIDVar p_i = DBIDUtil.newVar();
+ // for i = 1..n
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ D l_i = lambda.get(it);
+ D m_i = m.get(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
+ D mp_i = m.get(p_i);
+
+ // if L(i) >= M(i)
+ if (l_i.compareTo(m_i) >= 0) {
+ // M(P(i)) = min { M(P(i)), L(i) }
+ m.put(p_i, DistanceUtil.min(mp_i, l_i));
+
+ // L(i) = M(i)
+ lambda.put(it, m_i);
+
+ // P(i) = n+1;
+ pi.put(it, id);
+ } else {
+ // M(P(i)) = min { M(P(i)), M(i) }
+ m.put(p_i, DistanceUtil.min(mp_i, m_i));
+ }
+ }
+ }
+
+ /**
+ * Fourth step: Update the pointer representation if necessary
+ *
+ * @param id the id of the current object
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ * @param processedIDs the already processed ids
+ */
+ private void step4(DBIDRef id, WritableDBIDDataStore pi, WritableDataStore<D> lambda, DBIDs processedIDs) {
+ DBIDVar p_i = DBIDUtil.newVar();
+ // for i = 1..n
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ D l_i = lambda.get(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
+ D lp_i = lambda.get(p_i);
+
+ // if L(i) >= L(P(i))
+ if (l_i.compareTo(lp_i) >= 0) {
+ // P(i) = n+1
+ pi.put(it, id);
+ }
+ }
+ }
+
+ /**
+ * First step: Initialize P(id) = id, L(id) = infinity.
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ */
+ private void step1double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda) {
+ // P(n+1) = n+1:
+ pi.put(id, id);
+ // L(n+1) = infinity
+ lambda.putDouble(id, Double.POSITIVE_INFINITY);
+ }
+
+ /**
+ * Second step: Determine the pairwise distances from all objects in the
+ * pointer representation to the new object with the specified id.
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param processedIDs the already processed ids
+ * @param relation Data relation
+ * @param distFunc Distance function to use
+ * @param m Data store
+ */
+ private void step2double(DBIDRef id, DBIDs processedIDs, Relation<? extends O> relation, PrimitiveDoubleDistanceFunction<? super O> distFunc, WritableDoubleDistanceDataStore m) {
+ O newObj = relation.get(id);
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ // M(i) = dist(i, n+1)
+ m.putDouble(it, distFunc.doubleDistance(relation.get(it), newObj));
+ }
+ }
+
+ /**
+ * Third step: Determine the values for P and L
+ *
+ * @param id the id of the object to be inserted into the pointer
+ * representation
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ * @param processedIDs the already processed ids
+ * @param m Data store
+ */
+ private void step3double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs, WritableDoubleDistanceDataStore m) {
+ DBIDVar p_i = DBIDUtil.newVar();
+ // for i = 1..n
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ double l_i = lambda.doubleValue(it);
+ double m_i = m.doubleValue(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
+ double mp_i = m.doubleValue(p_i);
+
+ // if L(i) >= M(i)
+ if (l_i >= m_i) {
+ // M(P(i)) = min { M(P(i)), L(i) }
+ m.putDouble(p_i, Math.min(mp_i, l_i));
+
+ // L(i) = M(i)
+ lambda.putDouble(it, m_i);
+
+ // P(i) = n+1;
+ pi.put(it, id);
+ } else {
+ // M(P(i)) = min { M(P(i)), M(i) }
+ m.putDouble(p_i, Math.min(mp_i, m_i));
+ }
+ }
+ }
+
+ /**
+ * Fourth step: Update the pointer representation if necessary
+ *
+ * @param id the id of the current object
+ * @param pi Pi data store
+ * @param lambda Lambda data store
+ * @param processedIDs the already processed ids
+ */
+ private void step4double(DBIDRef id, WritableDBIDDataStore pi, WritableDoubleDistanceDataStore lambda, DBIDs processedIDs) {
+ DBIDVar p_i = DBIDUtil.newVar();
+ // for i = 1..n
+ for (DBIDIter it = processedIDs.iter(); it.valid(); it.advance()) {
+ double l_i = lambda.doubleValue(it);
+ pi.assignVar(it, p_i); // p_i = pi(it)
+ double lp_i = lambda.doubleValue(p_i);
+
+ // if L(i) >= L(P(i))
+ if (l_i >= lp_i) {
+ // P(i) = n+1
+ pi.put(it, id);
+ }
+ }
+ }
+
+ @Override
+ public D getDistanceFactory() {
+ return getDistanceFunction().getDistanceFactory();
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ @Override
+ protected SLINK<O, D> makeInstance() {
+ return new SLINK<>(distanceFunction);
+ }
+ }
+}
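The four steps above maintain Sibson's pointer representation: pi(i) stores the parent pointer of object i, and lambda(i) the height at which i is merged into pi(i). The recursion is easier to follow on a plain distance matrix; the following self-contained sketch mirrors the double-distance code path (class and method names are illustrative, not part of ELKI):

import java.util.Arrays;

public class SlinkSketch {
  /** Fill pi (parent pointers) and lambda (merge heights) from a distance matrix. */
  static void slink(double[][] dist, int[] pi, double[] lambda) {
    final int n = dist.length;
    double[] m = new double[n];
    for (int id = 0; id < n; id++) {
      pi[id] = id; // Step 1: P(n+1) = n+1
      lambda[id] = Double.POSITIVE_INFINITY; // Step 1: L(n+1) = infinity
      for (int i = 0; i < id; i++) { // Step 2: M(i) = dist(i, n+1)
        m[i] = dist[i][id];
      }
      for (int i = 0; i < id; i++) { // Step 3: update P and L
        if (lambda[i] >= m[i]) {
          m[pi[i]] = Math.min(m[pi[i]], lambda[i]);
          lambda[i] = m[i];
          pi[i] = id;
        } else {
          m[pi[i]] = Math.min(m[pi[i]], m[i]);
        }
      }
      for (int i = 0; i < id; i++) { // Step 4: re-point stale parents
        if (lambda[i] >= lambda[pi[i]]) {
          pi[i] = id;
        }
      }
    }
  }

  public static void main(String[] args) {
    double[][] d = { { 0, 1, 4 }, { 1, 0, 2 }, { 4, 2, 0 } };
    int[] pi = new int[3];
    double[] lambda = new double[3];
    slink(d, pi, lambda);
    // Prints [1, 2, 2] [1.0, 2.0, Infinity]: object 0 joins object 1 at
    // height 1, and that cluster joins object 2 at height 2.
    System.out.println(Arrays.toString(pi) + " " + Arrays.toString(lambda));
  }
}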
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java
new file mode 100644
index 00000000..7ef81692
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/SingleLinkageMethod.java
@@ -0,0 +1,80 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Single-linkage clustering method.
+ *
+ * Reference:
+ * <p>
+ * K. Florek and J. Łukaszewicz and J. Perkal and H. Steinhaus and S. Zubrzycki<br />
+ * Sur la liaison et la division des points d'un ensemble fini<br />
+ * In Colloquium Mathematicae (Vol. 2, No. 3-4)
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "K. Florek and J. Łukaszewicz and J. Perkal and H. Steinhaus and S. Zubrzycki", title = "Sur la liaison et la division des points d'un ensemble fini", booktitle = "Colloquium Mathematicae (Vol. 2, No. 3-4)")
+@Alias({ "single-link", "single", "slink", "nearest", "nearest-neighbor" })
+public class SingleLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final SingleLinkageMethod STATIC = new SingleLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public SingleLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return Math.min(dx, dy);
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SingleLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
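Within an agglomerative algorithm (such as NaiveAgglomerativeHierarchicalClustering in this changeset), combine() serves as the Lance-Williams update: when clusters x and y merge, the distance from the merged cluster to every other cluster j is recomputed from d(x,j), d(y,j) and d(x,y). A minimal sketch of that update step with single linkage inlined (names are illustrative):

public class SingleLinkMergeSketch {
  /** Merge cluster y into cluster x; dist is a symmetric matrix over active clusters. */
  static void mergeSingleLink(double[][] dist, boolean[] active, int x, int y) {
    for (int j = 0; j < dist.length; j++) {
      if (!active[j] || j == x || j == y) {
        continue;
      }
      // Single linkage: d(x u y, j) = min(d(x, j), d(y, j)).
      // Cluster sizes and d(x, y) are not needed, unlike for Ward or UPGMA.
      dist[x][j] = dist[j][x] = Math.min(dist[x][j], dist[y][j]);
    }
    active[y] = false; // y has been absorbed into x.
  }

  public static void main(String[] args) {
    double[][] d = { { 0, 3, 5 }, { 3, 0, 4 }, { 5, 4, 0 } };
    boolean[] active = { true, true, true };
    mergeSingleLink(d, active, 0, 1);
    System.out.println(d[0][2]); // 4.0 = min(d(0,2) = 5, d(1,2) = 4)
  }
}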
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java
new file mode 100644
index 00000000..488f011c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WardLinkageMethod.java
@@ -0,0 +1,86 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Ward's minimum variance clustering method.
+ *
+ * This criterion minimizes the increase in within-cluster variance, and it
+ * makes the most sense when used with squared Euclidean distance; see
+ * {@link de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction}
+ *
+ * Reference:
+ * <p>
+ * Ward Jr, Joe H.<br />
+ * Hierarchical grouping to optimize an objective function<br />
+ * Journal of the American statistical association 58.301 (1963): 236-244.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "J. H. Ward Jr", title = "Hierarchical grouping to optimize an objective function", booktitle = "Journal of the American statistical association 58.301", url = "http://dx.doi.org/10.1080/01621459.1963.10500845")
+@Alias({ "ward", "variance" })
+public class WardLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final WardLinkageMethod STATIC = new WardLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public WardLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ final double wx = (sizex + sizej) / (double) (sizex + sizey + sizej);
+ final double wy = (sizey + sizej) / (double) (sizex + sizey + sizej);
+ final double beta = sizej / (double) (sizex + sizey + sizej);
+ return wx * dx + wy * dy - beta * dxy;
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected WardLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
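As a sanity check of the coefficients: when fed squared Euclidean distances between singletons, the recurrence values come out as twice the increase in the sum of squared errors, so minimizing them follows Ward's criterion. A small standalone verification on three 1-d points, assuming the combine() contract dx = d(x,j), dy = d(y,j), dxy = d(x,y):

public class WardCombineCheck {
  static double wardCombine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
    final double t = sizex + sizey + sizej;
    return ((sizex + sizej) * dx + (sizey + sizej) * dy - sizej * dxy) / t;
  }

  public static void main(String[] args) {
    // Points a=0, b=2, c=6; squared Euclidean: d(a,b)=4, d(a,c)=36, d(b,c)=16.
    double d = wardCombine(1, 36, 1, 16, 1, 4); // distance of {a,b} to {c}
    // Direct Ward cost of merging {a,b} (centroid 1, size 2) with {c}:
    // (n1 * n2 / (n1 + n2)) * ||centroid difference||^2 = (2/3) * 25.
    double direct = (2.0 / 3.0) * 25.0;
    System.out.println(d + " == 2 * " + direct); // 33.33... == 2 * 16.66...
  }
}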
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java
new file mode 100644
index 00000000..ac0b17f5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/WeightedAverageLinkageMethod.java
@@ -0,0 +1,84 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Weighted average linkage clustering method.
+ *
+ * The name is somewhat of a misnomer: this method weights each cluster
+ * equally, rather than each object, and thus ignores the cluster sizes when
+ * computing the average linkage. See {@link GroupAverageLinkageMethod} for
+ * the UPGMA method, which uses the group sizes so that every object carries
+ * the same weight.
+ *
+ * Reference:
+ * <p>
+ * A. K. Jain and R. C. Dubes<br />
+ * Algorithms for Clustering Data<br />
+ * Prentice-Hall
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "A. K. Jain and R. C. Dubes", title = "Algorithms for Clustering Data", booktitle = "Algorithms for Clustering Data, Prentice-Hall")
+@Alias({ "wpgma", "WPGMA" })
+public class WeightedAverageLinkageMethod implements LinkageMethod {
+ /**
+ * Static instance of class.
+ */
+ public static final WeightedAverageLinkageMethod STATIC = new WeightedAverageLinkageMethod();
+
+ /**
+ * Constructor.
+ *
+ * @deprecated use the static instance {@link #STATIC} instead.
+ */
+ @Deprecated
+ public WeightedAverageLinkageMethod() {
+ super();
+ }
+
+ @Override
+ public double combine(int sizex, double dx, int sizey, double dy, int sizej, double dxy) {
+ return .5 * (dx + dy);
+ }
+
+ /**
+ * Class parameterizer.
+ *
+ * Returns the static instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected WeightedAverageLinkageMethod makeInstance() {
+ return STATIC;
+ }
+ }
+}
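To make the contrast with UPGMA concrete: when a cluster of three objects merges with a singleton, WPGMA gives both clusters weight 1/2, whereas group average weights them 3/4 and 1/4. A minimal comparison with made-up distances:

public class AverageLinkageComparison {
  public static void main(String[] args) {
    int sizex = 3, sizey = 1; // sizes of the clusters being merged
    double dx = 2.0, dy = 6.0; // average distances of x and y to a cluster j
    // WPGMA ignores the sizes: both clusters count the same.
    double wpgma = .5 * (dx + dy);
    // UPGMA (group average) weights by cluster size instead.
    double upgma = (sizex * dx + sizey * dy) / (double) (sizex + sizey);
    System.out.println("WPGMA: " + wpgma + ", UPGMA: " + upgma); // 4.0 vs 3.0
  }
}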
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
index 47855aad..dc1fa47c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -43,9 +43,17 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Abstract base class for k-means implementations.
@@ -59,7 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
* @param <D> Distance type
* @param <M> Cluster model type
*/
-public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?>, D, Clustering<M>> implements KMeans, ClusteringAlgorithm<Clustering<M>> {
+public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?>, D, Clustering<M>> implements KMeans<V, D, M>, ClusteringAlgorithm<Clustering<M>> {
/**
* Holds the value of {@link #K_ID}.
*/
@@ -102,54 +110,53 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
protected boolean assignToNearestCluster(Relation<V> relation, List<? extends NumberVector<?>> means, List<? extends ModifiableDBIDs> clusters) {
boolean changed = false;
- if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
@SuppressWarnings("unchecked")
final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
double dist = df.doubleDistance(fv, means.get(i));
- if(dist < mindist) {
+ if (dist < mindist) {
minIndex = i;
mindist = dist;
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
break;
}
}
}
}
}
- }
- else {
+ } else {
final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
D dist = df.distance(fv, means.get(i));
- if(dist.compareTo(mindist) < 0) {
+ if (dist.compareTo(mindist) < 0) {
minIndex = i;
mindist = dist;
}
}
- if(clusters.get(minIndex).add(iditer)) {
+ if (clusters.get(minIndex).add(iditer)) {
changed = true;
// Remove from previous cluster
// TODO: keep a list of cluster assignments to save this search?
- for(int i = 0; i < k; i++) {
- if(i != minIndex) {
- if(clusters.get(i).remove(iditer)) {
+ for (int i = 0; i < k; i++) {
+ if (i != minIndex) {
+ if (clusters.get(i).remove(iditer)) {
break;
}
}
@@ -174,21 +181,24 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @return the mean vectors of the given clusters in the given database
*/
protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> means, Relation<V> database) {
- List<Vector> newMeans = new ArrayList<Vector>(k);
- for(int i = 0; i < k; i++) {
+ List<Vector> newMeans = new ArrayList<>(k);
+ for (int i = 0; i < k; i++) {
ModifiableDBIDs list = clusters.get(i);
Vector mean = null;
- if(list.size() > 0) {
+ if (list.size() > 0) {
double s = 1.0 / list.size();
DBIDIter iter = list.iter();
assert (iter.valid());
mean = database.get(iter).getColumnVector().timesEquals(s);
+ double[] raw = mean.getArrayRef();
iter.advance();
- for(; iter.valid(); iter.advance()) {
- mean.plusTimesEquals(database.get(iter).getColumnVector(), s);
+ for (; iter.valid(); iter.advance()) {
+ NumberVector<?> vec = database.get(iter);
+ for (int j = 0; j < mean.getDimensionality(); j++) {
+ raw[j] += s * vec.doubleValue(j);
+ }
}
- }
- else {
+ } else {
mean = means.get(i).getColumnVector();
}
newMeans.add(mean);
@@ -207,19 +217,18 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
protected List<NumberVector<?>> medians(List<? extends ModifiableDBIDs> clusters, List<? extends NumberVector<?>> medians, Relation<V> database) {
final int dim = medians.get(0).getDimensionality();
final SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(database);
- List<NumberVector<?>> newMedians = new ArrayList<NumberVector<?>>(k);
- for(int i = 0; i < k; i++) {
+ List<NumberVector<?>> newMedians = new ArrayList<>(k);
+ for (int i = 0; i < k; i++) {
ArrayModifiableDBIDs list = DBIDUtil.newArray(clusters.get(i));
- if(list.size() > 0) {
+ if (list.size() > 0) {
Vector mean = new Vector(dim);
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
sorter.setDimension(d);
DBID id = QuickSelect.median(list, sorter);
mean.set(d, database.get(id).doubleValue(d));
}
newMedians.add(mean);
- }
- else {
+ } else {
newMedians.add((NumberVector<?>) medians.get(i));
}
}
@@ -235,7 +244,7 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
* @param op Cluster size change / Weight change
*/
protected void incrementalUpdateMean(Vector mean, V vec, int newsize, double op) {
- if(newsize == 0) {
+ if (newsize == 0) {
return; // Keep old mean
}
Vector delta = vec.getColumnVector();
@@ -256,65 +265,62 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
protected boolean macQueenIterate(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters) {
boolean changed = false;
- if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+ if (getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
// Raw distance function
@SuppressWarnings("unchecked")
final PrimitiveDoubleDistanceFunction<? super NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?>>) getDistanceFunction();
// Incremental update
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double mindist = Double.POSITIVE_INFINITY;
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
double dist = df.doubleDistance(fv, means.get(i));
- if(dist < mindist) {
+ if (dist < mindist) {
minIndex = i;
mindist = dist;
}
}
// Update the cluster mean incrementally:
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
ModifiableDBIDs ci = clusters.get(i);
- if(i == minIndex) {
- if(ci.add(iditer)) {
+ if (i == minIndex) {
+ if (ci.add(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size(), +1);
changed = true;
}
- }
- else if(ci.remove(iditer)) {
+ } else if (ci.remove(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size() + 1, -1);
changed = true;
}
}
}
- }
- else {
+ } else {
// Raw distance function
final PrimitiveDistanceFunction<? super NumberVector<?>, D> df = getDistanceFunction();
// Incremental update
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
D mindist = df.getDistanceFactory().infiniteDistance();
V fv = relation.get(iditer);
int minIndex = 0;
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
D dist = df.distance(fv, means.get(i));
- if(dist.compareTo(mindist) < 0) {
+ if (dist.compareTo(mindist) < 0) {
minIndex = i;
mindist = dist;
}
}
// Update the cluster mean incrementally:
- for(int i = 0; i < k; i++) {
+ for (int i = 0; i < k; i++) {
ModifiableDBIDs ci = clusters.get(i);
- if(i == minIndex) {
- if(ci.add(iditer)) {
+ if (i == minIndex) {
+ if (ci.add(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size(), +1);
changed = true;
}
- }
- else if(ci.remove(iditer)) {
+ } else if (ci.remove(iditer)) {
incrementalUpdateMean(means.get(i), fv, ci.size() + 1, -1);
changed = true;
}
@@ -323,4 +329,76 @@ public abstract class AbstractKMeans<V extends NumberVector<?>, D extends Distan
}
return changed;
}
+
+ @Override
+ public void setK(int k) {
+ this.k = k;
+ }
+
+ @Override
+ public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) {
+ this.distanceFunction = distanceFunction;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public abstract static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
+ /**
+ * k Parameter.
+ */
+ protected int k;
+
+ /**
+ * Maximum number of iterations.
+ */
+ protected int maxiter;
+
+ /**
+ * Initialization method.
+ */
+ protected KMeansInitialization<V> initializer;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
+ if (config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
+ getLogger().warning("k-means optimizes the sum of squares - it should be used with squared Euclidean distance, and may not converge otherwise!");
+ }
+ }
+
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.getValue();
+ }
+
+ ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, RandomlyChosenInitialMeans.class);
+ if (config.grab(initialP)) {
+ initializer = initialP.instantiateClass(config);
+ }
+
+ IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
+ maxiterP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(maxiterP)) {
+ maxiter = maxiterP.getValue();
+ }
+ }
+
+ /**
+ * Get class logger.
+ *
+ * @return Logger
+ */
+ abstract protected Logging getLogger();
+
+ @Override
+ abstract protected AbstractKMeans<V, D, ?> makeInstance();
+ }
}
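The incrementalUpdateMean() used by macQueenIterate() above keeps cluster means exact under single additions and removals, avoiding a full recomputation. A standalone sketch of the standard arithmetic on plain arrays; the parameter convention chosen here (size after the change) is one consistent option and is not necessarily the convention of the ELKI method:

import java.util.Arrays;

public class IncrementalMeanSketch {
  /** op = +1 to add vec, -1 to remove it; size is the cluster size after the change. */
  static void update(double[] mean, double[] vec, int size, int op) {
    if (size == 0) {
      return; // Keep the old mean for empty clusters.
    }
    for (int d = 0; d < mean.length; d++) {
      // Adding:   mean' = mean + (vec - mean) / size
      // Removing: mean' = mean - (vec - mean) / size
      mean[d] += op * (vec[d] - mean[d]) / size;
    }
  }

  public static void main(String[] args) {
    double[] mean = { 1.0, 1.0 }; // mean of {(0,0), (2,2)}
    update(mean, new double[] { 4.0, 4.0 }, 3, +1); // add (4,4)
    System.out.println(Arrays.toString(mean)); // [2.0, 2.0]
    update(mean, new double[] { 4.0, 4.0 }, 2, -1); // remove it again
    System.out.println(Arrays.toString(mean)); // [1.0, 1.0]
  }
}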
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
index 3a69c806..9e3eb478 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
new file mode 100644
index 00000000..30bb640c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
@@ -0,0 +1,219 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.KMeansQualityMeasure;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Run K-Means multiple times, and keep the best run.
+ *
+ * @author Stephan Baier
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ * @param <M> Model type
+ */
+public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, D, M> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(BestOfMultipleKMeans.class);
+
+ /**
+ * Number of trials to do.
+ */
+ private int trials;
+
+ /**
+ * Inner k-means variant to run multiple times.
+ */
+ private KMeans<V, D, M> innerkMeans;
+
+ /**
+ * Quality measure which should be used.
+ */
+ private KMeansQualityMeasure<? super V, ? super D> qualityMeasure;
+
+ /**
+ * Constructor.
+ *
+ * @param trials Number of trials to do.
+ * @param innerkMeans K-Means variant to actually use.
+ * @param qualityMeasure Quality measure
+ */
+ public BestOfMultipleKMeans(int trials, KMeans<V, D, M> innerkMeans, KMeansQualityMeasure<? super V, ? super D> qualityMeasure) {
+ super();
+ this.trials = trials;
+ this.innerkMeans = innerkMeans;
+ this.qualityMeasure = qualityMeasure;
+ }
+
+ @Override
+ public Clustering<M> run(Database database, Relation<V> relation) {
+ if (!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
+ throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
+ }
+ final PrimitiveDistanceFunction<? super V, D> df = (PrimitiveDistanceFunction<? super V, D>) innerkMeans.getDistanceFunction();
+ Clustering<M> bestResult = null;
+ if (trials > 1) {
+ double bestCost = Double.POSITIVE_INFINITY;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
+ for (int i = 0; i < trials; i++) {
+ Clustering<M> currentCandidate = innerkMeans.run(database, relation);
+ double currentCost = qualityMeasure.calculateCost(currentCandidate, df, relation);
+
+ if (LOG.isVerbose()) {
+ LOG.verbose("Cost of candidate " + i + ": " + currentCost);
+ }
+
+ if (currentCost < bestCost) {
+ bestResult = currentCandidate;
+ bestCost = currentCost;
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ } else {
+ bestResult = innerkMeans.run(database);
+ }
+
+ return bestResult;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return innerkMeans.getInputTypeRestriction();
+ }
+
+ @Override
+ public DistanceFunction<? super V, D> getDistanceFunction() {
+ return innerkMeans.getDistanceFunction();
+ }
+
+ @Override
+ public void setK(int k) {
+ innerkMeans.setK(k);
+ }
+
+ @Override
+ public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) {
+ innerkMeans.setDistanceFunction(distanceFunction);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Stephan Baier
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ * @param <M> Model type
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractParameterizer {
+ /**
+ * Parameter to specify the number of trials to run.
+ */
+ public static final OptionID TRIALS_ID = new OptionID("kmeans.trials", "The number of trials to run.");
+
+ /**
+ * Parameter to specify the kMeans variant.
+ */
+ public static final OptionID KMEANS_ID = new OptionID("kmeans.algorithm", "KMeans variant to run multiple times.");
+
+ /**
+ * Parameter to specify the variant of quality measure.
+ */
+ public static final OptionID QUALITYMEASURE_ID = new OptionID("kmeans.qualitymeasure", "Quality measure variant for deciding which run to keep.");
+
+ /**
+ * Number of trials to perform.
+ */
+ protected int trials;
+
+ /**
+ * Variant of kMeans to use.
+ */
+ protected KMeans<V, D, M> kMeansVariant;
+
+ /**
+ * Quality measure.
+ */
+ protected KMeansQualityMeasure<? super V, ? super D> qualityMeasure;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ IntParameter trialsP = new IntParameter(TRIALS_ID);
+ trialsP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(trialsP)) {
+ trials = trialsP.intValue();
+ }
+
+ ObjectParameter<KMeans<V, D, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
+ if (config.grab(kMeansVariantP)) {
+ kMeansVariant = kMeansVariantP.instantiateClass(config);
+ }
+
+ ObjectParameter<KMeansQualityMeasure<V, ? super D>> qualityMeasureP = new ObjectParameter<>(QUALITYMEASURE_ID, KMeansQualityMeasure.class);
+ if (config.grab(qualityMeasureP)) {
+ qualityMeasure = qualityMeasureP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected BestOfMultipleKMeans<V, D, M> makeInstance() {
+ return new BestOfMultipleKMeans<>(trials, kMeansVariant, qualityMeasure);
+ }
+ }
+}
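The run() method above is the generic keep-the-best pattern: repeat a randomized algorithm, score each result with a quality measure, and retain the cheapest. Stripped of the ELKI plumbing, the pattern is just this (runOnce() is a hypothetical stand-in for one k-means run plus its quality score):

import java.util.Random;

public class BestOfRunsSketch {
  /** Stand-in for one randomized clustering run; returns its cost, e.g. WCSS. */
  static double runOnce(Random rnd) {
    return rnd.nextDouble(); // a real run would cluster the data and score it
  }

  public static void main(String[] args) {
    Random rnd = new Random(0);
    int trials = 10;
    double bestCost = Double.POSITIVE_INFINITY;
    for (int i = 0; i < trials; i++) {
      double cost = runOnce(rnd);
      if (cost < bestCost) {
        bestCost = cost; // a real implementation also keeps the clustering
      }
    }
    System.out.println("Best of " + trials + " runs: " + bestCost);
  }
}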
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java
new file mode 100644
index 00000000..a018c04b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FarthestPointsInitialMeans.java
@@ -0,0 +1,186 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+
+/**
+ * K-Means initialization by repeatedly choosing the farthest point.
+ *
+ * Note: this initialization is less random than most others, so repeated
+ * runs are more likely to end up in the same local minimum.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+public class FarthestPointsInitialMeans<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization<V> implements KMedoidsInitialization<V> {
+ /**
+ * Discard the first vector.
+ */
+ boolean dropfirst = true;
+
+ /**
+ * Constructor.
+ *
+ * @param rnd Random generator.
+ * @param dropfirst Flag to discard the first vector.
+ */
+ public FarthestPointsInitialMeans(RandomFactory rnd, boolean dropfirst) {
+ super(rnd);
+ this.dropfirst = dropfirst;
+ }
+
+ @Override
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
+ // Get a distance query
+ if (!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
+ throw new AbortException("Farthest points K-Means initialization can only be used with numerical distances.");
+ }
+ @SuppressWarnings("unchecked")
+ final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
+ DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, distF);
+
+ // Choose the first mean at random
+ List<V> means = new ArrayList<>(k);
+
+ Random random = rnd.getRandom();
+ DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter();
+ means.add(relation.get(first));
+
+ DBIDVar best = DBIDUtil.newVar(first);
+ for (int i = (dropfirst ? 0 : 1); i < k; i++) {
+ // Find farthest object:
+ double maxdist = Double.NEGATIVE_INFINITY;
+ for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ double dsum = 0.;
+ for (V ex : means) {
+ dsum += distQ.distance(ex, it).doubleValue();
+ }
+ if (dsum > maxdist) {
+ maxdist = dsum;
+ best.set(it);
+ }
+ }
+ // Add new mean:
+ if (i == 0) {
+ means.clear(); // Remove temporary first element.
+ }
+ means.add(relation.get(best));
+ }
+
+ return means;
+ }
+
+ @Override
+ public DBIDs chooseInitialMedoids(int k, DistanceQuery<? super V, ?> distQ2) {
+ if (!(distQ2.getDistanceFactory() instanceof NumberDistance)) {
+ throw new AbortException("Farthest points K-Means initialization can only be used with numerical distances.");
+ }
+ @SuppressWarnings("unchecked")
+ DistanceQuery<? super V, D> distQ = (DistanceQuery<? super V, D>) distQ2;
+ final Relation<?> relation = distQ.getRelation();
+ // Choose the first medoid at random
+ ArrayModifiableDBIDs means = DBIDUtil.newArray(k);
+
+ Random random = rnd.getRandom();
+ DBIDIter first = DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter();
+ means.add(first);
+
+ DBIDVar best = DBIDUtil.newVar(first);
+ for (int i = (dropfirst ? 0 : 1); i < k; i++) {
+ // Find farthest object:
+ double maxdist = Double.NEGATIVE_INFINITY;
+ for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ double dsum = 0.;
+ for (DBIDIter ex = means.iter(); ex.valid(); ex.advance()) {
+ dsum += distQ.distance(ex, it).doubleValue();
+ }
+ if (dsum > maxdist) {
+ maxdist = dsum;
+ best.set(it);
+ }
+ }
+ // Add new mean:
+ if (i == 0) {
+ means.clear(); // Remove temporary first element.
+ }
+ means.add(best);
+ }
+
+ return means;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+ /**
+ * Option ID to control the handling of the first object chosen.
+ */
+ public static final OptionID DROPFIRST_ID = new OptionID("farthest.dropfirst", "Drop the first object chosen (which is chosen randomly) for the farthest points heuristic.");
+
+ /**
+ * Flag for discarding the first object chosen.
+ */
+ protected boolean dropfirst = true;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ Flag dropfirstP = new Flag(DROPFIRST_ID);
+ if (config.grab(dropfirstP)) {
+ dropfirst = dropfirstP.isTrue();
+ }
+ }
+
+ @Override
+ protected FarthestPointsInitialMeans<V, D> makeInstance() {
+ return new FarthestPointsInitialMeans<>(rnd, dropfirst);
+ }
+ }
+}
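Both methods above share the same loop: start from a temporary random seed, repeatedly pick the object whose summed distance to all chosen means is largest, and drop the seed on the first iteration when dropfirst is set. A 1-d standalone sketch of that loop (names are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class FarthestPointsSketch {
  static List<Double> choose(double[] data, int k, boolean dropfirst, Random rnd) {
    List<Double> means = new ArrayList<Double>(k);
    means.add(data[rnd.nextInt(data.length)]); // temporary random seed
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
      double best = data[0], maxdist = Double.NEGATIVE_INFINITY;
      for (double x : data) {
        double dsum = 0.; // score: sum of distances to all chosen means
        for (double m : means) {
          dsum += Math.abs(x - m);
        }
        if (dsum > maxdist) {
          maxdist = dsum;
          best = x;
        }
      }
      if (i == 0) {
        means.clear(); // drop the random seed
      }
      means.add(best);
    }
    return means;
  }

  public static void main(String[] args) {
    double[] data = { 0., 1., 5., 6., 10. };
    // Ends at the two extremes of the data set, e.g. [0.0, 10.0].
    System.out.println(choose(data, 2, true, new Random(0)));
  }
}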
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
index 1e51f4d6..08e2f116 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -51,9 +52,9 @@ public class FirstKInitialMeans<V> implements KMeansInitialization<V>, KMedoidsI
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
DBIDIter iter = relation.iterDBIDs();
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
for(int i = 0; i < k && iter.valid(); i++, iter.advance()) {
means.add(relation.get(iter));
}
@@ -80,7 +81,7 @@ public class FirstKInitialMeans<V> implements KMeansInitialization<V>, KMedoidsI
public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
@Override
protected FirstKInitialMeans<V> makeInstance() {
- return new FirstKInitialMeans<V>();
+ return new FirstKInitialMeans<>();
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
index 68fc4e48..29c0a5c8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeans.java
@@ -1,12 +1,10 @@
package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,12 +23,27 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import de.lmu.ifi.dbs.elki.algorithm.DistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+
/**
* Some constants and options shared among kmeans family algorithms.
*
* @author Erich Schubert
+ *
+ * @param <V> Number vector type
+ * @param <D> Distance type
+ * @param <M> Actual model type
*/
-public interface KMeans {
+public interface KMeans<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends ClusteringAlgorithm<Clustering<M>>, DistanceBasedAlgorithm<V, D> {
/**
* Parameter to specify the initialization method
*/
@@ -52,4 +65,27 @@ public interface KMeans {
* Parameter to specify the random generator seed.
*/
public static final OptionID SEED_ID = new OptionID("kmeans.seed", "The random number generator seed.");
-} \ No newline at end of file
+
+ /**
+ * Run the clustering algorithm.
+ *
+ * @param database Database to run on.
+ * @param rel Relation to process.
+ * @return Clustering result
+ */
+ Clustering<M> run(Database database, Relation<V> rel);
+
+ /**
+ * Set the value of k. Needed for some types of nested k-means.
+ *
+ * @param k K parameter
+ */
+ void setK(int k);
+
+ /**
+ * Set the distance function to use.
+ *
+ * @param distanceFunction Distance function.
+ */
+ void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java
new file mode 100644
index 00000000..37071d36
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansBisecting.java
@@ -0,0 +1,231 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.LinkedList;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * The bisecting k-means algorithm starts with an initial partitioning into
+ * two clusters, then repeatedly splits the largest cluster until the desired
+ * number of clusters is reached.
+ *
+ * Reference:<br>
+ * <p>
+ * M. Steinbach, G. Karypis, V. Kumar:<br />
+ * A Comparison of Document Clustering Techniques<br />
+ * KDD workshop on text mining. Vol. 400. No. 1
+ * </p>
+ *
+ * @author Stephan Baier
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ * @param <M> Model type
+ */
+@Reference(authors = "M. Steinbach, G. Karypis, V. Kumar", title = "A Comparison of Document Clustering Techniques", booktitle = "KDD workshop on text mining. Vol. 400. No. 1")
+public class KMeansBisecting<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, D, M> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(KMeansBisecting.class);
+
+ /**
+ * Variant of kMeans for the bisecting step.
+ */
+ private KMeans<V, D, M> innerkMeans;
+
+ /**
+ * Desired value of k.
+ */
+ private int k;
+
+ /**
+ * Constructor.
+ *
+ * @param k k parameter - number of result clusters
+ * @param innerkMeans KMeans variant parameter - for bisecting step
+ */
+ public KMeansBisecting(int k, KMeans<V, D, M> innerkMeans) {
+ super();
+ this.k = k;
+ this.innerkMeans = innerkMeans;
+ }
+
+ @Override
+ public Clustering<M> run(Database database, Relation<V> relation) {
+ ProxyDatabase proxyDB = new ProxyDatabase(relation.getDBIDs(), database);
+
+ // A linked list is preferable as scratch storage: we will A) not need that
+ // many clusters and B) repeatedly remove the largest cluster, which may sit
+ // anywhere in the list (often at the head).
+ LinkedList<Cluster<M>> currentClusterList = new LinkedList<>();
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Bisecting k-means", k - 1, LOG) : null;
+
+ for (int j = 0; j < this.k - 1; j++) {
+ // Choose a cluster to split and project database to cluster
+ if (currentClusterList.size() == 0) {
+ proxyDB = new ProxyDatabase(relation.getDBIDs(), database);
+ } else {
+ Cluster<M> largestCluster = null;
+ for (Cluster<M> cluster : currentClusterList) {
+ if (largestCluster == null || cluster.size() > largestCluster.size()) {
+ largestCluster = cluster;
+ }
+ }
+ currentClusterList.remove(largestCluster);
+ proxyDB.setDBIDs(largestCluster.getIDs());
+ }
+
+ // Run the inner k-means algorithm:
+ // FIXME: ensure we run on the correct relation in a multirelational
+ // setting!
+ Clustering<M> innerResult = innerkMeans.run(proxyDB);
+ // Add resulting clusters to current result.
+ currentClusterList.addAll(innerResult.getAllClusters());
+
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ if (LOG.isVerbose()) {
+ LOG.verbose("Iteration " + j);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ // add all current clusters to the result
+ Clustering<M> result = new Clustering<>("Bisecting k-Means Result", "Bisecting-k-means");
+ for (Cluster<M> cluster : currentClusterList) {
+ result.addToplevelCluster(cluster);
+ }
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return innerkMeans.getInputTypeRestriction();
+ }
+
+ @Override
+ public DistanceFunction<? super V, D> getDistanceFunction() {
+ return innerkMeans.getDistanceFunction();
+ }
+
+ @Override
+ public void setK(int k) {
+ this.k = k;
+ }
+
+ @Override
+ public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) {
+ innerkMeans.setDistanceFunction(distanceFunction);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Stephan Baier
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ * @param <M> Model type
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractParameterizer {
+ /**
+ * Parameter to specify the kMeans variant.
+ */
+ public static final OptionID KMEANS_ID = new OptionID("bisecting.kmeansvariant", "KMeans variant");
+
+ /**
+ * Variant of kMeans
+ */
+ protected KMeans<V, D, M> kMeansVariant;
+
+ /**
+ * Desired number of clusters.
+ */
+ protected int k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ IntParameter kP = new IntParameter(KMeans.K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+
+ ObjectParameter<KMeans<V, D, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class, BestOfMultipleKMeans.class);
+ if (config.grab(kMeansVariantP)) {
+ ListParameterization kMeansVariantParameters = new ListParameterization();
+
+ // We will always invoke this with k=2!
+ kMeansVariantParameters.addParameter(KMeans.K_ID, 2);
+
+ ChainedParameterization combinedConfig = new ChainedParameterization(kMeansVariantParameters, config);
+ combinedConfig.errorsTo(config);
+ kMeansVariant = kMeansVariantP.instantiateClass(combinedConfig);
+ }
+ }
+
+ @Override
+ protected KMeansBisecting<V, D, M> makeInstance() {
+ return new KMeansBisecting<>(k, kMeansVariant);
+ }
+ }
+}
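The run() method above reduces to a short loop: k - 1 times, remove the largest cluster and replace it with the two clusters produced by an inner 2-means run. A standalone sketch of just that control flow, where split() is a hypothetical stand-in for the inner 2-means:

import java.util.ArrayList;
import java.util.List;

public class BisectingSketch {
  static List<List<double[]>> bisecting(List<double[]> data, int k) {
    List<List<double[]>> clusters = new ArrayList<List<double[]>>();
    clusters.add(data);
    while (clusters.size() < k) { // runs k - 1 times
      List<double[]> largest = clusters.get(0);
      for (List<double[]> c : clusters) {
        if (c.size() > largest.size()) {
          largest = c;
        }
      }
      clusters.remove(largest);
      clusters.addAll(split(largest)); // the inner 2-means goes here
    }
    return clusters;
  }

  /** Stand-in for a 2-means run: simply cut the cluster in half. */
  static List<List<double[]>> split(List<double[]> cluster) {
    int mid = cluster.size() / 2;
    List<List<double[]>> out = new ArrayList<List<double[]>>();
    out.add(new ArrayList<double[]>(cluster.subList(0, mid)));
    out.add(new ArrayList<double[]>(cluster.subList(mid, cluster.size())));
    return out;
  }

  public static void main(String[] args) {
    List<double[]> data = new ArrayList<double[]>();
    for (int i = 0; i < 5; i++) {
      data.add(new double[] { i });
    }
    System.out.println(bisecting(data, 3).size()); // 3 clusters
  }
}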
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
index 54b3a2ce..06fb10c1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,6 +24,8 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
*/
import java.util.List;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
@@ -31,7 +33,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
* Interface for initializing K-Means
*
* @author Erich Schubert
- *
+ *
* @apiviz.landmark
*
* @param <V> Object type
@@ -40,10 +42,12 @@ public interface KMeansInitialization<V> {
/**
* Choose initial means
*
+ * @param database Database context
* @param relation Relation
* @param k Parameter k
- * @param distanceFunction Distance function
+ * @param distanceFunction Distance function
+ *
* @return List of chosen means for k-means
*/
- public abstract List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction);
+ public abstract List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction);
}
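The signature change above has two effects: implementations now receive the Database context explicitly instead of reaching it through relation.getDatabase(), and the distance function is widened to NumberVector so initializers can be shared across vector types. A minimal conforming implementation, as a sketch (the class FirstKInitialMeans is hypothetical; imports as in the surrounding files):

    public class FirstKInitialMeans<V> implements KMeansInitialization<V> {
      @Override
      public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
        List<V> means = new ArrayList<>(k);
        // Simply take the first k objects of the relation - illustrative only.
        for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid() && means.size() < k; iter.advance()) {
          means.add(relation.get(iter));
        }
        return means;
      }
    }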
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
index f43c2277..e692293c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -36,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the k-means algorithm, using Lloyd-style bulk iterations.
@@ -90,28 +83,23 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
super(distanceFunction, k, maxiter, initializer);
}
- /**
- * Run k-means.
- *
- * @param database Database
- * @param relation relation to use
- * @return result
- */
+ @Override
public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
- List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if (LOG.isVerbose()) {
- LOG.verbose("K-Means iteration " + (iteration + 1));
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
boolean changed = assignToNearestCluster(relation, means, clusters);
// Stop if no cluster assignment changed.
@@ -121,12 +109,16 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
// Recompute means.
means = means(clusters, means, relation);
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
+
// Wrap result
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
- KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
- result.addCluster(new Cluster<KMeansModel<V>>(clusters.get(i), model));
+ KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -143,53 +135,15 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
- /**
- * k Parameter.
- */
- protected int k;
-
- /**
- * Number of iterations.
- */
- protected int maxiter;
-
- /**
- * Initialization method.
- */
- protected KMeansInitialization<V> initializer;
-
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
@Override
- protected void makeOptions(Parameterization config) {
- ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
- if(config.grab(distanceFunctionP)) {
- distanceFunction = distanceFunctionP.instantiateClass(config);
- if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
- LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
- }
- }
-
- IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
- k = kP.getValue();
- }
-
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
- initializer = initialP.instantiateClass(config);
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
- maxiter = maxiterP.intValue();
- }
+ protected Logging getLogger() {
+ return LOG;
}
@Override
protected KMeansLloyd<V, D> makeInstance() {
- return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMeansLloyd<>(distanceFunction, k, maxiter, initializer);
}
}
}
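The logging change in run() replaces per-iteration verbose messages with the IndefiniteProgress API; an indefinite progress fits here because the number of iterations until convergence is unknown up front (unlike a FiniteProgress over the data set). The same three-call pattern recurs in KMeansMacQueen, KMediansLloyd, KMedoidsEM and KMedoidsPAM below; in isolation it reads as follows (a sketch; iterate() is a hypothetical stand-in for one clustering iteration):

    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
      if(prog != null) {
        prog.incrementProcessed(LOG); // one tick per iteration, no fixed total
      }
      if(!iterate()) { // hypothetical: returns false once no assignment changed
        break;
      }
    }
    if(prog != null) {
      prog.setCompleted(LOG); // closes the progress line in the log output
    }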
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
index 0cc7c363..bb689bd3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -37,20 +36,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the k-means algorithm, using MacQueen style incremental updates.
@@ -89,24 +82,18 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
super(distanceFunction, k, maxiter, initializer);
}
- /**
- * Run k-means.
- *
- * @param database Database
- * @param relation relation to use
- * @return Clustering result
- */
+ @Override
public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
- List<Vector> means = new ArrayList<Vector>(k);
- for (NumberVector<?> nv : initializer.chooseInitialMeans(relation, k, getDistanceFunction())) {
+ List<Vector> means = new ArrayList<>(k);
+ for (NumberVector<?> nv : initializer.chooseInitialMeans(database, relation, k, getDistanceFunction())) {
means.add(nv.getColumnVector());
}
// Initialize cluster and assign objects
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -114,22 +101,27 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
// Initial recomputation of the means.
means = means(clusters, means, relation);
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
// Refine result
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if (LOG.isVerbose()) {
- LOG.verbose("K-Means iteration " + (iteration + 1));
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
boolean changed = macQueenIterate(relation, means, clusters);
if (!changed) {
break;
}
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
+
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
- KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
- result.addCluster(new Cluster<KMeansModel<V>>(ids, model));
+ KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(ids, model));
}
return result;
}
@@ -146,53 +138,15 @@ public class KMeansMacQueen<V extends NumberVector<?>, D extends Distance<D>> ex
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
- /**
- * k Parameter.
- */
- protected int k;
-
- /**
- * Maximum number of iterations.
- */
- protected int maxiter;
-
- /**
- * Initialization method.
- */
- protected KMeansInitialization<V> initializer;
-
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
@Override
- protected void makeOptions(Parameterization config) {
- ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
- if (config.grab(distanceFunctionP)) {
- distanceFunction = distanceFunctionP.instantiateClass(config);
- if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
- LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
- }
- }
-
- IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
- k = kP.getValue();
- }
-
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
- initializer = initialP.instantiateClass(config);
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
- maxiter = maxiterP.getValue();
- }
+ protected Logging getLogger() {
+ return LOG;
}
@Override
protected KMeansMacQueen<V, D> makeInstance() {
- return new KMeansMacQueen<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMeansMacQueen<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
index a07953da..302ca86b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,6 +26,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Random;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
@@ -70,17 +72,17 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
// Get a distance query
if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
throw new AbortException("K-Means++ initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
- DistanceQuery<V, D> distQ = relation.getDatabase().getDistanceQuery(relation, distF);
+ DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, distF);
// Choose first mean
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
Random random = rnd.getRandom();
DBID first = DBIDUtil.deref(DBIDUtil.randomSample(relation.getDBIDs(), 1, new Random(random.nextLong())).iter());
@@ -99,7 +101,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
}
double r = random.nextDouble() * weightsum;
int pos = 0;
- while(r > 0 && pos < weights.length) {
+ while(r > 0 && pos < weights.length - 1) {
r -= weights[pos];
pos++;
}
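The bound change from weights.length to weights.length - 1 guards the weighted pick against floating-point rounding: r is drawn from [0, weightsum), and while exact arithmetic would drive r to zero or below before the weights run out, accumulated rounding error can leave r marginally positive after the last subtraction, letting pos run past the final valid index. Clamping the loop one element early makes the last candidate absorb the residue. As a self-contained sketch of the corrected selection (plain Java; a non-empty weights array with sum weightsum is assumed):

    // Pick an index with probability proportional to weights[pos].
    double r = random.nextDouble() * weightsum;
    int pos = 0;
    while(r > 0 && pos < weights.length - 1) {
      r -= weights[pos];
      pos++;
    }
    // pos is now a valid index even if rounding left r slightly positive.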
@@ -125,7 +127,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
@Override
public DBIDs chooseInitialMedoids(int k, DistanceQuery<? super V, ?> distQ2) {
if(!(distQ2.getDistanceFactory() instanceof NumberDistance)) {
- throw new AbortException("PAM initialization can only be used with numerical distances.");
+ throw new AbortException("K-Means++ initialization initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
DistanceQuery<? super V, D> distQ = (DistanceQuery<? super V, D>) distQ2;
@@ -244,7 +246,7 @@ public class KMeansPlusPlusInitialMeans<V, D extends NumberDistance<D, ?>> exten
public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected KMeansPlusPlusInitialMeans<V, D> makeInstance() {
- return new KMeansPlusPlusInitialMeans<V, D>(rnd);
+ return new KMeansPlusPlusInitialMeans<>(rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
index 9917337e..cc7aaa9e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMediansLloyd.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -39,13 +38,9 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the k-medians clustering algorithm, using Lloyd-style bulk
@@ -83,28 +78,23 @@ public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> ext
super(distanceFunction, k, maxiter, initializer);
}
- /**
- * Run k-medians.
- *
- * @param database Database
- * @param relation relation to use
- * @return result
- */
+ @Override
public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
+ return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
}
// Choose initial medians
- List<? extends NumberVector<?>> medians = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> medians = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if (LOG.isVerbose()) {
- LOG.verbose("K-Medians iteration " + (iteration + 1));
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
boolean changed = assignToNearestCluster(relation, medians, clusters);
// Stop if no cluster assignment changed.
@@ -114,12 +104,15 @@ public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> ext
// Recompute medians.
medians = medians(clusters, medians, relation);
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
// Wrap result
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Medians Clustering", "kmedians-clustering");
+ Clustering<MeanModel<V>> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
for (int i = 0; i < clusters.size(); i++) {
- MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(medians.get(i).getColumnVector().getArrayRef()));
- result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
+ MeanModel<V> model = new MeanModel<>(factory.newNumberVector(medians.get(i).getColumnVector().getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -136,46 +129,15 @@ public class KMediansLloyd<V extends NumberVector<?>, D extends Distance<D>> ext
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
- /**
- * k Parameter.
- */
- protected int k;
-
- /**
- * Maximum number of iterations.
- */
- protected int maxiter;
-
- /**
- * Initialization method.
- */
- protected KMeansInitialization<V> initializer;
-
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
@Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
- k = kP.intValue();
- }
-
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
- initializer = initialP.instantiateClass(config);
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
- maxiter = maxiterP.intValue();
- }
+ protected Logging getLogger() {
+ return LOG;
}
@Override
protected KMediansLloyd<V, D> makeInstance() {
- return new KMediansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMediansLloyd<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
index f4398458..87a0c7ae 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsEM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,6 +46,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
@@ -119,13 +120,13 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
// Choose initial medoids
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -135,9 +136,13 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
// TODO: reuse this information, from the build phase, when possible?
assignToNearestCluster(medoids, mdists, clusters, distQ);
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medoids iteration", LOG) : null;
// Swap phase
boolean changed = true;
while (changed) {
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
changed = false;
// Try to swap the medoid with a better cluster member:
int i = 0;
@@ -168,12 +173,15 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
assignToNearestCluster(medoids, mdists, clusters, distQ);
}
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
// Wrap result
- Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
- result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -256,7 +264,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
k = kP.intValue();
}
- ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
+ ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
@@ -270,7 +278,7 @@ public class KMedoidsEM<V, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected KMedoidsEM<V, D> makeInstance() {
- return new KMedoidsEM<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMedoidsEM<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java
index 269e7e9e..136a4129 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsInitialization.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
index 906501e4..1feda867 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMedoidsPAM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
@@ -124,14 +125,14 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
*/
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
}
DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, getDistanceFunction());
DBIDs ids = relation.getDBIDs();
// Choose initial medoids
ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, distQ));
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
@@ -141,9 +142,13 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
// TODO: reuse this information, from the build phase, when possible?
assignToNearestCluster(medoids, ids, second, clusters, distQ);
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("PAM iteration", LOG) : null;
// Swap phase
boolean changed = true;
while (changed) {
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
changed = false;
// Try to swap the medoid with a better cluster member:
double best = 0;
@@ -189,6 +194,9 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
}
}
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
if (LOG.isDebugging()) {
LOG.debug("Best cost: " + best);
}
@@ -204,10 +212,10 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
}
// Wrap result
- Clustering<MedoidModel> result = new Clustering<MedoidModel>("k-Medoids Clustering", "kmedoids-clustering");
+ Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
for (int i = 0; i < clusters.size(); i++) {
MedoidModel model = new MedoidModel(medoids.get(i));
- result.addCluster(new Cluster<MedoidModel>(clusters.get(i), model));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -293,7 +301,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
k = kP.intValue();
}
- ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<KMedoidsInitialization<V>>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
+ ObjectParameter<KMedoidsInitialization<V>> initialP = new ObjectParameter<>(KMeans.INIT_ID, KMedoidsInitialization.class, PAMInitialMeans.class);
if (config.grab(initialP)) {
initializer = initialP.instantiateClass(config);
}
@@ -307,7 +315,7 @@ public class KMedoidsPAM<V, D extends NumberDistance<D, ?>> extends AbstractDist
@Override
protected KMedoidsPAM<V, D> makeInstance() {
- return new KMedoidsPAM<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMedoidsPAM<>(distanceFunction, k, maxiter, initializer);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
index 1fc7160e..c7e1751f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/PAMInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,6 +25,8 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
@@ -69,16 +71,16 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
// Get a distance query
if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
throw new AbortException("PAM initialization can only be used with numerical distances.");
}
@SuppressWarnings("unchecked")
final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
- final DistanceQuery<V, D> distQ = relation.getDatabase().getDistanceQuery(relation, distF);
+ final DistanceQuery<V, D> distQ = database.getDistanceQuery(relation, distF);
DBIDs medids = chooseInitialMedoids(k, distQ);
- List<V> medoids = new ArrayList<V>(k);
+ List<V> medoids = new ArrayList<>(k);
for(DBIDIter iter = medids.iter(); iter.valid(); iter.advance()) {
medoids.add(relation.get(iter));
}
@@ -179,7 +181,7 @@ public class PAMInitialMeans<V, D extends NumberDistance<D, ?>> implements KMean
public static class Parameterizer<V, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
@Override
protected PAMInitialMeans<V, D> makeInstance() {
- return new PAMInitialMeans<V, D>();
+ return new PAMInitialMeans<>();
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
index 78e59be7..214f4ce6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,6 +25,8 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -52,9 +54,9 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
means.add(relation.get(iter));
}
@@ -74,10 +76,9 @@ public class RandomlyChosenInitialMeans<V> extends AbstractKMeansInitialization<
* @apiviz.exclude
*/
public static class Parameterizer<V> extends AbstractKMeansInitialization.Parameterizer<V> {
-
@Override
protected RandomlyChosenInitialMeans<V> makeInstance() {
- return new RandomlyChosenInitialMeans<V>(rnd);
+ return new RandomlyChosenInitialMeans<>(rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
index 300f5cb0..ee90e0dc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Random;
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
@@ -54,11 +55,11 @@ public class RandomlyGeneratedInitialMeans<V extends NumberVector<?>> extends Ab
}
@Override
- public List<V> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
final int dim = RelationUtil.dimensionality(relation);
NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
- List<V> means = new ArrayList<V>(k);
+ List<V> means = new ArrayList<>(k);
final Random random = rnd.getRandom();
for(int i = 0; i < k; i++) {
double[] r = MathUtil.randomDoubleArray(dim, random);
@@ -81,7 +82,7 @@ public class RandomlyGeneratedInitialMeans<V extends NumberVector<?>> extends Ab
public static class Parameterizer<V extends NumberVector<?>> extends AbstractKMeansInitialization.Parameterizer<V> {
@Override
protected RandomlyGeneratedInitialMeans<V> makeInstance() {
- return new RandomlyGeneratedInitialMeans<V>(rnd);
+ return new RandomlyGeneratedInitialMeans<>(rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java
new file mode 100644
index 00000000..9f0a1923
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/SampleKMeansInitialization.java
@@ -0,0 +1,160 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Initialize k-means by running k-means on a sample of the data set only.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+public class SampleKMeansInitialization<V extends NumberVector<?>, D extends Distance<?>> extends AbstractKMeansInitialization<V> {
+ /**
+ * Inner k-means algorithm to run on the sample.
+ */
+ private KMeans<V, D, ?> innerkMeans;
+
+ /**
+ * Sampling rate, or absolute sample size (if larger than 1).
+ */
+ private double rate;
+
+ /**
+ * Constructor.
+ *
+ * @param rnd Random generator.
+ * @param innerkMeans Inner k-means algorithm.
+ * @param rate Sampling rate.
+ */
+ public SampleKMeansInitialization(RandomFactory rnd, KMeans<V, D, ?> innerkMeans, double rate) {
+ super(rnd);
+ this.innerkMeans = innerkMeans;
+ this.rate = rate;
+ }
+
+ @Override
+ public List<V> chooseInitialMeans(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector<?>, ?> distanceFunction) {
+ // Interpret rate as a relative share if < 1, otherwise as an absolute sample size.
+ final int samplesize = (int) ((rate < 1.) ? Math.ceil(rate * relation.size()) : rate);
+ final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), samplesize, rnd);
+
+ ProxyView<V> proxyv = new ProxyView<>(database, sample, relation);
+ ProxyDatabase proxydb = new ProxyDatabase(sample, proxyv);
+
+ innerkMeans.setK(k);
+ @SuppressWarnings("unchecked")
+ PrimitiveDistanceFunction<? super NumberVector<?>, D> df = (PrimitiveDistanceFunction<? super NumberVector<?>, D>) distanceFunction;
+ innerkMeans.setDistanceFunction(df);
+ Clustering<? extends MeanModel<V>> clusters = innerkMeans.run(proxydb, proxyv);
+ List<V> means = new ArrayList<>();
+ for (Cluster<? extends MeanModel<V>> cluster : clusters.getAllClusters()) {
+ means.add((V) cluster.getModel().getMean());
+ }
+
+ return means;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+ /**
+ * Parameter to specify the kMeans variant.
+ */
+ public static final OptionID KMEANS_ID = new OptionID("kmeans.algorithm", "KMeans variant to run multiple times.");
+
+ /**
+ * Parameter to specify the sampling rate.
+ */
+ public static final OptionID SAMPLE_ID = new OptionID("kmeans.samplesize", "Sample set size (if > 1) or sampling rate (if < 1).");
+
+ /**
+ * Inner k-means algorithm to use.
+ */
+ protected KMeans<V, D, ?> innerkMeans;
+
+ /**
+ * Sampling rate.
+ */
+ protected double rate;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<KMeans<V, D, ?>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
+ if (config.grab(kMeansVariantP)) {
+ ListParameterization kMeansVariantParameters = new ListParameterization();
+
+ // Dummy k: the actual value is set via setK() in chooseInitialMeans, so the
+ // inner k-means always runs with k as requested from outside.
+ kMeansVariantParameters.addParameter(KMeans.K_ID, 13);
+ kMeansVariantParameters.addParameter(KMeans.DISTANCE_FUNCTION_ID, SquaredEuclideanDistanceFunction.class);
+
+ ChainedParameterization combinedConfig = new ChainedParameterization(kMeansVariantParameters, config);
+ combinedConfig.errorsTo(config);
+ innerkMeans = kMeansVariantP.instantiateClass(combinedConfig);
+ }
+
+ DoubleParameter sampleP = new DoubleParameter(SAMPLE_ID);
+ if (config.grab(sampleP)) {
+ rate = sampleP.doubleValue();
+ }
+ }
+
+ @Override
+ protected SampleKMeansInitialization<V, D> makeInstance() {
+ return new SampleKMeansInitialization<>(rnd, innerkMeans, rate);
+ }
+ }
+}
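SampleKMeansInitialization thus wraps a random sample in a ProxyView and ProxyDatabase so that the inner k-means sees an ordinary database, and the means of the resulting clusters seed the outer run; a smaller rate makes initialization cheaper but the seeds noisier. A usage sketch (DoubleVector and DoubleDistance as concrete types; database, relation, distanceFunction, a configured inner KMeans instance and a RandomFactory rnd are assumed in scope):

    SampleKMeansInitialization<DoubleVector, DoubleDistance> init = new SampleKMeansInitialization<>(rnd, inner, 0.1); // inner k-means on a 10% sample
    List<DoubleVector> seeds = init.chooseInitialMeans(database, relation, 10, distanceFunction);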
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
index 2ce625b0..aa4c3e24 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java
new file mode 100644
index 00000000..f2de7846
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/KMeansQualityMeasure.java
@@ -0,0 +1,54 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+
+/**
+ * Interface for computing the quality of a K-Means clustering.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Input Object restriction type
+ * @param <D> Distance restriction type
+ */
+public interface KMeansQualityMeasure<O extends NumberVector<?>, D extends Distance<?>> {
+ /**
+ * Calculates and returns the quality measure.
+ *
+ * @param clustering Clustering to analyze
+ * @param distanceFunction Distance function to use (usually Euclidean or
+ * squared Euclidean!)
+ * @param relation Relation for accessing objects
+ * @param <V> Actual vector type (could be a subtype of O!)
+ *
+ * @return quality measure
+ */
+ <V extends O> double calculateCost(Clustering<? extends MeanModel<V>> clustering, PrimitiveDistanceFunction<? super V, ? extends D> distanceFunction, Relation<V> relation);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java
new file mode 100644
index 00000000..e0ddfff0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterMeanDistanceQualityMeasure.java
@@ -0,0 +1,89 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+
+/**
+ * Class for computing the average overall distance.
+ *
+ * That is, the mean over all clusters of the average pairwise distance within each cluster.
+ *
+ * @author Stephan Baier
+ */
+public class WithinClusterMeanDistanceQualityMeasure implements KMeansQualityMeasure<NumberVector<?>, NumberDistance<?, ?>> {
+ @Override
+ public <V extends NumberVector<?>> double calculateCost(Clustering<? extends MeanModel<V>> clustering, PrimitiveDistanceFunction<? super V, ? extends NumberDistance<?, ?>> distanceFunction, Relation<V> relation) {
+ @SuppressWarnings("unchecked")
+ final List<Cluster<MeanModel<V>>> clusterList = (List<Cluster<MeanModel<V>>>) (List<?>) clustering.getAllClusters();
+
+ if (distanceFunction instanceof PrimitiveDoubleDistanceFunction) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDoubleDistanceFunction<? super V> df = (PrimitiveDoubleDistanceFunction<? super V>) distanceFunction;
+ double clusterDistanceSum = 0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+
+ // Compute sum of pairwise distances:
+ double clusterPairwiseDistanceSum = 0;
+ for (DBIDIter iter1 = ids.iter(); iter1.valid(); iter1.advance()) {
+ V obj1 = relation.get(iter1);
+ for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
+ clusterPairwiseDistanceSum += df.doubleDistance(obj1, relation.get(iter2));
+ }
+ }
+ clusterDistanceSum += clusterPairwiseDistanceSum / (ids.size() * ids.size());
+ }
+
+ return clusterDistanceSum / clusterList.size();
+ } else {
+ double clusterDistanceSum = 0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+
+ // Compute sum of pairwise distances:
+ double clusterPairwiseDistanceSum = 0;
+ for (DBIDIter iter1 = ids.iter(); iter1.valid(); iter1.advance()) {
+ V obj1 = relation.get(iter1);
+ for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
+ clusterPairwiseDistanceSum += distanceFunction.distance(obj1, relation.get(iter2)).doubleValue();
+ }
+ }
+ clusterDistanceSum += clusterPairwiseDistanceSum / (ids.size() * ids.size());
+ }
+
+ return clusterDistanceSum / clusterList.size();
+ }
+ }
+}
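Note the averaging convention in both branches above: the double loop runs over all ordered pairs, including each object paired with itself (contributing distance 0), and divides by the squared cluster size. For a cluster of two points at distance d, the four ordered pairs sum to 2d, so the per-cluster mean is d/2 rather than d; the measure is therefore comparable across cluster sizes, but it is not the mean over distinct pairs.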
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java
new file mode 100644
index 00000000..32ad5210
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/WithinClusterVarianceQualityMeasure.java
@@ -0,0 +1,83 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+
+/**
+ * Class for computing the variance in a clustering result (sum-of-squares).
+ *
+ * @author Stephan Baier
+ */
+public class WithinClusterVarianceQualityMeasure implements KMeansQualityMeasure<NumberVector<?>, NumberDistance<?, ?>> {
+ @Override
+ public <V extends NumberVector<?>> double calculateCost(Clustering<? extends MeanModel<V>> clustering, PrimitiveDistanceFunction<? super V, ? extends NumberDistance<?, ?>> distanceFunction, Relation<V> relation) {
+ @SuppressWarnings("unchecked")
+ final List<Cluster<MeanModel<V>>> clusterList = (List<Cluster<MeanModel<V>>>) (List<?>) clustering.getAllClusters();
+
+ boolean squared = (distanceFunction instanceof SquaredEuclideanDistanceFunction);
+ if (distanceFunction instanceof PrimitiveDoubleDistanceFunction) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDoubleDistanceFunction<? super V> df = (PrimitiveDoubleDistanceFunction<? super V>) distanceFunction;
+ double variance = 0.0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+ V mean = cluster.getModel().getMean();
+
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double dist = df.doubleDistance(relation.get(iter), mean);
+ if (squared) {
+ variance += dist;
+ } else {
+ variance += dist * dist;
+ }
+ }
+ }
+ return variance;
+ } else {
+ double variance = 0.0;
+ for (Cluster<MeanModel<V>> cluster : clusterList) {
+ DBIDs ids = cluster.getIDs();
+ V mean = cluster.getModel().getMean();
+
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double dist = distanceFunction.distance(relation.get(iter), mean).doubleValue();
+ variance += dist * dist;
+ }
+ }
+ return variance;
+ }
+ }
+}
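The squared flag keeps the two distance conventions consistent: SquaredEuclideanDistanceFunction already returns squared deviations, which are summed as-is, while any other distance is squared before summing, so in both cases the result is the within-cluster sum of squares that k-means minimizes. A sketch of comparing two clusterings with it (clustering1, clustering2 and relation assumed in scope; the STATIC singleton of SquaredEuclideanDistanceFunction is assumed):

    WithinClusterVarianceQualityMeasure measure = new WithinClusterVarianceQualityMeasure();
    double cost1 = measure.calculateCost(clustering1, SquaredEuclideanDistanceFunction.STATIC, relation);
    double cost2 = measure.calculateCost(clustering2, SquaredEuclideanDistanceFunction.STATIC, relation);
    boolean firstIsBetter = cost1 < cost2; // smaller sum of squares = tighter clusters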
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java
new file mode 100644
index 00000000..ed9a528d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/quality/package-info.java
@@ -0,0 +1,4 @@
+/**
+ * Quality measures for k-Means results.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality;
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
index 4ba1ce09..26fb3024 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
@@ -19,7 +19,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index 37b3eb57..db026e93 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -172,7 +172,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
if(LOG.isVerbose()) {
LOG.verbose("*** 1. Identification of subspaces that contain clusters ***");
}
- SortedMap<Integer, List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new TreeMap<Integer, List<CLIQUESubspace<V>>>();
+ SortedMap<Integer, List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new TreeMap<>();
List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
dimensionToDenseSubspaces.put(Integer.valueOf(0), denseSubspaces);
if(LOG.isVerbose()) {
@@ -204,7 +204,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
// build result
int numClusters = 1;
- Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("CLIQUE clustering", "clique-clustering");
+ Clustering<SubspaceModel<V>> result = new Clustering<>("CLIQUE clustering", "clique-clustering");
for(Integer dim : dimensionToDenseSubspaces.keySet()) {
List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
@@ -214,10 +214,10 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
for(Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
- Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(modelAndCluster.second);
- newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).toVector(relation)));
+ Cluster<SubspaceModel<V>> newCluster = new Cluster<>(modelAndCluster.second);
+ newCluster.setModel(new SubspaceModel<>(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
- result.addCluster(newCluster);
+ result.addToplevelCluster(newCluster);
}
}
@@ -233,7 +233,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* cluster models
*/
private List<Pair<Subspace, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
- List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<>();
for(CLIQUESubspace<V> subspace : denseSubspaces) {
List<Pair<Subspace, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
@@ -339,7 +339,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
// build the 1 dimensional units
- List<CLIQUEUnit<V>> units = new ArrayList<CLIQUEUnit<V>>((xsi * dimensionality));
+ List<CLIQUEUnit<V>> units = new ArrayList<>((xsi * dimensionality));
for(int x = 0; x < xsi; x++) {
for(int d = 0; d < dimensionality; d++) {
units.add(new CLIQUEUnit<V>(new Interval(d, unit_bounds[x][d], unit_bounds[x + 1][d])));
@@ -396,8 +396,8 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
}
- Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
- Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
+ Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<>();
+ Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<>();
for(CLIQUEUnit<V> unit : units) {
// unit is a dense unit
if(unit.selectivity(total) >= tau) {
@@ -406,7 +406,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
int dim = unit.getIntervals().iterator().next().getDimension();
CLIQUESubspace<V> subspace_d = denseSubspaces.get(Integer.valueOf(dim));
if(subspace_d == null) {
- subspace_d = new CLIQUESubspace<V>(dim);
+ subspace_d = new CLIQUESubspace<>(dim);
denseSubspaces.put(Integer.valueOf(dim), subspace_d);
}
subspace_d.addDenseUnit(unit);
@@ -420,7 +420,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
LOG.debugFine(msg.toString());
}
- List<CLIQUESubspace<V>> subspaceCandidates = new ArrayList<CLIQUESubspace<V>>(denseSubspaces.values());
+ List<CLIQUESubspace<V>> subspaceCandidates = new ArrayList<>(denseSubspaces.values());
Collections.sort(subspaceCandidates, new CLIQUESubspace.CoverageComparator());
return subspaceCandidates;
}
@@ -436,12 +436,12 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
*/
private List<CLIQUESubspace<V>> findDenseSubspaceCandidates(Relation<V> database, List<CLIQUESubspace<V>> denseSubspaces) {
// sort (k-1)-dimensional dense subspace according to their dimensions
- List<CLIQUESubspace<V>> denseSubspacesByDimensions = new ArrayList<CLIQUESubspace<V>>(denseSubspaces);
+ List<CLIQUESubspace<V>> denseSubspacesByDimensions = new ArrayList<>(denseSubspaces);
Collections.sort(denseSubspacesByDimensions, new Subspace.DimensionComparator());
// determine k-dimensional dense subspace candidates
double all = database.size();
- List<CLIQUESubspace<V>> denseSubspaceCandidates = new ArrayList<CLIQUESubspace<V>>();
+ List<CLIQUESubspace<V>> denseSubspaceCandidates = new ArrayList<>();
while(!denseSubspacesByDimensions.isEmpty()) {
CLIQUESubspace<V> s1 = denseSubspacesByDimensions.remove(0);
@@ -614,7 +614,7 @@ public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
@Override
protected CLIQUE<V> makeInstance() {
- return new CLIQUE<V>(xsi, tau, prune);
+ return new CLIQUE<>(xsi, tau, prune);
}
}
}
\ No newline at end of file
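Aside from addCluster becoming addToplevelCluster, the CLIQUE hunk is a mechanical migration to the Java 7 diamond operator. A self-contained before/after illustration (example types chosen only for brevity):

  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;

  class DiamondExample {
    void example() {
      // Java 6 style: type arguments repeated on both sides of the assignment.
      Map<Integer, List<String>> before = new HashMap<Integer, List<String>>();
      // Java 7 diamond: the compiler infers the arguments from the target type.
      Map<Integer, List<String>> after = new HashMap<>();
    }
  }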
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index a3496a0e..b17ebebb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -62,7 +62,8 @@ import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyReferenceLists;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -238,29 +239,29 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
// build the hierarchy
- buildHierarchy(database, distFunc, clusters, dimensionality);
+ Clustering<SubspaceModel<V>> clustering = new Clustering<>("DiSH clustering", "dish-clustering");
+ buildHierarchy(database, distFunc, clustering, clusters, dimensionality);
if (LOG.isVerbose()) {
StringBuilder msg = new StringBuilder("Step 4: build hierarchy");
for (Cluster<SubspaceModel<V>> c : clusters) {
msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
- for (Cluster<SubspaceModel<V>> cluster : c.getParents()) {
- msg.append("\n parent ").append(cluster);
+ for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterParents(c); iter.valid(); iter.advance()) {
+ msg.append("\n parent ").append(iter.get());
}
- for (Cluster<SubspaceModel<V>> cluster : c.getChildren()) {
- msg.append("\n child ").append(cluster);
+ for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(c); iter.valid(); iter.advance()) {
+ msg.append("\n child ").append(iter.get());
}
}
LOG.verbose(msg.toString());
}
// build result
- Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("DiSH clustering", "dish-clustering");
for (Cluster<SubspaceModel<V>> c : clusters) {
- if (c.getParents() == null || c.getParents().isEmpty()) {
- result.addCluster(c);
+ if (clustering.getClusterHierarchy().numParents(c) == 0) {
+ clustering.addToplevelCluster(c);
}
}
- return result;
+ return clustering;
}
/**
@@ -274,9 +275,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> extractClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder) {
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), LOG) : null;
int processed = 0;
- Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
- Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>>();
- Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<DBID, Pair<BitSet, ArrayModifiableDBIDs>>();
+ Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<>();
+ Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<>();
+ Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<>();
for (Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next();
entryMap.put(entry.getID(), entry);
@@ -287,7 +288,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// get the list of (parallel) clusters for the preference vector
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector);
if (parallelClusters == null) {
- parallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
+ parallelClusters = new ArrayList<>();
clustersMap.put(preferenceVector, parallelClusters);
}
@@ -305,7 +306,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
if (cluster == null) {
- cluster = new Pair<BitSet, ArrayModifiableDBIDs>(preferenceVector, DBIDUtil.newArray());
+ cluster = new Pair<>(preferenceVector, DBIDUtil.newArray());
parallelClusters.add(cluster);
}
cluster.second.add(entry.getID());
@@ -373,15 +374,13 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private List<Cluster<SubspaceModel<V>>> sortClusters(Relation<V> database, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) {
final int db_dim = RelationUtil.dimensionality(database);
// int num = 1;
- List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<Cluster<SubspaceModel<V>>>();
+ List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<>();
for (BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
for (int i = 0; i < parallelClusters.size(); i++) {
Pair<BitSet, ArrayModifiableDBIDs> c = parallelClusters.get(i);
- Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.second);
- cluster.setModel(new SubspaceModel<V>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
- cluster.setHierarchy(new HierarchyReferenceLists<Cluster<SubspaceModel<V>>>(cluster, new ArrayList<Cluster<SubspaceModel<V>>>(), new ArrayList<Cluster<SubspaceModel<V>>>()));
- // cluster.setName("Cluster_" + num++);
+ Cluster<SubspaceModel<V>> cluster = new Cluster<>(c.second);
+ cluster.setModel(new SubspaceModel<>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
String subspace = FormatUtil.format(cluster.getModel().getSubspace().getDimensions(), db_dim, "");
if (parallelClusters.size() > 1) {
cluster.setName("Cluster_" + subspace + "_" + i);
@@ -415,9 +414,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private void checkClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap, int minpts) {
// check if there are clusters < minpts
// and add them to not assigned
- List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
- Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
- Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<BitSet, ArrayModifiableDBIDs>(new BitSet(), DBIDUtil.newArray());
+ List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<>();
+ Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<>();
+ Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<>(new BitSet(), DBIDUtil.newArray());
for (BitSet pv : clustersMap.keySet()) {
// noise
if (pv.cardinality() == 0) {
@@ -429,7 +428,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// clusters
else {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(parallelClusters.size());
+ List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<>(parallelClusters.size());
for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
if (!pv.equals(new BitSet()) && c.second.size() < minpts) {
notAssigned.add(c);
@@ -456,7 +455,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
- List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(1);
+ List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<>(1);
noiseList.add(noise);
clustersMap.put(noise.first, noiseList);
}
@@ -510,13 +509,15 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* Builds the cluster hierarchy.
*
* @param distFunc the distance function
+   * @param clustering the clustering to attach the hierarchy to
* @param clusters the sorted list of clusters
* @param dimensionality the dimensionality of the data
* @param database the database containing the data objects
*/
- private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
+ private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Clustering<SubspaceModel<V>> clustering, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
StringBuilder msg = new StringBuilder();
final int db_dim = RelationUtil.dimensionality(database);
+ Hierarchy<Cluster<SubspaceModel<V>>> hier = clustering.getClusterHierarchy();
for (int i = 0; i < clusters.size() - 1; i++) {
Cluster<SubspaceModel<V>> c_i = clusters.get(i);
@@ -536,9 +537,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// noise level reached
if (c_j.getModel().getSubspace().dimensionality() == 0) {
// no parents exists -> parent is noise
- if (c_i.getParents().isEmpty()) {
- c_j.getChildren().add(c_i);
- c_i.getParents().add(c_j);
+ if (hier.numParents(c_i) == 0) {
+ clustering.addChildCluster(c_j, c_i);
if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
@@ -560,9 +560,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
if (d <= 2 * epsilon) {
// no parent exists or c_j is not a parent of the already
// existing parents
- if (c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) {
- c_j.getChildren().add(c_i);
- c_i.getParents().add(c_j);
+ if (hier.numParents(c_i) == 0 || !isParent(database, distFunc, c_j, hier.iterParents(c_i))) {
+ clustering.addChildCluster(c_j, c_i);
if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [");
@@ -591,16 +590,17 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param distFunc the distance function for distance computation between the
* clusters
* @param parent the parent to be tested
- * @param children the list of children to be tested
+   * @param iter iterator over the children to be tested
* @return true, if the specified parent cluster is a parent of one child of
* the children clusters, false otherwise
*/
- private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, List<Cluster<SubspaceModel<V>>> children) {
+ private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, Iter<Cluster<SubspaceModel<V>>> iter) {
V parent_centroid = ProjectedCentroid.make(parent.getModel().getDimensions(), database, parent.getIDs()).toVector(database);
int dimensionality = RelationUtil.dimensionality(database);
int subspaceDim_parent = dimensionality - parent.getModel().getSubspace().dimensionality();
- for (Cluster<SubspaceModel<V>> child : children) {
+ for (; iter.valid(); iter.advance()) {
+ Cluster<SubspaceModel<V>> child = iter.get();
V child_centroid = ProjectedCentroid.make(child.getModel().getDimensions(), database, child.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(parent_centroid, child_centroid, parent.getModel().getSubspace().getDimensions(), child.getModel().getSubspace().getDimensions());
if (distance.getCorrelationValue() == subspaceDim_parent) {
@@ -699,7 +699,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
@Override
protected DiSH<V> makeInstance() {
- return new DiSH<V>(epsilon, dishDistance, opticsO);
+ return new DiSH<>(epsilon, dishDistance, opticsO);
}
}
}
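The substantive DiSH change replaces the per-cluster parent/child lists (and HierarchyReferenceLists) with the central Hierarchy owned by the Clustering. A sketch of how traversal looks afterwards, built only from calls visible in this hunk (getClusterHierarchy, numParents, iterChildren); the class and method names are hypothetical:

  import de.lmu.ifi.dbs.elki.data.Cluster;
  import de.lmu.ifi.dbs.elki.data.Clustering;
  import de.lmu.ifi.dbs.elki.data.NumberVector;
  import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
  import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
  import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;

  class HierarchyTraversalExample {
    static <V extends NumberVector<?>> void printTopLevel(Clustering<SubspaceModel<V>> clustering) {
      Hierarchy<Cluster<SubspaceModel<V>>> hier = clustering.getClusterHierarchy();
      for (Cluster<SubspaceModel<V>> c : clustering.getAllClusters()) {
        if (hier.numParents(c) == 0) { // top level, same test as in run() above
          System.out.println("top-level: " + c.getName());
          for (Iter<Cluster<SubspaceModel<V>>> it = hier.iterChildren(c); it.valid(); it.advance()) {
            System.out.println("  child: " + it.get().getName());
          }
        }
      }
    }
  }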
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
index 58f3acef..9ac7c072 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -119,7 +119,7 @@ public class HiSC<V extends NumberVector<?>> extends OPTICS<V, PreferenceVectorB
@Override
protected HiSC<V> makeInstance() {
- return new HiSC<V>(distanceFunction);
+ return new HiSC<>(distanceFunction);
}
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
index ef49ff10..92158734 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -49,13 +49,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -150,13 +150,13 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
RangeQuery<V, DoubleDistance> rangeQuery = database.getRangeQuery(distFunc);
final Random random = rnd.getRandom();
- if(RelationUtil.dimensionality(relation) < l) {
+ if (RelationUtil.dimensionality(relation) < l) {
throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + RelationUtil.dimensionality(relation) + " < " + l + ")");
}
// TODO: use a StepProgress!
// initialization phase
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("1. Initialization phase...");
}
int sampleSize = Math.min(relation.size(), k_i * k);
@@ -165,7 +165,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
int medoidSize = Math.min(relation.size(), m_i * k);
DBIDs medoids = greedy(distFunc, sampleSet, medoidSize, random);
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("sampleSize ").append(sampleSize).append('\n');
@@ -176,7 +176,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
}
// iterative phase
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("2. Iterative phase...");
}
double bestObjective = Double.POSITIVE_INFINITY;
@@ -184,7 +184,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
ModifiableDBIDs m_bad = null;
ModifiableDBIDs m_current = initialSet(medoids, k, random);
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("m_c ").append(m_current).append('\n');
@@ -196,12 +196,12 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
// TODO: Use DataStore and Trove for performance
Map<DBID, PROCLUSCluster> clusters = null;
int loops = 0;
- while(loops < 10) {
+ while (loops < 10) {
Map<DBID, TIntSet> dimensions = findDimensions(m_current, relation, distFunc, rangeQuery);
clusters = assignPoints(dimensions, relation);
double objectiveFunction = evaluateClusters(clusters, dimensions, relation);
- if(objectiveFunction < bestObjective) {
+ if (objectiveFunction < bestObjective) {
// restart counting loops
loops = 0;
bestObjective = objectiveFunction;
@@ -211,32 +211,32 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
m_current = computeM_current(medoids, m_best, m_bad, random);
loops++;
- if(cprogress != null) {
+ if (cprogress != null) {
cprogress.setProcessed(clusters.size(), LOG);
}
}
- if(cprogress != null) {
+ if (cprogress != null) {
cprogress.setCompleted(LOG);
}
// refinement phase
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("3. Refinement phase...");
}
- List<Pair<V, TIntSet>> dimensions = findDimensions(new ArrayList<PROCLUSCluster>(clusters.values()), relation);
+ List<Pair<V, TIntSet>> dimensions = findDimensions(new ArrayList<>(clusters.values()), relation);
List<PROCLUSCluster> finalClusters = finalAssignment(dimensions, relation);
// build result
int numClusters = 1;
- Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("ProClus clustering", "proclus-clustering");
- for(PROCLUSCluster c : finalClusters) {
- Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.objectIDs);
- cluster.setModel(new SubspaceModel<V>(new Subspace(c.getDimensions()), c.centroid));
+ Clustering<SubspaceModel<V>> result = new Clustering<>("ProClus clustering", "proclus-clustering");
+ for (PROCLUSCluster c : finalClusters) {
+ Cluster<SubspaceModel<V>> cluster = new Cluster<>(c.objectIDs);
+ cluster.setModel(new SubspaceModel<>(new Subspace(c.getDimensions()), c.centroid));
cluster.setName("cluster_" + numClusters++);
- result.addCluster(cluster);
+ result.addToplevelCluster(cluster);
}
return result;
}
@@ -257,22 +257,22 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
// m_1 is random point of S
DBID m_i = s.remove(random.nextInt(s.size()));
medoids.add(m_i);
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
LOG.debugFiner("medoids " + medoids);
}
// compute distances between each point in S and m_i
// FIXME: don't use maps, so we can work with DBIDRef
- Map<DBID, DistanceDBIDPair<DoubleDistance>> distances = new HashMap<DBID, DistanceDBIDPair<DoubleDistance>>();
- for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
+ Map<DBID, DistanceDBIDPair<DoubleDistance>> distances = new HashMap<>();
+ for (DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
DBID id = DBIDUtil.deref(iter);
DoubleDistance dist = distFunc.distance(id, m_i);
distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
- for(int i = 1; i < m; i++) {
- // choose medoid m_i to be far from prevois medoids
- List<DistanceDBIDPair<DoubleDistance>> d = new ArrayList<DistanceDBIDPair<DoubleDistance>>(distances.values());
+ for (int i = 1; i < m; i++) {
+ // choose medoid m_i to be far from previous medoids
+ List<DistanceDBIDPair<DoubleDistance>> d = new ArrayList<>(distances.values());
DistanceDBIDResultUtil.sortByDistance(d);
m_i = DBIDUtil.deref(d.get(d.size() - 1));
@@ -281,7 +281,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
distances.remove(m_i);
// compute distances of each point to closest medoid
- for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
DBID id = DBIDUtil.deref(iter);
DoubleDistance dist_new = distFunc.distance(id, m_i);
DoubleDistance dist_old = distances.get(id).getDistance();
@@ -290,7 +290,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
LOG.debugFiner("medoids " + medoids);
}
}
@@ -309,7 +309,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
private ModifiableDBIDs initialSet(DBIDs sampleSet, int k, Random random) {
ArrayModifiableDBIDs s = DBIDUtil.newArray(sampleSet);
ModifiableDBIDs initialSet = DBIDUtil.newHashSet();
- while(initialSet.size() < k) {
+ while (initialSet.size() < k) {
DBID next = s.remove(random.nextInt(s.size()));
initialSet.add(next);
}
@@ -330,16 +330,15 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
m_list.removeDBIDs(m_best);
ModifiableDBIDs m_current = DBIDUtil.newHashSet();
- for(DBIDIter iter = m_best.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = m_best.iter(); iter.valid(); iter.advance()) {
DBID m_i = DBIDUtil.deref(iter);
- if(m_bad.contains(m_i)) {
+ if (m_bad.contains(m_i)) {
int currentSize = m_current.size();
- while(m_current.size() == currentSize) {
+ while (m_current.size() == currentSize) {
DBID next = m_list.remove(random.nextInt(m_list.size()));
m_current.add(next);
}
- }
- else {
+ } else {
m_current.add(m_i);
}
}
@@ -358,28 +357,28 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
* @param distFunc the distance function
* @return a mapping of the medoid's id to its locality
*/
- private Map<DBID, DistanceDBIDResult<DoubleDistance>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
- Map<DBID, DistanceDBIDResult<DoubleDistance>> result = new HashMap<DBID, DistanceDBIDResult<DoubleDistance>>();
+ private Map<DBID, DistanceDBIDList<DoubleDistance>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
+ Map<DBID, DistanceDBIDList<DoubleDistance>> result = new HashMap<>();
- for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
DBID m = DBIDUtil.deref(iter);
// determine minimum distance between current medoid m and any other
// medoid m_i
DoubleDistance minDist = null;
- for(DBIDIter iter2 = medoids.iter(); iter2.valid(); iter2.advance()) {
+ for (DBIDIter iter2 = medoids.iter(); iter2.valid(); iter2.advance()) {
DBID m_i = DBIDUtil.deref(iter2);
- if(DBIDUtil.equal(m_i, m)) {
+ if (DBIDUtil.equal(m_i, m)) {
continue;
}
DoubleDistance currentDist = distFunc.distance(m, m_i);
- if(minDist == null || currentDist.compareTo(minDist) < 0) {
+ if (minDist == null || currentDist.compareTo(minDist) < 0) {
minDist = currentDist;
}
}
// determine points in sphere centered at m with radius minDist
assert minDist != null;
- DistanceDBIDResult<DoubleDistance> qr = rangeQuery.getRangeForDBID(m, minDist);
+ DistanceDBIDList<DoubleDistance> qr = rangeQuery.getRangeForDBID(m, minDist);
result.put(m, qr);
}
@@ -398,32 +397,32 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private Map<DBID, TIntSet> findDimensions(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
// get localities
- Map<DBID, DistanceDBIDResult<DoubleDistance>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
+ Map<DBID, DistanceDBIDList<DoubleDistance>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
// compute x_ij = avg distance from points in l_i to medoid m_i
int dim = RelationUtil.dimensionality(database);
- Map<DBID, double[]> averageDistances = new HashMap<DBID, double[]>();
+ Map<DBID, double[]> averageDistances = new HashMap<>();
- for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
DBID m_i = DBIDUtil.deref(iter);
V medoid_i = database.get(m_i);
- DistanceDBIDResult<DoubleDistance> l_i = localities.get(m_i);
+ DistanceDBIDList<DoubleDistance> l_i = localities.get(m_i);
double[] x_i = new double[dim];
- for(DBIDIter qr = l_i.iter(); qr.valid(); qr.advance()) {
+ for (DBIDIter qr = l_i.iter(); qr.valid(); qr.advance()) {
V o = database.get(qr);
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] += Math.abs(medoid_i.doubleValue(d) - o.doubleValue(d));
}
}
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] /= l_i.size();
}
averageDistances.put(m_i, x_i);
}
- Map<DBID, TIntSet> dimensionMap = new HashMap<DBID, TIntSet>();
- List<CTriple<Double, DBID, Integer>> z_ijs = new ArrayList<CTriple<Double, DBID, Integer>>();
- for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
+ Map<DBID, TIntSet> dimensionMap = new HashMap<>();
+ List<CTriple<Double, DBID, Integer>> z_ijs = new ArrayList<>();
+ for (DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
DBID m_i = DBIDUtil.deref(iter);
TIntSet dims_i = new TIntHashSet();
dimensionMap.put(m_i, dims_i);
@@ -431,33 +430,33 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
double[] x_i = averageDistances.get(m_i);
// y_i
double y_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
y_i += x_i[j];
}
y_i /= dim;
// sigma_i
double sigma_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
double diff = x_i[j] - y_i;
sigma_i += diff * diff;
}
sigma_i /= (dim - 1);
sigma_i = Math.sqrt(sigma_i);
- for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, DBID, Integer>((x_i[j] - y_i) / sigma_i, m_i, j));
+ for (int j = 0; j < dim; j++) {
+ z_ijs.add(new CTriple<>((x_i[j] - y_i) / sigma_i, m_i, j));
}
}
Collections.sort(z_ijs);
int max = Math.max(k * l, 2);
- for(int m = 0; m < max; m++) {
+ for (int m = 0; m < max; m++) {
CTriple<Double, DBID, Integer> z_ij = z_ijs.get(m);
TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
dims_i.add(z_ij.getThird());
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("z_ij ").append(z_ij).append('\n');
@@ -480,61 +479,61 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
private List<Pair<V, TIntSet>> findDimensions(List<PROCLUSCluster> clusters, Relation<V> database) {
// compute x_ij = avg distance from points in c_i to c_i.centroid
int dim = RelationUtil.dimensionality(database);
- Map<Integer, double[]> averageDistances = new HashMap<Integer, double[]>();
+ Map<Integer, double[]> averageDistances = new HashMap<>();
- for(int i = 0; i < clusters.size(); i++) {
+ for (int i = 0; i < clusters.size(); i++) {
PROCLUSCluster c_i = clusters.get(i);
double[] x_i = new double[dim];
- for(DBIDIter iter = c_i.objectIDs.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = c_i.objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] += Math.abs(c_i.centroid.doubleValue(d) - o.doubleValue(d));
}
}
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
x_i[d] /= c_i.objectIDs.size();
}
averageDistances.put(i, x_i);
}
- List<CTriple<Double, Integer, Integer>> z_ijs = new ArrayList<CTriple<Double, Integer, Integer>>();
- for(int i = 0; i < clusters.size(); i++) {
+ List<CTriple<Double, Integer, Integer>> z_ijs = new ArrayList<>();
+ for (int i = 0; i < clusters.size(); i++) {
double[] x_i = averageDistances.get(i);
// y_i
double y_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
y_i += x_i[j];
}
y_i /= dim;
// sigma_i
double sigma_i = 0;
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
double diff = x_i[j] - y_i;
sigma_i += diff * diff;
}
sigma_i /= (dim - 1);
sigma_i = Math.sqrt(sigma_i);
- for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, Integer, Integer>((x_i[j] - y_i) / sigma_i, i, j));
+ for (int j = 0; j < dim; j++) {
+ z_ijs.add(new CTriple<>((x_i[j] - y_i) / sigma_i, i, j));
}
}
Collections.sort(z_ijs);
// mapping cluster index -> dimensions
- Map<Integer, TIntSet> dimensionMap = new HashMap<Integer, TIntSet>();
+ Map<Integer, TIntSet> dimensionMap = new HashMap<>();
int max = Math.max(k * l, 2);
- for(int m = 0; m < max; m++) {
+ for (int m = 0; m < max; m++) {
CTriple<Double, Integer, Integer> z_ij = z_ijs.get(m);
TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
- if(dims_i == null) {
+ if (dims_i == null) {
dims_i = new TIntHashSet();
dimensionMap.put(z_ij.getSecond(), dims_i);
}
dims_i.add(z_ij.getThird());
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("z_ij ").append(z_ij).append('\n');
@@ -544,11 +543,11 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
}
// mapping cluster -> dimensions
- List<Pair<V, TIntSet>> result = new ArrayList<Pair<V, TIntSet>>();
- for(int i : dimensionMap.keySet()) {
+ List<Pair<V, TIntSet>> result = new ArrayList<>();
+ for (int i : dimensionMap.keySet()) {
TIntSet dims_i = dimensionMap.get(i);
PROCLUSCluster c_i = clusters.get(i);
- result.add(new Pair<V, TIntSet>(c_i.centroid, dims_i));
+ result.add(new Pair<>(c_i.centroid, dims_i));
}
return result;
}
@@ -562,19 +561,19 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
* @return the assignments of the object to the clusters
*/
private Map<DBID, PROCLUSCluster> assignPoints(Map<DBID, TIntSet> dimensions, Relation<V> database) {
- Map<DBID, ModifiableDBIDs> clusterIDs = new HashMap<DBID, ModifiableDBIDs>();
- for(DBID m_i : dimensions.keySet()) {
+ Map<DBID, ModifiableDBIDs> clusterIDs = new HashMap<>();
+ for (DBID m_i : dimensions.keySet()) {
clusterIDs.put(m_i, DBIDUtil.newHashSet());
}
- for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
+ for (DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
DistanceDBIDPair<DoubleDistance> minDist = null;
- for(DBID m_i : dimensions.keySet()) {
+ for (DBID m_i : dimensions.keySet()) {
V m = database.get(m_i);
DistanceDBIDPair<DoubleDistance> currentDist = DBIDUtil.newDistancePair(manhattanSegmentalDistance(p, m, dimensions.get(m_i)), m_i);
- if(minDist == null || currentDist.compareByDistance(minDist) < 0) {
+ if (minDist == null || currentDist.compareByDistance(minDist) < 0) {
minDist = currentDist;
}
}
@@ -584,17 +583,17 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
ids.add(p_id);
}
- Map<DBID, PROCLUSCluster> clusters = new HashMap<DBID, PROCLUSCluster>();
- for(DBID m_i : dimensions.keySet()) {
+ Map<DBID, PROCLUSCluster> clusters = new HashMap<>();
+ for (DBID m_i : dimensions.keySet()) {
ModifiableDBIDs objectIDs = clusterIDs.get(m_i);
- if(!objectIDs.isEmpty()) {
+ if (!objectIDs.isEmpty()) {
TIntSet clusterDimensions = dimensions.get(m_i);
V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.put(m_i, new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("clusters ").append(clusters).append('\n');
@@ -612,22 +611,22 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
* @return the assignments of the object to the clusters
*/
private List<PROCLUSCluster> finalAssignment(List<Pair<V, TIntSet>> dimensions, Relation<V> database) {
- Map<Integer, ModifiableDBIDs> clusterIDs = new HashMap<Integer, ModifiableDBIDs>();
- for(int i = 0; i < dimensions.size(); i++) {
+ Map<Integer, ModifiableDBIDs> clusterIDs = new HashMap<>();
+ for (int i = 0; i < dimensions.size(); i++) {
clusterIDs.put(i, DBIDUtil.newHashSet());
}
- for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
+ for (DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
Pair<DoubleDistance, Integer> minDist = null;
- for(int i = 0; i < dimensions.size(); i++) {
+ for (int i = 0; i < dimensions.size(); i++) {
Pair<V, TIntSet> pair_i = dimensions.get(i);
V c_i = pair_i.first;
TIntSet dimensions_i = pair_i.second;
DoubleDistance currentDist = manhattanSegmentalDistance(p, c_i, dimensions_i);
- if(minDist == null || currentDist.compareTo(minDist.first) < 0) {
- minDist = new Pair<DoubleDistance, Integer>(currentDist, i);
+ if (minDist == null || currentDist.compareTo(minDist.first) < 0) {
+ minDist = new Pair<>(currentDist, i);
}
}
// add p to cluster with mindist
@@ -636,17 +635,17 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
ids.add(p_id);
}
- List<PROCLUSCluster> clusters = new ArrayList<PROCLUSCluster>();
- for(int i = 0; i < dimensions.size(); i++) {
+ List<PROCLUSCluster> clusters = new ArrayList<>();
+ for (int i = 0; i < dimensions.size(); i++) {
ModifiableDBIDs objectIDs = clusterIDs.get(i);
- if(!objectIDs.isEmpty()) {
+ if (!objectIDs.isEmpty()) {
TIntSet clusterDimensions = dimensions.get(i).second;
V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.add(new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append('\n');
msg.append("clusters ").append(clusters).append('\n');
@@ -667,7 +666,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private DoubleDistance manhattanSegmentalDistance(V o1, V o2, TIntSet dimensions) {
double result = 0;
- for (TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext();) {
final int d = iter.next();
result += Math.abs(o1.doubleValue(d) - o2.doubleValue(d));
}
@@ -685,13 +684,13 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private double evaluateClusters(Map<DBID, PROCLUSCluster> clusters, Map<DBID, TIntSet> dimensions, Relation<V> database) {
double result = 0;
- for(DBID m_i : clusters.keySet()) {
+ for (DBID m_i : clusters.keySet()) {
PROCLUSCluster c_i = clusters.get(m_i);
V centroid_i = c_i.centroid;
TIntSet dims_i = dimensions.get(m_i);
double w_i = 0;
- for (TIntIterator iter = dims_i.iterator(); iter.hasNext(); ) {
+ for (TIntIterator iter = dims_i.iterator(); iter.hasNext();) {
final int j = iter.next();
w_i += avgDistance(centroid_i, c_i.objectIDs, database, j);
}
@@ -716,7 +715,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private double avgDistance(V centroid, DBIDs objectIDs, Relation<V> database, int dimension) {
Mean avg = new Mean();
- for(DBIDIter iter = objectIDs.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
avg.put(Math.abs(centroid.doubleValue(dimension) - o.doubleValue(dimension)));
}
@@ -733,9 +732,9 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
private ModifiableDBIDs computeBadMedoids(Map<DBID, PROCLUSCluster> clusters, int threshold) {
ModifiableDBIDs badMedoids = DBIDUtil.newHashSet();
- for(DBID m_i : clusters.keySet()) {
+ for (DBID m_i : clusters.keySet()) {
PROCLUSCluster c_i = clusters.get(m_i);
- if(c_i.objectIDs.size() < threshold) {
+ if (c_i.objectIDs.size() < threshold) {
badMedoids.add(m_i);
}
}
@@ -791,11 +790,10 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
StringBuilder result = new StringBuilder();
result.append("Dimensions: [");
boolean notFirst = false;
- for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
- if(notFirst) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext();) {
+ if (notFirst) {
result.append(',');
- }
- else {
+ } else {
notFirst = true;
}
result.append(iter.next());
@@ -813,7 +811,7 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
*/
public BitSet getDimensions() {
BitSet result = new BitSet();
- for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext();) {
result.set(iter.next());
}
return result;
@@ -847,19 +845,19 @@ public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedCluster
IntParameter m_iP = new IntParameter(M_I_ID, 10);
m_iP.addConstraint(new GreaterConstraint(0));
- if(config.grab(m_iP)) {
+ if (config.grab(m_iP)) {
m_i = m_iP.getValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
- if(config.grab(rndP)) {
+ if (config.grab(rndP)) {
rnd = rndP.getValue();
}
}
@Override
protected PROCLUS<V> makeInstance() {
- return new PROCLUS<V>(k, k_i, l, m_i, rnd);
+ return new PROCLUS<>(k, k_i, l, m_i, rnd);
}
}
-}
\ No newline at end of file
+}
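The reformatted manhattanSegmentalDistance above sums per-dimension absolute differences over a cluster's relevant dimensions; in the PROCLUS paper, the segmental distance is defined as the average over those dimensions. A standalone sketch with plain arrays standing in for ELKI's vector types:

  import java.util.BitSet;

  class SegmentalDistanceExample {
    // Manhattan segmental distance: mean absolute difference over the
    // selected dimensions only (other dimensions are ignored entirely).
    static double manhattanSegmental(double[] o1, double[] o2, BitSet dims) {
      double sum = 0;
      for (int d = dims.nextSetBit(0); d >= 0; d = dims.nextSetBit(d + 1)) {
        sum += Math.abs(o1[d] - o2[d]);
      }
      return sum / dims.cardinality();
    }

    public static void main(String[] args) {
      BitSet dims = new BitSet();
      dims.set(0);
      dims.set(2);
      // |1-4| + |0-(-2)| = 5, averaged over 2 selected dimensions -> 2.5
      System.out.println(manhattanSegmental(
          new double[] { 1, 9, 0 }, new double[] { 4, 7, -2 }, dims));
    }
  }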
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
index fc3228eb..4e670974 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -111,7 +111,7 @@ public class PreDeCon<V extends NumberVector<?>> extends AbstractProjectedDBSCAN
@Override
protected PreDeCon<V> makeInstance() {
- return new PreDeCon<V>(epsilon, minpts, outerdist, lambda);
+ return new PreDeCon<>(epsilon, minpts, outerdist, lambda);
}
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index 46c5f0b8..c8d0833e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -43,7 +43,7 @@ import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.AbstractDimensionsSelectingDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -105,7 +105,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
/**
* Parameter to specify the maximum radius of the neighborhood to be
* considered, must be suitable to
- * {@link AbstractDimensionsSelectingDoubleDistanceFunction}.
+ * {@link DimensionSelectingSubspaceDistanceFunction}.
* <p>
* Key: {@code -subclu.epsilon}
* </p>
@@ -125,7 +125,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* Holds the instance of the distance function specified by
* {@link #DISTANCE_FUNCTION_ID}.
*/
- private AbstractDimensionsSelectingDoubleDistanceFunction<V> distanceFunction;
+ private DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction;
/**
* Holds the value of {@link #EPSILON_ID}.
@@ -149,7 +149,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* @param epsilon Epsilon value
* @param minpts Minpts value
*/
- public SUBCLU(AbstractDimensionsSelectingDoubleDistanceFunction<V> distanceFunction, DoubleDistance epsilon, int minpts) {
+ public SUBCLU(DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction, DoubleDistance epsilon, int minpts) {
super();
this.distanceFunction = distanceFunction;
this.epsilon = epsilon;
@@ -168,49 +168,49 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
// Generate all 1-dimensional clusters
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Generate all 1-dimensional clusters.", LOG);
}
// mapping of dimensionality to set of subspaces
- HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<Integer, List<Subspace>>();
+ HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
// list of 1-dimensional subspaces containing clusters
- List<Subspace> s_1 = new ArrayList<Subspace>();
+ List<Subspace> s_1 = new ArrayList<>();
subspaceMap.put(0, s_1);
// mapping of subspaces to list of clusters
- TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace, List<Cluster<Model>>>(new Subspace.DimensionComparator());
+ TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
- for(int d = 0; d < dimensionality; d++) {
+ for (int d = 0; d < dimensionality; d++) {
Subspace currentSubspace = new Subspace(d);
List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
StringBuilder msg = new StringBuilder();
msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
- for(Cluster<Model> cluster : clusters) {
+ for (Cluster<Model> cluster : clusters) {
msg.append(" " + cluster.getIDs() + "\n");
}
LOG.debugFiner(msg.toString());
}
- if(!clusters.isEmpty()) {
+ if (!clusters.isEmpty()) {
s_1.add(currentSubspace);
clusterMap.put(currentSubspace, clusters);
}
}
// Generate (d+1)-dimensional clusters from d-dimensional clusters
- for(int d = 0; d < dimensionality - 1; d++) {
- if(stepprog != null) {
+ for (int d = 0; d < dimensionality - 1; d++) {
+ if (stepprog != null) {
stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
}
List<Subspace> subspaces = subspaceMap.get(d);
- if(subspaces == null || subspaces.isEmpty()) {
- if(stepprog != null) {
- for(int dim = d + 1; dim < dimensionality - 1; dim++) {
+ if (subspaces == null || subspaces.isEmpty()) {
+ if (stepprog != null) {
+ for (int dim = d + 1; dim < dimensionality - 1; dim++) {
stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
}
}
@@ -218,57 +218,57 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
}
List<Subspace> candidates = generateSubspaceCandidates(subspaces);
- List<Subspace> s_d = new ArrayList<Subspace>();
+ List<Subspace> s_d = new ArrayList<>();
- for(Subspace candidate : candidates) {
+ for (Subspace candidate : candidates) {
Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
}
List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
- List<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
- for(Cluster<Model> cluster : bestSubspaceClusters) {
+ List<Cluster<Model>> clusters = new ArrayList<>();
+ for (Cluster<Model> cluster : bestSubspaceClusters) {
List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate);
- if(!candidateClusters.isEmpty()) {
+ if (!candidateClusters.isEmpty()) {
clusters.addAll(candidateClusters);
}
}
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
StringBuilder msg = new StringBuilder();
msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n");
- for(Cluster<Model> c : clusters) {
+ for (Cluster<Model> c : clusters) {
msg.append(" " + c.getIDs() + "\n");
}
LOG.debugFine(msg.toString());
}
- if(!clusters.isEmpty()) {
+ if (!clusters.isEmpty()) {
s_d.add(candidate);
clusterMap.put(candidate, clusters);
}
}
- if(!s_d.isEmpty()) {
+ if (!s_d.isEmpty()) {
subspaceMap.put(d + 1, s_d);
}
}
// build result
int numClusters = 1;
- result = new Clustering<SubspaceModel<V>>("SUBCLU clustering", "subclu-clustering");
- for(Subspace subspace : clusterMap.descendingKeySet()) {
+ result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
+ for (Subspace subspace : clusterMap.descendingKeySet()) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
- for(Cluster<Model> cluster : clusters) {
- Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(cluster.getIDs());
- newCluster.setModel(new SubspaceModel<V>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
+ for (Cluster<Model> cluster : clusters) {
+ Cluster<SubspaceModel<V>> newCluster = new Cluster<>(cluster.getIDs());
+ newCluster.setModel(new SubspaceModel<>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
- result.addCluster(newCluster);
+ result.addToplevelCluster(newCluster);
}
}
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
return result;
@@ -300,7 +300,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
distanceFunction.setSelectedDimensions(subspace.getDimensions());
ProxyDatabase proxy;
- if(ids == null) {
+ if (ids == null) {
// TODO: in this case, we might want to use an index - the proxy below
// will prevent this!
ids = relation.getDBIDs();
@@ -308,18 +308,18 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
proxy = new ProxyDatabase(ids, relation);
- DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<V, DoubleDistance>(distanceFunction, epsilon, minpts);
+ DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts);
// run DBSCAN
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
}
Clustering<Model> dbsres = dbscan.run(proxy);
// separate cluster and noise
List<Cluster<Model>> clusterAndNoise = dbsres.getAllClusters();
- List<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>();
- for(Cluster<Model> c : clusterAndNoise) {
- if(!c.isNoise()) {
+ List<Cluster<Model>> clusters = new ArrayList<>();
+ for (Cluster<Model> c : clusterAndNoise) {
+ if (!c.isNoise()) {
clusters.add(c);
}
}
@@ -334,9 +334,9 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* @return the {@code d+1}-dimensional subspace candidates
*/
private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
- List<Subspace> candidates = new ArrayList<Subspace>();
+ List<Subspace> candidates = new ArrayList<>();
- if(subspaces.isEmpty()) {
+ if (subspaces.isEmpty()) {
return candidates;
}
@@ -344,46 +344,46 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
int d = subspaces.get(0).dimensionality();
StringBuilder msgFine = new StringBuilder("\n");
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
msgFine.append("subspaces ").append(subspaces).append('\n');
}
- for(int i = 0; i < subspaces.size(); i++) {
+ for (int i = 0; i < subspaces.size(); i++) {
Subspace s1 = subspaces.get(i);
- for(int j = i + 1; j < subspaces.size(); j++) {
+ for (int j = i + 1; j < subspaces.size(); j++) {
Subspace s2 = subspaces.get(j);
Subspace candidate = s1.join(s2);
- if(candidate != null) {
- if(LOG.isDebuggingFiner()) {
+ if (candidate != null) {
+ if (LOG.isDebuggingFiner()) {
msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n');
}
// prune irrelevant candidate subspaces
List<Subspace> lowerSubspaces = lowerSubspaces(candidate);
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n');
}
boolean irrelevantCandidate = false;
- for(Subspace s : lowerSubspaces) {
- if(!subspaces.contains(s)) {
+ for (Subspace s : lowerSubspaces) {
+ if (!subspaces.contains(s)) {
irrelevantCandidate = true;
break;
}
}
- if(!irrelevantCandidate) {
+ if (!irrelevantCandidate) {
candidates.add(candidate);
}
}
}
}
- if(LOG.isDebuggingFiner()) {
+ if (LOG.isDebuggingFiner()) {
LOG.debugFiner(msgFine.toString());
}
- if(LOG.isDebugging()) {
+ if (LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append(d + 1).append("-dimensional candidate subspaces: ");
- for(Subspace candidate : candidates) {
+ for (Subspace candidate : candidates) {
msg.append(candidate.dimensonsToString()).append(' ');
}
LOG.debug(msg.toString());
@@ -401,14 +401,14 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
*/
private List<Subspace> lowerSubspaces(Subspace subspace) {
int dimensionality = subspace.dimensionality();
- if(dimensionality <= 1) {
+ if (dimensionality <= 1) {
return null;
}
// order result according to the dimensions
- List<Subspace> result = new ArrayList<Subspace>();
+ List<Subspace> result = new ArrayList<>();
BitSet dimensions = subspace.getDimensions();
- for(int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) {
+ for (int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) {
BitSet newDimensions = (BitSet) dimensions.clone();
newDimensions.set(dim, false);
result.add(new Subspace(newDimensions));
@@ -432,14 +432,14 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
Subspace bestSubspace = null;
- for(Subspace subspace : subspaces) {
+ for (Subspace subspace : subspaces) {
int min = Integer.MAX_VALUE;
- if(subspace.isSubspace(candidate)) {
+ if (subspace.isSubspace(candidate)) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
- for(Cluster<Model> cluster : clusters) {
+ for (Cluster<Model> cluster : clusters) {
int clusterSize = cluster.size();
- if(clusterSize < min) {
+ if (clusterSize < min) {
min = clusterSize;
bestSubspace = subspace;
}
@@ -472,31 +472,31 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
protected DoubleDistance epsilon = null;
- protected AbstractDimensionsSelectingDoubleDistanceFunction<V> distance = null;
+ protected DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distance = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>> param = new ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>>(DISTANCE_FUNCTION_ID, AbstractDimensionsSelectingDoubleDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
- if(config.grab(param)) {
+ ObjectParameter<DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance>> param = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DimensionSelectingSubspaceDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
+ if (config.grab(param)) {
distance = param.instantiateClass(config);
}
- DistanceParameter<DoubleDistance> epsilonP = new DistanceParameter<DoubleDistance>(EPSILON_ID, distance);
- if(config.grab(epsilonP)) {
+ DistanceParameter<DoubleDistance> epsilonP = new DistanceParameter<>(EPSILON_ID, distance);
+ if (config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
IntParameter minptsP = new IntParameter(MINPTS_ID);
minptsP.addConstraint(new GreaterConstraint(0));
- if(config.grab(minptsP)) {
+ if (config.grab(minptsP)) {
minpts = minptsP.getValue();
}
}
@Override
protected SUBCLU<V> makeInstance() {
- return new SUBCLU<V>(distance, epsilon, minpts);
+ return new SUBCLU<>(distance, epsilon, minpts);
}
}
-}
\ No newline at end of file
+}
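The SUBCLU hunks above combine the two changes that recur throughout this change set: clusters are now attached to the result via addToplevelCluster(), which replaces the removed addCluster(), and constructor calls drop their repeated type arguments in favor of the Java 7 diamond operator. A minimal sketch of the wrapping step, using only calls that already appear in the hunks above; the surrounding method, the numClusters counter, and the result variable are elided context from the diff, not new API:

    // Turn one DBSCAN cluster found in `subspace` into a subspace cluster
    // carrying a centroid-based model, then attach it to the result.
    Cluster<SubspaceModel<V>> newCluster = new Cluster<>(cluster.getIDs());
    newCluster.setModel(new SubspaceModel<>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
    newCluster.setName("cluster_" + numClusters++);
    result.addToplevelCluster(newCluster); // formerly result.addCluster(newCluster)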
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java
index 17eb3c19..561816bd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SubspaceClusteringAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
index 6b22b233..50e3fcd5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -65,7 +65,7 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
*/
public CLIQUESubspace(int dimension) {
super(dimension);
- denseUnits = new ArrayList<CLIQUEUnit<V>>();
+ denseUnits = new ArrayList<>();
coverage = 0;
}
@@ -76,7 +76,7 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
*/
public CLIQUESubspace(BitSet dimensions) {
super(dimensions);
- denseUnits = new ArrayList<CLIQUEUnit<V>>();
+ denseUnits = new ArrayList<>();
coverage = 0;
}
@@ -104,12 +104,12 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
* @return the clusters in this subspace and the corresponding cluster models
*/
public List<Pair<Subspace, ModifiableDBIDs>> determineClusters() {
- List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<>();
for(CLIQUEUnit<V> unit : getDenseUnits()) {
if(!unit.isAssigned()) {
ModifiableDBIDs cluster = DBIDUtil.newHashSet();
- CLIQUESubspace<V> model = new CLIQUESubspace<V>(getDimensions());
+ CLIQUESubspace<V> model = new CLIQUESubspace<>(getDimensions());
clusters.add(new Pair<Subspace, ModifiableDBIDs>(model, cluster));
dfs(unit, cluster, model);
}
@@ -217,7 +217,7 @@ public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
return null;
}
- CLIQUESubspace<V> s = new CLIQUESubspace<V>(dimensions);
+ CLIQUESubspace<V> s = new CLIQUESubspace<>(dimensions);
for(CLIQUEUnit<V> u1 : this.getDenseUnits()) {
for(CLIQUEUnit<V> u2 : other.getDenseUnits()) {
CLIQUEUnit<V> u = u1.join(u2, all, tau);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
index 70f251c9..a71b2b67 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -77,7 +77,7 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
public CLIQUEUnit(SortedSet<Interval> intervals, ModifiableDBIDs ids) {
this.intervals = intervals;
- dimensionToInterval = new TIntObjectHashMap<Interval>();
+ dimensionToInterval = new TIntObjectHashMap<>();
for(Interval interval : intervals) {
dimensionToInterval.put(interval.getDimension(), interval);
}
@@ -93,10 +93,10 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
* @param interval the interval belonging to this unit
*/
public CLIQUEUnit(Interval interval) {
- intervals = new TreeSet<Interval>();
+ intervals = new TreeSet<>();
intervals.add(interval);
- dimensionToInterval = new TIntObjectHashMap<Interval>();
+ dimensionToInterval = new TIntObjectHashMap<>();
dimensionToInterval.put(interval.getDimension(), interval);
ids = DBIDUtil.newHashSet();
@@ -254,7 +254,7 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
Iterator<Interval> it1 = this.intervals.iterator();
Iterator<Interval> it2 = other.intervals.iterator();
- SortedSet<Interval> resultIntervals = new TreeSet<Interval>();
+ SortedSet<Interval> resultIntervals = new TreeSet<>();
for(int i = 0; i < this.intervals.size() - 1; i++) {
i1 = it1.next();
i2 = it2.next();
@@ -270,7 +270,7 @@ public class CLIQUEUnit<V extends NumberVector<?>> {
resultIDs.retainAll(other.ids);
if(resultIDs.size() / all >= tau) {
- return new CLIQUEUnit<V>(resultIntervals, resultIDs);
+ return new CLIQUEUnit<>(resultIntervals, resultIDs);
}
return null;
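Apart from the copyright bump, the CLIQUE hunks are purely the mechanical diamond-operator migration: since Java 7 the compiler infers constructor type arguments from the assignment target, so the explicit repetition is redundant. A minimal sketch of the equivalence, using the same types as the hunks above:

    // Pre-Java-7 form, as removed above: type arguments written twice.
    SortedSet<Interval> a = new TreeSet<Interval>();
    // Java 7 diamond, as added above: inferred from the declared type.
    SortedSet<Interval> b = new TreeSet<>();
    // Inference also works against a method's return type:
    //   return new CLIQUEUnit<>(resultIntervals, resultIDs);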
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
index 7a686190..7acd7572 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
index 2a1eb930..2efa038d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
@@ -10,7 +10,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
index af8fb1ea..3b5d0ec2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -144,7 +144,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
HashMap<String, DBIDs> labelMap = multiple ? multipleAssignment(relation) : singleAssignment(relation);
ModifiableDBIDs noiseids = DBIDUtil.newArray();
- Clustering<Model> result = new Clustering<Model>("By Label Clustering", "bylabel-clustering");
+ Clustering<Model> result = new Clustering<>("By Label Clustering", "bylabel-clustering");
for(Entry<String, DBIDs> entry : labelMap.entrySet()) {
DBIDs ids = entry.getValue();
if(ids.size() <= 1) {
@@ -156,13 +156,13 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
if(noisepattern != null && noisepattern.matcher(entry.getKey()).find()) {
c.setNoise(true);
}
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
// Collected noise IDs.
if(noiseids.size() > 0) {
Cluster<Model> c = new Cluster<Model>("Noise", noiseids, ClusterModel.CLUSTER);
c.setNoise(true);
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
return result;
}
@@ -175,7 +175,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
* @return a mapping of labels to ids
*/
private HashMap<String, DBIDs> singleAssignment(Relation<?> data) {
- HashMap<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
+ HashMap<String, DBIDs> labelMap = new HashMap<>();
for(DBIDIter iditer = data.iterDBIDs(); iditer.valid(); iditer.advance()) {
final Object val = data.get(iditer);
@@ -193,7 +193,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
* @return a mapping of labels to ids
*/
private HashMap<String, DBIDs> multipleAssignment(Relation<?> data) {
- HashMap<String, DBIDs> labelMap = new HashMap<String, DBIDs>();
+ HashMap<String, DBIDs> labelMap = new HashMap<>();
for(DBIDIter iditer = data.iterDBIDs(); iditer.valid(); iditer.advance()) {
String[] labels = data.get(iditer).toString().split(" ");
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
index dfb7d37f..33101221 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -88,8 +88,7 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
try {
Relation<ClassLabel> relation = database.getRelation(TypeUtil.CLASSLABEL);
return run(relation);
- }
- catch(NoSupportedDataTypeException e) {
+ } catch (NoSupportedDataTypeException e) {
// Otherwise, try any labellike.
return run(database.getRelation(getInputTypeRestriction()[0]));
}
@@ -101,12 +100,13 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
* @param relation The data input to use
*/
public Clustering<Model> run(Relation<?> relation) {
- HashMap<String, DBIDs> labelmap = new HashMap<String, DBIDs>();
+ HashMap<String, DBIDs> labelmap = new HashMap<>();
ModifiableDBIDs noiseids = DBIDUtil.newArray();
+ Clustering<Model> clustering = new Clustering<>("By Label Hierarchical Clustering", "bylabel-clustering");
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final Object val = relation.get(iditer);
- if(val == null) {
+ if (val == null) {
noiseids.add(iditer);
continue;
}
@@ -115,44 +115,41 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
assign(labelmap, label, iditer);
}
- ArrayList<Cluster<Model>> clusters = new ArrayList<Cluster<Model>>(labelmap.size());
- for(Entry<String, DBIDs> entry : labelmap.entrySet()) {
+ ArrayList<Cluster<Model>> clusters = new ArrayList<>(labelmap.size());
+ for (Entry<String, DBIDs> entry : labelmap.entrySet()) {
DBIDs ids = entry.getValue();
- if(ids instanceof DBID) {
+ if (ids instanceof DBID) {
noiseids.add((DBID) ids);
continue;
}
- Cluster<Model> clus = new Cluster<Model>(entry.getKey(), ids, ClusterModel.CLUSTER, new ArrayList<Cluster<Model>>(), new ArrayList<Cluster<Model>>());
+ Cluster<Model> clus = new Cluster<Model>(entry.getKey(), ids, ClusterModel.CLUSTER);
clusters.add(clus);
}
- for(Cluster<Model> cur : clusters) {
- for(Cluster<Model> oth : clusters) {
- if(oth != cur) {
- if(oth.getName().startsWith(cur.getName())) {
- oth.getParents().add(cur);
- cur.getChildren().add(oth);
- // System.err.println(oth.getLabel() + " is a child of " +
- // cur.getLabel());
+ for (Cluster<Model> cur : clusters) {
+ boolean isrootcluster = true;
+ for (Cluster<Model> oth : clusters) {
+ if (oth != cur) {
+ if (oth.getName().startsWith(cur.getName())) {
+ clustering.addChildCluster(oth, cur);
+ if (LOG.isDebuggingFiner()) {
+ LOG.debugFiner(oth.getName() + " is a child of " + cur.getName());
+ }
+ isrootcluster = false;
}
}
}
- }
- ArrayList<Cluster<Model>> rootclusters = new ArrayList<Cluster<Model>>();
- for(Cluster<Model> cur : clusters) {
- if(cur.getParents().size() == 0) {
- rootclusters.add(cur);
+ if (isrootcluster) {
+ clustering.addToplevelCluster(cur);
}
}
// Collected noise IDs.
- if(noiseids.size() > 0) {
+ if (noiseids.size() > 0) {
Cluster<Model> c = new Cluster<Model>("Noise", noiseids, ClusterModel.CLUSTER);
c.setNoise(true);
- rootclusters.add(c);
+ clustering.addToplevelCluster(c);
}
- assert (rootclusters.size() > 0) : "No clusters found by bylabel clustering. Empty database?";
-
- return new Clustering<Model>("By Label Hierarchical Clustering", "bylabel-clustering", rootclusters);
+ return clustering;
}
/**
@@ -163,21 +160,19 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
* @param id the id of the object to be assigned
*/
private void assign(HashMap<String, DBIDs> labelMap, String label, DBIDRef id) {
- if(labelMap.containsKey(label)) {
+ if (labelMap.containsKey(label)) {
DBIDs exist = labelMap.get(label);
- if(exist instanceof DBID) {
+ if (exist instanceof DBID) {
ModifiableDBIDs n = DBIDUtil.newHashSet();
n.add((DBID) exist);
n.add(id);
labelMap.put(label, n);
- }
- else {
+ } else {
assert (exist instanceof HashSetModifiableDBIDs);
assert (exist.size() > 1);
((ModifiableDBIDs) exist).add(id);
}
- }
- else {
+ } else {
labelMap.put(label, DBIDUtil.deref(id));
}
}
@@ -191,4 +186,4 @@ public class ByLabelHierarchicalClustering extends AbstractAlgorithm<Clustering<
protected Logging getLogger() {
return LOG;
}
-}
\ No newline at end of file
+}
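The ByLabelHierarchicalClustering rewrite is the one structural change in this group: parent/child links no longer live on the Cluster objects (the removed getParents()/getChildren() lists) but are recorded on the Clustering result itself, and root clusters are attached as soon as they are identified instead of being collected into a separate list for the constructor. A minimal sketch of building such a result, using the three-argument Cluster constructor seen above; the (parent, child) argument order of addChildCluster() is an assumption here, since the hunk does not show its declaration:

    Clustering<Model> clustering = new Clustering<>("By Label Hierarchical Clustering", "bylabel-clustering");
    Cluster<Model> parent = new Cluster<Model>("A", DBIDUtil.newHashSet(), ClusterModel.CLUSTER);
    Cluster<Model> child = new Cluster<Model>("A.1", DBIDUtil.newHashSet(), ClusterModel.CLUSTER);
    clustering.addToplevelCluster(parent);     // roots attach directly to the result
    clustering.addChildCluster(parent, child); // assumed order: hierarchy edges live in the result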
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java
index f082db9c..76b024a2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelOrAllInOneClustering.java
@@ -15,7 +15,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -66,9 +66,9 @@ public class ByLabelOrAllInOneClustering extends ByLabelClustering {
// Ignore.
}
final DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
- Clustering<Model> result = new Clustering<Model>("All-in-one trivial Clustering", "allinone-clustering");
+ Clustering<Model> result = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
Cluster<Model> c = new Cluster<Model>(ids, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
return result;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
index 2114ac16..73ad9880 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -101,7 +101,7 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl
*/
public Clustering<Model> run(Relation<Model> relation) {
// Build model mapping
- HashMap<Model, ModifiableDBIDs> modelMap = new HashMap<Model, ModifiableDBIDs>();
+ HashMap<Model, ModifiableDBIDs> modelMap = new HashMap<>();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
Model model = relation.get(iditer);
ModifiableDBIDs modelids = modelMap.get(model);
@@ -112,16 +112,16 @@ public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> impl
modelids.add(iditer);
}
- Clustering<Model> result = new Clustering<Model>("By Model Clustering", "bymodel-clustering");
+ Clustering<Model> result = new Clustering<>("By Model Clustering", "bymodel-clustering");
for(Entry<Model, ModifiableDBIDs> entry : modelMap.entrySet()) {
final Model model = entry.getKey();
final ModifiableDBIDs ids = entry.getValue();
final String name = (model instanceof GeneratorInterface) ? ((GeneratorInterface) model).getName() : model.toString();
- Cluster<Model> c = new Cluster<Model>(name, ids, model);
+ Cluster<Model> c = new Cluster<>(name, ids, model);
if(noisepattern != null && noisepattern.matcher(name).find()) {
c.setNoise(true);
}
- result.addCluster(c);
+ result.addToplevelCluster(c);
}
return result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
index eaa5d2b2..dae50c25 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -63,9 +63,9 @@ public class TrivialAllInOne extends AbstractAlgorithm<Clustering<Model>> implem
public Clustering<Model> run(Relation<?> relation) {
final DBIDs ids = relation.getDBIDs();
- Clustering<Model> result = new Clustering<Model>("All-in-one trivial Clustering", "allinone-clustering");
+ Clustering<Model> result = new Clustering<>("All-in-one trivial Clustering", "allinone-clustering");
Cluster<Model> c = new Cluster<Model>(ids, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
return result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
index dd0f94a5..ecc7dbec 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -62,9 +62,9 @@ public class TrivialAllNoise extends AbstractAlgorithm<Clustering<Model>> implem
public Clustering<Model> run(Relation<?> relation) {
final DBIDs ids = relation.getDBIDs();
- Clustering<Model> result = new Clustering<Model>("All-in-noise trivial Clustering", "allinnoise-clustering");
+ Clustering<Model> result = new Clustering<>("All-in-noise trivial Clustering", "allinnoise-clustering");
Cluster<Model> c = new Cluster<Model>(ids, true, ClusterModel.CLUSTER);
- result.addCluster(c);
+ result.addToplevelCluster(c);
return result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
index 5870a736..6b7b50f5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team