summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java86
1 files changed, 20 insertions, 66 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
index f43c2277..e692293c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
import java.util.ArrayList;
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -36,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
/**
* Provides the k-means algorithm, using Lloyd-style bulk iterations.
@@ -90,28 +83,23 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
super(distanceFunction, k, maxiter, initializer);
}
- /**
- * Run k-means.
- *
- * @param database Database
- * @param relation relation to use
- * @return result
- */
+ @Override
public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
- return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
- List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+ List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
- List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+ List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet(relation.size() / k));
}
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
- if (LOG.isVerbose()) {
- LOG.verbose("K-Means iteration " + (iteration + 1));
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
boolean changed = assignToNearestCluster(relation, means, clusters);
// Stop if no cluster assignment changed.
@@ -121,12 +109,16 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
// Recompute means.
means = means(clusters, means, relation);
}
+ if (prog != null) {
+ prog.setCompleted(LOG);
+ }
+
// Wrap result
final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
- Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering");
+ Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
- KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
- result.addCluster(new Cluster<KMeansModel<V>>(clusters.get(i), model));
+ KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef()));
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
}
return result;
}
@@ -143,53 +135,15 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> {
- /**
- * k Parameter.
- */
- protected int k;
-
- /**
- * Number of iterations.
- */
- protected int maxiter;
-
- /**
- * Initialization method.
- */
- protected KMeansInitialization<V> initializer;
-
+ public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> {
@Override
- protected void makeOptions(Parameterization config) {
- ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
- if(config.grab(distanceFunctionP)) {
- distanceFunction = distanceFunctionP.instantiateClass(config);
- if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
- LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!");
- }
- }
-
- IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(new GreaterConstraint(0));
- if (config.grab(kP)) {
- k = kP.getValue();
- }
-
- ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
- if (config.grab(initialP)) {
- initializer = initialP.instantiateClass(config);
- }
-
- IntParameter maxiterP = new IntParameter(MAXITER_ID, 0);
- maxiterP.addConstraint(new GreaterEqualConstraint(0));
- if (config.grab(maxiterP)) {
- maxiter = maxiterP.intValue();
- }
+ protected Logging getLogger() {
+ return LOG;
}
@Override
protected KMeansLloyd<V, D> makeInstance() {
- return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+ return new KMeansLloyd<>(distanceFunction, k, maxiter, initializer);
}
}
}