diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java | 86 |
1 files changed, 20 insertions, 66 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java index f43c2277..e692293c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; import java.util.ArrayList; import java.util.List; -import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.Cluster; import de.lmu.ifi.dbs.elki.data.Clustering; import de.lmu.ifi.dbs.elki.data.NumberVector; @@ -36,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; /** * Provides the k-means algorithm, using Lloyd-style bulk iterations. @@ -90,28 +83,23 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten super(distanceFunction, k, maxiter, initializer); } - /** - * Run k-means. - * - * @param database Database - * @param relation relation to use - * @return result - */ + @Override public Clustering<KMeansModel<V>> run(Database database, Relation<V> relation) { if (relation.size() <= 0) { - return new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering"); + return new Clustering<>("k-Means Clustering", "kmeans-clustering"); } // Choose initial means - List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction()); + List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction()); // Setup cluster assignment store - List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>(); + List<ModifiableDBIDs> clusters = new ArrayList<>(); for (int i = 0; i < k; i++) { clusters.add(DBIDUtil.newHashSet(relation.size() / k)); } + IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null; for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) { - if (LOG.isVerbose()) { - LOG.verbose("K-Means iteration " + (iteration + 1)); + if (prog != null) { + prog.incrementProcessed(LOG); } boolean changed = assignToNearestCluster(relation, means, clusters); // Stop if no cluster assignment changed. @@ -121,12 +109,16 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten // Recompute means. means = means(clusters, means, relation); } + if (prog != null) { + prog.setCompleted(LOG); + } + // Wrap result final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation); - Clustering<KMeansModel<V>> result = new Clustering<KMeansModel<V>>("k-Means Clustering", "kmeans-clustering"); + Clustering<KMeansModel<V>> result = new Clustering<>("k-Means Clustering", "kmeans-clustering"); for (int i = 0; i < clusters.size(); i++) { - KMeansModel<V> model = new KMeansModel<V>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef())); - result.addCluster(new Cluster<KMeansModel<V>>(clusters.get(i), model)); + KMeansModel<V> model = new KMeansModel<>(factory.newNumberVector(means.get(i).getColumnVector().getArrayRef())); + result.addToplevelCluster(new Cluster<>(clusters.get(i), model)); } return result; } @@ -143,53 +135,15 @@ public class KMeansLloyd<V extends NumberVector<?>, D extends Distance<D>> exten * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?>, D> { - /** - * k Parameter. - */ - protected int k; - - /** - * Number of iterations. - */ - protected int maxiter; - - /** - * Initialization method. - */ - protected KMeansInitialization<V> initializer; - + public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractKMeans.Parameterizer<V, D> { @Override - protected void makeOptions(Parameterization config) { - ObjectParameter<PrimitiveDistanceFunction<NumberVector<?>, D>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class); - if(config.grab(distanceFunctionP)) { - distanceFunction = distanceFunctionP.instantiateClass(config); - if (!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) { - LOG.warning("k-means optimizes the sum of squares - it should be used with squared euclidean distance and may stop converging otherwise!"); - } - } - - IntParameter kP = new IntParameter(K_ID); - kP.addConstraint(new GreaterConstraint(0)); - if (config.grab(kP)) { - k = kP.getValue(); - } - - ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class); - if (config.grab(initialP)) { - initializer = initialP.instantiateClass(config); - } - - IntParameter maxiterP = new IntParameter(MAXITER_ID, 0); - maxiterP.addConstraint(new GreaterEqualConstraint(0)); - if (config.grab(maxiterP)) { - maxiter = maxiterP.intValue(); - } + protected Logging getLogger() { + return LOG; } @Override protected KMeansLloyd<V, D> makeInstance() { - return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer); + return new KMeansLloyd<>(distanceFunction, k, maxiter, initializer); } } } |