diff options
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization')
12 files changed, 67 insertions, 44 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java index 723c86cc..72f9a887 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/AbstractKMeansInitialization.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * Abstract base class for common k-means initializations. * * @author Erich Schubert + * @since 0.3 * * @param <V> Vector type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java index 4fa0d004..a686f345 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestPointsInitialMeans.java @@ -40,6 +40,7 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; @@ -52,9 +53,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; * times will be more likely to return the same local minima. * * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type for kMedoids and kMedians */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.FarthestPointsInitialMeans") public class FarthestPointsInitialMeans<O> extends AbstractKMeansInitialization<NumberVector> implements KMedoidsInitialization<O> { /** * Discard the first vector. diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java index 9cceb4f2..9f223dac 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FarthestSumPointsInitialMeans.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.math.random.RandomFactory; * times will be more likely to return the same local minima. * * @author Erich Schubert + * @since 0.6.0 * * @param <O> Object type for kmedoids and kmedians */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java index beab423e..210994e5 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/FirstKInitialMeans.java @@ -34,15 +34,18 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Initialize K-means by using the first k objects as initial means. * * @author Erich Schubert + * @since 0.4.0 * * @param <O> Object type for KMedoids */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.FirstKInitialMeans") public class FirstKInitialMeans<O> implements KMeansInitialization<NumberVector>, KMedoidsInitialization<O> { /** * Constructor. @@ -84,4 +87,4 @@ public class FirstKInitialMeans<O> implements KMeansInitialization<NumberVector> return new FirstKInitialMeans<>(); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java index 55ffb65e..c1cd71a0 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansInitialization.java @@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio * Interface for initializing K-Means * * @author Erich Schubert + * @since 0.2 * * @apiviz.landmark * diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java index c1518d18..11bfef59 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMeansPlusPlusInitialMeans.java @@ -41,6 +41,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; @@ -56,6 +57,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @param <O> Vector type */ @@ -63,6 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; title = "k-means++: the advantages of careful seeding", // booktitle = "Proc. of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2007", // url = "http://dx.doi.org/10.1145/1283383.1283494") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansPlusPlusInitialMeans") public class KMeansPlusPlusInitialMeans<O> extends AbstractKMeansInitialization<NumberVector> implements KMedoidsInitialization<O> { /** * Constructor. @@ -232,4 +235,4 @@ public class KMeansPlusPlusInitialMeans<O> extends AbstractKMeansInitialization< return new KMeansPlusPlusInitialMeans<>(rnd); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java index 0ae3723e..2dc11c52 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/KMedoidsInitialization.java @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; * this initialization will only return members of the original data set. * * @author Erich Schubert + * @since 0.5.0 * * @param <V> Object type */ diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java index 396d79e6..ea11330e 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PAMInitialMeans.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -57,12 +58,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * </p> * * @author Erich Schubert + * @since 0.5.0 * * @param <O> Object type for KMedoids initialization */ @Reference(title = "Clustering my means of Medoids", // authors = "Kaufman, L. and Rousseeuw, P.J.", // booktitle = "Statistical Data Analysis Based on the L_1–Norm and Related Methods") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.PAMInitialMeans") public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, KMedoidsInitialization<O> { /** * Class logger. @@ -78,6 +81,9 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K @Override public <T extends NumberVector, V extends NumberVector> List<V> chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction, NumberVector.Factory<V> factory) { + if(relation.size() < k) { + throw new AbortException("Database has less than k objects."); + } // Ugly cast; but better than code duplication. @SuppressWarnings("unchecked") Relation<O> rel = (Relation<O>) relation; @@ -97,38 +103,33 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) { ArrayModifiableDBIDs medids = DBIDUtil.newArray(k); DBIDVar bestid = DBIDUtil.newVar(); - WritableDoubleDataStore mindist = null; + // We need three temporary storage arrays: + WritableDoubleDataStore mindist, bestd, tempd; + mindist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + bestd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + tempd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); // First mean is chosen by having the smallest distance sum to all others. { double best = Double.POSITIVE_INFINITY; - WritableDoubleDataStore newd = null; FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Choosing initial mean", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - if(newd == null) { - newd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - } - int sum = 0; + double sum = 0, d; for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { - double d = distQ.distance(iter, iter2); - sum += d; - newd.putDouble(iter2, d); + sum += d = distQ.distance(iter, iter2); + tempd.putDouble(iter2, d); } if(sum < best) { best = sum; bestid.set(iter); - if(mindist != null) { - mindist.destroy(); - } - mindist = newd; - newd = null; + // Swap mindist and newd: + WritableDoubleDataStore temp = mindist; + mindist = tempd; + tempd = temp; } LOG.incrementProcessed(prog); } LOG.ensureCompleted(prog); - if(newd != null) { - newd.destroy(); - } medids.add(bestid); } assert(mindist != null); @@ -138,44 +139,40 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K LOG.incrementProcessed(prog); // First one was just chosen. for(int i = 1; i < k; i++) { double best = Double.POSITIVE_INFINITY; - WritableDoubleDataStore bestd = null, newd = null; + bestid.unset(); for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { if(medids.contains(iter)) { continue; } - if(newd == null) { - newd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - } - double sum = 0.; + double sum = 0., v; for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { - double v = MathUtil.min(distQ.distance(iter, iter2), mindist.doubleValue(iter2)); - sum += v; - newd.put(iter2, v); + sum += v = MathUtil.min(distQ.distance(iter, iter2), mindist.doubleValue(iter2)); + tempd.put(iter2, v); } if(sum < best) { best = sum; bestid.set(iter); - if(bestd != null) { - bestd.destroy(); - } - bestd = newd; - newd = null; + // Swap bestd and newd: + WritableDoubleDataStore temp = bestd; + bestd = tempd; + tempd = temp; } } - if(bestd == null) { + if(!bestid.isSet()) { throw new AbortException("No median found that improves the criterion function?!? Too many infinite distances."); } medids.add(bestid); - if(newd != null) { - newd.destroy(); - } - mindist.destroy(); - mindist = bestd; + // Swap bestd and mindist: + WritableDoubleDataStore temp = bestd; + bestd = mindist; + mindist = temp; LOG.incrementProcessed(prog); } LOG.ensureCompleted(prog); mindist.destroy(); + bestd.destroy(); + tempd.destroy(); return medids; } @@ -192,4 +189,4 @@ public class PAMInitialMeans<O> implements KMeansInitialization<NumberVector>, K return new PAMInitialMeans<>(); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java index ca8a612a..b01651b8 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/PredefinedInitialMeans.java @@ -38,12 +38,13 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.VectorListParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleArrayListParameter; /** * Run k-means with prespecified initial means. * * @author Erich Schubert + * @since 0.7.0 */ public class PredefinedInitialMeans extends AbstractKMeansInitialization<NumberVector> { /** @@ -147,9 +148,12 @@ public class PredefinedInitialMeans extends AbstractKMeansInitialization<NumberV @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - VectorListParameter meansP = new VectorListParameter(INITIAL_MEANS); + DoubleArrayListParameter meansP = new DoubleArrayListParameter(INITIAL_MEANS); if(config.grab(meansP)) { - initialMeans = meansP.getValue(); + initialMeans = new ArrayList<>(meansP.getValue().size()); + for(double[] v : meansP.getValue()) { + initialMeans.add(new Vector(v)); + } } } diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java index b6cfad8d..bb03b278 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyChosenInitialMeans.java @@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -51,12 +52,14 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * available in Biometrics). * * @author Erich Schubert + * @since 0.4.0 * * @param <O> Vector type */ @Reference(authors = "E. W. Forgy", // title = "Cluster analysis of multivariate data: efficiency versus interpretability of classifications", // booktitle = "Biometrics 21(3)") +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.RandomlyChosenInitialMeans") public class RandomlyChosenInitialMeans<O> extends AbstractKMeansInitialization<NumberVector> implements KMedoidsInitialization<O> { /** * Constructor. @@ -95,4 +98,4 @@ public class RandomlyChosenInitialMeans<O> extends AbstractKMeansInitialization< return new RandomlyChosenInitialMeans<>(rnd); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java index bc2181e8..30d1b00f 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/RandomlyGeneratedInitialMeans.java @@ -33,13 +33,16 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; /** * Initialize k-means by generating random vectors (within the data sets value * range). * * @author Erich Schubert + * @since 0.5.0 */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.RandomlyGeneratedInitialMeans") public class RandomlyGeneratedInitialMeans extends AbstractKMeansInitialization<NumberVector> { /** * Constructor. @@ -84,4 +87,4 @@ public class RandomlyGeneratedInitialMeans extends AbstractKMeansInitialization< return new RandomlyGeneratedInitialMeans(rnd); } } -}
\ No newline at end of file +} diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java index ac7f7f27..f5ed00f1 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/initialization/SampleKMeansInitialization.java @@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunctio import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; @@ -53,9 +54,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Initialize k-means by running k-means on a sample of the data set only. * * @author Erich Schubert + * @since 0.6.0 * * @param <V> Vector type */ +@Alias("de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.SampleKMeansInitialization") public class SampleKMeansInitialization<V extends NumberVector> extends AbstractKMeansInitialization<V> { /** * Variant of kMeans to use for initialization. |