diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java | 70 |
1 file changed, 30 insertions, 40 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java index 51e7ace9..1c9c6a71 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -32,7 +32,6 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; @@ -50,10 +49,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @author Erich Schubert * * @param <V> Vector type - * @param <D> Distance type * @param <M> Model type */ -public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, D, M> { +public class BestOfMultipleKMeans<V extends NumberVector, M extends MeanModel> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, M> { /** * The logger for this class. */ @@ -67,12 +65,12 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< /** * Variant of kMeans for the bisecting step. 
*/ - private KMeans<V, D, M> innerkMeans; + private KMeans<V, M> innerkMeans; /** * Quality measure which should be used. */ - private KMeansQualityMeasure<? super V, ? super D> qualityMeasure; + private KMeansQualityMeasure<? super V> qualityMeasure; /** * Constructor. @@ -81,7 +79,7 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< * @param innerkMeans K-Means variant to actually use. * @param qualityMeasure Quality measure */ - public BestOfMultipleKMeans(int trials, KMeans<V, D, M> innerkMeans, KMeansQualityMeasure<? super V, ? super D> qualityMeasure) { + public BestOfMultipleKMeans(int trials, KMeans<V, M> innerkMeans, KMeansQualityMeasure<? super V> qualityMeasure) { super(); this.trials = trials; this.innerkMeans = innerkMeans; @@ -93,34 +91,27 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< if(!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) { throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass()); } - final PrimitiveDistanceFunction<? super V, D> df = (PrimitiveDistanceFunction<? super V, D>) innerkMeans.getDistanceFunction(); + @SuppressWarnings("unchecked") + final PrimitiveDistanceFunction<? super NumberVector> df = (PrimitiveDistanceFunction<? super NumberVector>) innerkMeans.getDistanceFunction(); + Clustering<M> bestResult = null; - if(trials > 1) { - double bestCost = Double.POSITIVE_INFINITY; - FiniteProgress prog = LOG.isVerbose() ? 
new FiniteProgress("K-means iterations", trials, LOG) : null; - for(int i = 0; i < trials; i++) { - Clustering<M> currentCandidate = innerkMeans.run(database, relation); - double currentCost = qualityMeasure.calculateCost(currentCandidate, df, relation); - - if(LOG.isVerbose()) { - LOG.verbose("Cost of candidate " + i + ": " + currentCost); - } - - if(currentCost < bestCost) { - bestResult = currentCandidate; - bestCost = currentCost; - } - if(prog != null) { - prog.incrementProcessed(LOG); - } + double bestCost = Double.NaN; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null; + for(int i = 0; i < trials; i++) { + Clustering<M> currentCandidate = innerkMeans.run(database, relation); + double currentCost = qualityMeasure.quality(currentCandidate, df, relation); + + if(LOG.isVerbose()) { + LOG.verbose("Cost of candidate " + i + ": " + currentCost); } - if(prog != null) { - prog.ensureCompleted(LOG); + + if(qualityMeasure.isBetter(currentCost, bestCost)) { + bestResult = currentCandidate; + bestCost = currentCost; } + LOG.incrementProcessed(prog); } - else { - bestResult = innerkMeans.run(database); - } + LOG.ensureCompleted(prog); return bestResult; } @@ -131,7 +122,7 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< } @Override - public DistanceFunction<? super V, D> getDistanceFunction() { + public DistanceFunction<? super V> getDistanceFunction() { return innerkMeans.getDistanceFunction(); } @@ -141,7 +132,7 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< } @Override - public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) { + public void setDistanceFunction(PrimitiveDistanceFunction<? 
super NumberVector> distanceFunction) { innerkMeans.setDistanceFunction(distanceFunction); } @@ -159,10 +150,9 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< * @apiviz.exclude * * @param <V> Vector type - * @param <D> Distance type * @param <M> Model type */ - public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector, M extends MeanModel> extends AbstractParameterizer { /** * Parameter to specify the iterations of the bisecting step. */ @@ -186,12 +176,12 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< /** * Variant of kMeans to use. */ - protected KMeans<V, D, M> kMeansVariant; + protected KMeans<V, M> kMeansVariant; /** * Quality measure. */ - protected KMeansQualityMeasure<? super V, ? super D> qualityMeasure; + protected KMeansQualityMeasure<? super V> qualityMeasure; @Override protected void makeOptions(Parameterization config) { @@ -201,19 +191,19 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance< trials = trialsP.intValue(); } - ObjectParameter<KMeans<V, D, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class); + ObjectParameter<KMeans<V, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class); if(config.grab(kMeansVariantP)) { kMeansVariant = kMeansVariantP.instantiateClass(config); } - ObjectParameter<KMeansQualityMeasure<V, ? 
super D>> qualityMeasureP = new ObjectParameter<>(QUALITYMEASURE_ID, KMeansQualityMeasure.class); + ObjectParameter<KMeansQualityMeasure<V>> qualityMeasureP = new ObjectParameter<>(QUALITYMEASURE_ID, KMeansQualityMeasure.class); if(config.grab(qualityMeasureP)) { qualityMeasure = qualityMeasureP.instantiateClass(config); } } @Override - protected BestOfMultipleKMeans<V, D, M> makeInstance() { + protected BestOfMultipleKMeans<V, M> makeInstance() { return new BestOfMultipleKMeans<>(trials, kMeansVariant, qualityMeasure); } } |