summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java 70
1 files changed, 30 insertions, 40 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
index 51e7ace9..1c9c6a71 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/BestOfMultipleKMeans.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
/*
This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,7 +32,6 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
@@ -50,10 +49,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @author Erich Schubert
*
* @param <V> Vector type
- * @param <D> Distance type
* @param <M> Model type
*/
-public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<?>, M extends MeanModel<V>> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, D, M> {
+public class BestOfMultipleKMeans<V extends NumberVector, M extends MeanModel> extends AbstractAlgorithm<Clustering<M>> implements KMeans<V, M> {
/**
* The logger for this class.
*/
@@ -67,12 +65,12 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
/**
* Variant of kMeans for the bisecting step.
*/
- private KMeans<V, D, M> innerkMeans;
+ private KMeans<V, M> innerkMeans;
/**
* Quality measure which should be used.
*/
- private KMeansQualityMeasure<? super V, ? super D> qualityMeasure;
+ private KMeansQualityMeasure<? super V> qualityMeasure;
/**
* Constructor.
@@ -81,7 +79,7 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
* @param innerkMeans K-Means variant to actually use.
* @param qualityMeasure Quality measure
*/
- public BestOfMultipleKMeans(int trials, KMeans<V, D, M> innerkMeans, KMeansQualityMeasure<? super V, ? super D> qualityMeasure) {
+ public BestOfMultipleKMeans(int trials, KMeans<V, M> innerkMeans, KMeansQualityMeasure<? super V> qualityMeasure) {
super();
this.trials = trials;
this.innerkMeans = innerkMeans;
@@ -93,34 +91,27 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
if(!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
}
- final PrimitiveDistanceFunction<? super V, D> df = (PrimitiveDistanceFunction<? super V, D>) innerkMeans.getDistanceFunction();
+ @SuppressWarnings("unchecked")
+ final PrimitiveDistanceFunction<? super NumberVector> df = (PrimitiveDistanceFunction<? super NumberVector>) innerkMeans.getDistanceFunction();
+
Clustering<M> bestResult = null;
- if(trials > 1) {
- double bestCost = Double.POSITIVE_INFINITY;
- FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
- for(int i = 0; i < trials; i++) {
- Clustering<M> currentCandidate = innerkMeans.run(database, relation);
- double currentCost = qualityMeasure.calculateCost(currentCandidate, df, relation);
-
- if(LOG.isVerbose()) {
- LOG.verbose("Cost of candidate " + i + ": " + currentCost);
- }
-
- if(currentCost < bestCost) {
- bestResult = currentCandidate;
- bestCost = currentCost;
- }
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
+ double bestCost = Double.NaN;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
+ for(int i = 0; i < trials; i++) {
+ Clustering<M> currentCandidate = innerkMeans.run(database, relation);
+ double currentCost = qualityMeasure.quality(currentCandidate, df, relation);
+
+ if(LOG.isVerbose()) {
+ LOG.verbose("Cost of candidate " + i + ": " + currentCost);
}
- if(prog != null) {
- prog.ensureCompleted(LOG);
+
+ if(qualityMeasure.isBetter(currentCost, bestCost)) {
+ bestResult = currentCandidate;
+ bestCost = currentCost;
}
+ LOG.incrementProcessed(prog);
}
- else {
- bestResult = innerkMeans.run(database);
- }
+ LOG.ensureCompleted(prog);
return bestResult;
}
@@ -131,7 +122,7 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
}
@Override
- public DistanceFunction<? super V, D> getDistanceFunction() {
+ public DistanceFunction<? super V> getDistanceFunction() {
return innerkMeans.getDistanceFunction();
}
@@ -141,7 +132,7 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
}
@Override
- public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector<?>, D> distanceFunction) {
+ public void setDistanceFunction(PrimitiveDistanceFunction<? super NumberVector> distanceFunction) {
innerkMeans.setDistanceFunction(distanceFunction);
}
@@ -159,10 +150,9 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
* @apiviz.exclude
*
* @param <V> Vector type
- * @param <D> Distance type
* @param <M> Model type
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>, M extends MeanModel<V>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector, M extends MeanModel> extends AbstractParameterizer {
/**
* Parameter to specify the iterations of the bisecting step.
*/
@@ -186,12 +176,12 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
/**
* Variant of kMeans to use.
*/
- protected KMeans<V, D, M> kMeansVariant;
+ protected KMeans<V, M> kMeansVariant;
/**
* Quality measure.
*/
- protected KMeansQualityMeasure<? super V, ? super D> qualityMeasure;
+ protected KMeansQualityMeasure<? super V> qualityMeasure;
@Override
protected void makeOptions(Parameterization config) {
@@ -201,19 +191,19 @@ public class BestOfMultipleKMeans<V extends NumberVector<?>, D extends Distance<
trials = trialsP.intValue();
}
- ObjectParameter<KMeans<V, D, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
+ ObjectParameter<KMeans<V, M>> kMeansVariantP = new ObjectParameter<>(KMEANS_ID, KMeans.class);
if(config.grab(kMeansVariantP)) {
kMeansVariant = kMeansVariantP.instantiateClass(config);
}
- ObjectParameter<KMeansQualityMeasure<V, ? super D>> qualityMeasureP = new ObjectParameter<>(QUALITYMEASURE_ID, KMeansQualityMeasure.class);
+ ObjectParameter<KMeansQualityMeasure<V>> qualityMeasureP = new ObjectParameter<>(QUALITYMEASURE_ID, KMeansQualityMeasure.class);
if(config.grab(qualityMeasureP)) {
qualityMeasure = qualityMeasureP.instantiateClass(config);
}
}
@Override
- protected BestOfMultipleKMeans<V, D, M> makeInstance() {
+ protected BestOfMultipleKMeans<V, M> makeInstance() {
return new BestOfMultipleKMeans<>(trials, kMeansVariant, qualityMeasure);
}
}