diff options
Diffstat (limited to 'test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java')
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java new file mode 100644 index 00000000..c35a7194 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java @@ -0,0 +1,95 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.MeanModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full KMeans run, and compares the result with a clustering derived + * from the data set labels. This test ensures that KMeans's performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run KMeans with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException + */ + @Test + public void testKMeansLloyd() { + Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(AbstractKMeans.K_ID, 5); + params.addParameter(AbstractKMeans.SEED_ID, 3); + AbstractKMeans<DoubleVector, DoubleDistance> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params); + testParameterizationOk(params); + + // run KMeans on database + Clustering<MeanModel<DoubleVector>> result = kmeans.run(db); + testFMeasure(db, result, 0.998005); + testClusterSizes(result, new int[] { 199, 200, 200, 200, 201 }); + } + + /** + * Run KMeans with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException + */ + @Test + public void testKMeansMacQueen() { + Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(AbstractKMeans.K_ID, 5); + params.addParameter(AbstractKMeans.SEED_ID, 3); + AbstractKMeans<DoubleVector, DoubleDistance> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansMacQueen.class, params); + testParameterizationOk(params); + + // run KMeans on database + Clustering<MeanModel<DoubleVector>> result = kmeans.run(db); + testFMeasure(db, result, 0.998005); + testClusterSizes(result, new int[] { 199, 200, 200, 200, 201 }); + } +}
\ No newline at end of file |