diff options
Diffstat (limited to 'test/de/lmu/ifi/dbs/elki/algorithm/clustering')
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java | 3 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java | 2 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java | 140 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestSLINKResults.java (renamed from test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java) | 14 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java | 90 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java | 110 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java | 6 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java | 14 |
8 files changed, 362 insertions, 17 deletions
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java index a4fadeb2..1323c5fa 100644 --- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java @@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.evaluation.clustering.ClusterContingencyTable; import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeFactory; +import de.lmu.ifi.dbs.elki.persistent.AbstractPageFileFactory; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; @@ -62,7 +63,7 @@ public class TestDeLiCluResults extends AbstractSimpleAlgorithmTest implements J ListParameterization indexparams = new ListParameterization(); // We need a special index for this algorithm: indexparams.addParameter(StaticArrayDatabase.INDEX_ID, DeLiCluTreeFactory.class); - indexparams.addParameter(DeLiCluTreeFactory.PAGE_SIZE_ID, 1000); + indexparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 1000); Database db = makeSimpleDatabase(UNITTEST + "hierarchical-2d.ascii", 710, indexparams, null); // Setup actual algorithm diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java index ad11e6d2..62791b68 100644 --- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java @@ -66,7 +66,7 @@ public class TestCASHResults extends AbstractSimpleAlgorithmTest implements JUni // run CASH on database Clustering<Model> result = cash.run(db); - testFMeasure(db, result, 0.49055); // with hierarchical pairs: 0.64102 + testFMeasure(db, result, 0.490551); // with hierarchical pairs: 0.64102 testClusterSizes(result, new int[] { 37, 71, 76, 442 }); } diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java new file mode 100644 index 00000000..8ed18823 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java @@ -0,0 +1,140 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; +import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; + +/** + * Perform agglomerative hierarchical clustering, using the naive algorithm. + * + * @author Erich Schubert + */ +public class TestNaiveAgglomerativeHierarchicalClustering extends AbstractSimpleAlgorithmTest implements JUnit4Test { + // TODO: add more data sets. + + /** + * Run agglomerative hierarchical clustering with fixed parameters and compare + * the result to a golden standard. + */ + @Test + public void testSingleLink() { + Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3); + params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class); + params.addParameter(NaiveAgglomerativeHierarchicalClustering.Parameterizer.LINKAGE_ID, SingleLinkageMethod.class); + ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params); + testParameterizationOk(params); + + // run clustering algorithm on database + Result result = c.run(db); + Clustering<?> clustering = findSingleClustering(result); + testFMeasure(db, clustering, 0.6829722); + testClusterSizes(clustering, new int[] { 9, 200, 429 }); + } + + /** + * Run agglomerative hierarchical clustering with fixed parameters and compare + * the result to a golden standard. + */ + @Test + public void testWard() { + Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3); + params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class); + ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params); + testParameterizationOk(params); + + // run clustering algorithm on database + Result result = c.run(db); + Clustering<?> clustering = findSingleClustering(result); + testFMeasure(db, clustering, 0.93866265); + testClusterSizes(clustering, new int[] { 200, 211, 227 }); + } + + /** + * Run agglomerative hierarchical clustering with fixed parameters and compare + * the result to a golden standard. + */ + @Test + public void testGroupAverage() { + Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3); + params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class); + params.addParameter(NaiveAgglomerativeHierarchicalClustering.Parameterizer.LINKAGE_ID, GroupAverageLinkageMethod.class); + ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params); + testParameterizationOk(params); + + // run clustering algorithm on database + Result result = c.run(db); + Clustering<?> clustering = findSingleClustering(result); + testFMeasure(db, clustering, 0.93866265); + testClusterSizes(clustering, new int[] { 200, 211, 227 }); + } + + /** + * Run agglomerative hierarchical clustering with fixed parameters and compare + * the result to a golden standard. + */ + @Test + public void testCompleteLink() { + Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3); + params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class); + params.addParameter(NaiveAgglomerativeHierarchicalClustering.Parameterizer.LINKAGE_ID, CompleteLinkageMethod.class); + ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params); + testParameterizationOk(params); + + // run clustering algorithm on database + Result result = c.run(db); + Clustering<?> clustering = findSingleClustering(result); + testFMeasure(db, clustering, 0.938167802); + testClusterSizes(clustering, new int[] { 200, 217, 221 }); + } +} diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestSLINKResults.java index 44160dd0..8b25cf9c 100644 --- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestSLINKResults.java @@ -1,4 +1,4 @@ -package de.lmu.ifi.dbs.elki.algorithm.clustering; +package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical; /* This file is part of ELKI: @@ -28,13 +28,13 @@ import org.junit.Test; import de.lmu.ifi.dbs.elki.JUnit4Test; import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; import de.lmu.ifi.dbs.elki.data.Clustering; -import de.lmu.ifi.dbs.elki.data.DoubleVector; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.result.Result; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; +import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; /** * Performs a full SLINK run, and compares the result with a clustering derived @@ -60,14 +60,16 @@ public class TestSLINKResults extends AbstractSimpleAlgorithmTest implements JUn // Setup algorithm ListParameterization params = new ListParameterization(); - params.addParameter(SLINK.Parameterizer.SLINK_MINCLUSTERS_ID, 3); - SLINK<DoubleVector, DoubleDistance> slink = ClassGenericsUtil.parameterizeOrAbort(SLINK.class, params); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS); + params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3); + params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, SLINK.class); + ExtractFlatClusteringFromHierarchy<DoubleDistance> slink = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params); testParameterizationOk(params); // run SLINK on database Result result = slink.run(db); Clustering<?> clustering = findSingleClustering(result); testFMeasure(db, clustering, 0.6829722); - testClusterSizes(clustering, new int[] { 0, 0, 9, 200, 429 }); + testClusterSizes(clustering, new int[] { 9, 200, 429 }); } -}
\ No newline at end of file +} diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java new file mode 100644 index 00000000..d678981d --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.WithinClusterVarianceQualityMeasure; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.MeanModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the KMeansBisecting + * + * @author Stephan Baier + */ +public class TestKMeansBisecting extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run KMeansBisecting with fixed parameters and compare cluster size to + * expected value. + */ + @Test + public void testKMeansBisectingClusterSize() { + Database db = makeSimpleDatabase(UNITTEST + "bisecting-test.csv", 300); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(KMeans.K_ID, 3); + params.addParameter(BestOfMultipleKMeans.Parameterizer.TRIALS_ID, 5); + params.addParameter(BestOfMultipleKMeans.Parameterizer.KMEANS_ID, KMeansLloyd.class); + params.addParameter(BestOfMultipleKMeans.Parameterizer.QUALITYMEASURE_ID, WithinClusterVarianceQualityMeasure.class); + + KMeansBisecting<DoubleVector, DoubleDistance, MeanModel<DoubleVector>> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansBisecting.class, params); + testParameterizationOk(params); + + // run KMedians on database + Clustering<MeanModel<DoubleVector>> result = kmeans.run(db); + testClusterSizes(result, new int[] { 103, 97, 100 }); + } + + /** + * Run KMeansBisecting with fixed parameters (k = 2) and compare f-measure to + * golden standard. + */ + @Test + public void testKMeansBisectingFMeasure() { + Database db = makeSimpleDatabase(UNITTEST + "bisecting-test.csv", 300); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(KMeans.K_ID, 2); + params.addParameter(BestOfMultipleKMeans.Parameterizer.TRIALS_ID, 5); + params.addParameter(BestOfMultipleKMeans.Parameterizer.KMEANS_ID, KMeansLloyd.class); + params.addParameter(BestOfMultipleKMeans.Parameterizer.QUALITYMEASURE_ID, WithinClusterVarianceQualityMeasure.class); + + KMeansBisecting<DoubleVector, DoubleDistance, MeanModel<DoubleVector>> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansBisecting.class, params); + testParameterizationOk(params); + + // run KMedians on database + Clustering<MeanModel<DoubleVector>> result = kmeans.run(db); + testFMeasure(db, result, 0.7408); + } +} diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java new file mode 100644 index 00000000..44603617 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java @@ -0,0 +1,110 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans; + +/* + This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.KMeansQualityMeasure; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.WithinClusterMeanDistanceQualityMeasure; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.WithinClusterVarianceQualityMeasure; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.model.MeanModel; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Test cluster quality measure computations. + * + * @author Stephan Baier + */ +public class TestKMeansQualityMeasure extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Test cluster variance. + */ + @Test + public void testVariance() { + Database db = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7); + Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params = new ListParameterization(); + params.addParameter(KMeans.K_ID, 2); + params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class); + AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params); + testParameterizationOk(params); + + // run KMeans on database + @SuppressWarnings("unchecked") + Clustering<MeanModel<DoubleVector>> result2 = (Clustering<MeanModel<DoubleVector>>) kmeans.run(db); + + // Test Cluster Variance + KMeansQualityMeasure<? super DoubleVector, ? super DoubleDistance> variance = new WithinClusterVarianceQualityMeasure(); + @SuppressWarnings("unchecked") + final PrimitiveDoubleDistanceFunction<NumberVector<?>> dist = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) kmeans.getDistanceFunction(); + + final double quality = variance.calculateCost(result2, dist, rel); + assertEquals("Within cluster variance incorrect", 3.16666666666, quality, 1e-10); + } + + /** + * Test cluster average overall distance. + */ + @Test + public void testOverallDistance() { + + Database db = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7); + Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params = new ListParameterization(); + params.addParameter(KMeans.K_ID, 2); + params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class); + AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params); + testParameterizationOk(params); + + // run KMeans on database + @SuppressWarnings("unchecked") + Clustering<MeanModel<DoubleVector>> result = (Clustering<MeanModel<DoubleVector>>) kmeans.run(db); + @SuppressWarnings("unchecked") + final PrimitiveDoubleDistanceFunction<NumberVector<?>> dist = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) kmeans.getDistanceFunction(); + + // Test Cluster Average Overall Distance + KMeansQualityMeasure<? super DoubleVector, ? super DoubleDistance> overall = new WithinClusterMeanDistanceQualityMeasure(); + final double quality = overall.calculateCost(result, dist, rel); + + assertEquals("Avarage overall distance not as expected.", 0.8888888888888888, quality, 1e-10); + } +} diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java index bfe57052..3419352a 100644 --- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java @@ -61,7 +61,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU // Setup algorithm ListParameterization params = new ListParameterization(); params.addParameter(KMeans.K_ID, 5); - params.addParameter(KMeans.SEED_ID, 3); + params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class); AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params); testParameterizationOk(params); @@ -84,7 +84,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU // Setup algorithm ListParameterization params = new ListParameterization(); params.addParameter(KMeans.K_ID, 5); - params.addParameter(KMeans.SEED_ID, 3); + params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class); AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansMacQueen.class, params); testParameterizationOk(params); @@ -107,7 +107,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU // Setup algorithm ListParameterization params = new ListParameterization(); params.addParameter(KMeans.K_ID, 5); - params.addParameter(KMeans.SEED_ID, 3); + params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class); AbstractKMeans<DoubleVector, DoubleDistance, ?> kmedians = ClassGenericsUtil.parameterizeOrAbort(KMediansLloyd.class, params); testParameterizationOk(params); diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java index b7dde28e..bfb94ee3 100644 --- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java @@ -58,7 +58,8 @@ public class TestPROCLUSResults extends AbstractSimpleAlgorithmTest implements J ListParameterization params = new ListParameterization(); params.addParameter(PROCLUS.Parameterizer.L_ID, 1); params.addParameter(PROCLUS.Parameterizer.K_ID, 4); - params.addParameter(PROCLUS.Parameterizer.SEED_ID, 2); + // NOTE: PROCLUS quality heavily depends on random... + params.addParameter(PROCLUS.Parameterizer.SEED_ID, 0); // setup algorithm PROCLUS<DoubleVector> proclus = ClassGenericsUtil.parameterizeOrAbort(PROCLUS.class, params); @@ -67,8 +68,8 @@ public class TestPROCLUSResults extends AbstractSimpleAlgorithmTest implements J // run PROCLUS on database Clustering<?> result = proclus.run(db); - testFMeasure(db, result, 0.900947932); - testClusterSizes(result, new int[] { 15, 35, 200, 350 }); + testFMeasure(db, result, 0.6946958); + testClusterSizes(result, new int[] { 45, 151, 200, 204 }); } /** @@ -85,13 +86,14 @@ public class TestPROCLUSResults extends AbstractSimpleAlgorithmTest implements J ListParameterization params = new ListParameterization(); params.addParameter(PROCLUS.Parameterizer.L_ID, 2); params.addParameter(PROCLUS.Parameterizer.K_ID, 3); - params.addParameter(PROCLUS.Parameterizer.SEED_ID, 0); + // NOTE: PROCLUS quality heavily depends on random... + params.addParameter(PROCLUS.Parameterizer.SEED_ID, 1); PROCLUS<DoubleVector> proclus = ClassGenericsUtil.parameterizeOrAbort(PROCLUS.class, params); testParameterizationOk(params); // run PROCLUS on database Clustering<?> result = proclus.run(db); - testFMeasure(db, result, 0.739931511); - testClusterSizes(result, new int[] { 146, 259, 445 }); + testFMeasure(db, result, 0.7812455); + testClusterSizes(result, new int[] { 111, 269, 470 }); } }
\ No newline at end of file |