summaryrefslogtreecommitdiff
path: root/test/de/lmu/ifi/dbs/elki/algorithm/clustering
diff options
context:
space:
mode:
Diffstat (limited to 'test/de/lmu/ifi/dbs/elki/algorithm/clustering')
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java3
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java2
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java140
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestSLINKResults.java (renamed from test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java)14
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java90
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java110
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java6
-rw-r--r--test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java14
8 files changed, 362 insertions, 17 deletions
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java
index a4fadeb2..1323c5fa 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java
@@ -37,6 +37,7 @@ import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.evaluation.clustering.ClusterContingencyTable;
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeFactory;
+import de.lmu.ifi.dbs.elki.persistent.AbstractPageFileFactory;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
@@ -62,7 +63,7 @@ public class TestDeLiCluResults extends AbstractSimpleAlgorithmTest implements J
ListParameterization indexparams = new ListParameterization();
// We need a special index for this algorithm:
indexparams.addParameter(StaticArrayDatabase.INDEX_ID, DeLiCluTreeFactory.class);
- indexparams.addParameter(DeLiCluTreeFactory.PAGE_SIZE_ID, 1000);
+ indexparams.addParameter(AbstractPageFileFactory.Parameterizer.PAGE_SIZE_ID, 1000);
Database db = makeSimpleDatabase(UNITTEST + "hierarchical-2d.ascii", 710, indexparams, null);
// Setup actual algorithm
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java
index ad11e6d2..62791b68 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java
@@ -66,7 +66,7 @@ public class TestCASHResults extends AbstractSimpleAlgorithmTest implements JUni
// run CASH on database
Clustering<Model> result = cash.run(db);
- testFMeasure(db, result, 0.49055); // with hierarchical pairs: 0.64102
+ testFMeasure(db, result, 0.490551); // with hierarchical pairs: 0.64102
testClusterSizes(result, new int[] { 37, 71, 76, 442 });
}
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java
new file mode 100644
index 00000000..8ed18823
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestNaiveAgglomerativeHierarchicalClustering.java
@@ -0,0 +1,140 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep;
+
+/**
+ * Perform agglomerative hierarchical clustering, using the naive algorithm.
+ *
+ * @author Erich Schubert
+ */
+public class TestNaiveAgglomerativeHierarchicalClustering extends AbstractSimpleAlgorithmTest implements JUnit4Test {
+ // TODO: add more data sets.
+
+ /**
+ * Run the naive agglomerative algorithm with SingleLinkageMethod, extract a flat clustering
+ * (MINCLUSTERS_ID = 3, STRICT_PARTITIONS) and compare F-measure and cluster sizes to stored reference values.
+ */
+ @Test
+ public void testSingleLink() {
+ Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638);
+
+ // Setup algorithm: ExtractFlatClusteringFromHierarchy, with the naive algorithm passed as ALGORITHM_ID
+ ListParameterization params = new ListParameterization();
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS);
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3);
+ params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class);
+ params.addParameter(NaiveAgglomerativeHierarchicalClustering.Parameterizer.LINKAGE_ID, SingleLinkageMethod.class);
+ ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params);
+ testParameterizationOk(params);
+
+ // run on the database, then check the single clustering found in the result
+ Result result = c.run(db);
+ Clustering<?> clustering = findSingleClustering(result);
+ testFMeasure(db, clustering, 0.6829722);
+ testClusterSizes(clustering, new int[] { 9, 200, 429 });
+ }
+
+ /**
+ * Run the naive agglomerative algorithm without setting LINKAGE_ID, i.e. with its default linkage
+ * (presumably Ward, going by the test name - TODO confirm), and compare F-measure and cluster sizes to stored reference values.
+ */
+ @Test
+ public void testWard() {
+ Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638);
+
+ // Setup algorithm: same as the other tests, except that no LINKAGE_ID is given
+ ListParameterization params = new ListParameterization();
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS);
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3);
+ params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class);
+ ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params);
+ testParameterizationOk(params);
+
+ // run on the database, then check the single clustering found in the result
+ Result result = c.run(db);
+ Clustering<?> clustering = findSingleClustering(result);
+ testFMeasure(db, clustering, 0.93866265);
+ testClusterSizes(clustering, new int[] { 200, 211, 227 });
+ }
+
+ /**
+ * Run the naive agglomerative algorithm with GroupAverageLinkageMethod, extract a flat clustering
+ * (MINCLUSTERS_ID = 3, STRICT_PARTITIONS) and compare F-measure and cluster sizes to stored reference values.
+ */
+ @Test
+ public void testGroupAverage() {
+ Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638);
+
+ // Setup algorithm: ExtractFlatClusteringFromHierarchy, with the naive algorithm passed as ALGORITHM_ID
+ ListParameterization params = new ListParameterization();
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS);
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3);
+ params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class);
+ params.addParameter(NaiveAgglomerativeHierarchicalClustering.Parameterizer.LINKAGE_ID, GroupAverageLinkageMethod.class);
+ ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params);
+ testParameterizationOk(params);
+
+ // run on the database, then check the single clustering found in the result
+ Result result = c.run(db);
+ Clustering<?> clustering = findSingleClustering(result);
+ testFMeasure(db, clustering, 0.93866265);
+ testClusterSizes(clustering, new int[] { 200, 211, 227 });
+ }
+
+ /**
+ * Run the naive agglomerative algorithm with CompleteLinkageMethod, extract a flat clustering
+ * (MINCLUSTERS_ID = 3, STRICT_PARTITIONS) and compare F-measure and cluster sizes to stored reference values.
+ */
+ @Test
+ public void testCompleteLink() {
+ Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638);
+
+ // Setup algorithm: ExtractFlatClusteringFromHierarchy, with the naive algorithm passed as ALGORITHM_ID
+ ListParameterization params = new ListParameterization();
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS);
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3);
+ params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, NaiveAgglomerativeHierarchicalClustering.class);
+ params.addParameter(NaiveAgglomerativeHierarchicalClustering.Parameterizer.LINKAGE_ID, CompleteLinkageMethod.class);
+ ExtractFlatClusteringFromHierarchy<DoubleDistance> c = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params);
+ testParameterizationOk(params);
+
+ // run on the database, then check the single clustering found in the result
+ Result result = c.run(db);
+ Clustering<?> clustering = findSingleClustering(result);
+ testFMeasure(db, clustering, 0.938167802);
+ testClusterSizes(clustering, new int[] { 200, 217, 221 });
+ }
+}
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestSLINKResults.java
index 44160dd0..8b25cf9c 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/TestSLINKResults.java
@@ -1,4 +1,4 @@
-package de.lmu.ifi.dbs.elki.algorithm.clustering;
+package de.lmu.ifi.dbs.elki.algorithm.clustering.hierarchical;
/*
This file is part of ELKI:
@@ -28,13 +28,13 @@ import org.junit.Test;
import de.lmu.ifi.dbs.elki.JUnit4Test;
import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep;
/**
* Performs a full SLINK run, and compares the result with a clustering derived
@@ -60,14 +60,16 @@ public class TestSLINKResults extends AbstractSimpleAlgorithmTest implements JUn
// Setup algorithm
ListParameterization params = new ListParameterization();
- params.addParameter(SLINK.Parameterizer.SLINK_MINCLUSTERS_ID, 3);
- SLINK<DoubleVector, DoubleDistance> slink = ClassGenericsUtil.parameterizeOrAbort(SLINK.class, params);
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.OUTPUTMODE_ID, ExtractFlatClusteringFromHierarchy.OutputMode.STRICT_PARTITIONS);
+ params.addParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID, 3);
+ params.addParameter(AlgorithmStep.Parameterizer.ALGORITHM_ID, SLINK.class);
+ ExtractFlatClusteringFromHierarchy<DoubleDistance> slink = ClassGenericsUtil.parameterizeOrAbort(ExtractFlatClusteringFromHierarchy.class, params);
testParameterizationOk(params);
// run SLINK on database
Result result = slink.run(db);
Clustering<?> clustering = findSingleClustering(result);
testFMeasure(db, clustering, 0.6829722);
- testClusterSizes(clustering, new int[] { 0, 0, 9, 200, 429 });
+ testClusterSizes(clustering, new int[] { 9, 200, 429 });
}
-} \ No newline at end of file
+}
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java
new file mode 100644
index 00000000..d678981d
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansBisecting.java
@@ -0,0 +1,90 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.WithinClusterVarianceQualityMeasure;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+
+/**
+ * Tests the KMeansBisecting algorithm.
+ *
+ * @author Stephan Baier
+ */
+public class TestKMeansBisecting extends AbstractSimpleAlgorithmTest implements JUnit4Test {
+ /**
+ * Run KMeansBisecting with fixed parameters (k = 3) and compare the cluster
+ * sizes to the expected values.
+ */
+ @Test
+ public void testKMeansBisectingClusterSize() {
+ Database db = makeSimpleDatabase(UNITTEST + "bisecting-test.csv", 300);
+
+ // Setup algorithm: BestOfMultipleKMeans options select KMeansLloyd, 5 trials and the within-cluster variance measure
+ ListParameterization params = new ListParameterization();
+ params.addParameter(KMeans.K_ID, 3);
+ params.addParameter(BestOfMultipleKMeans.Parameterizer.TRIALS_ID, 5);
+ params.addParameter(BestOfMultipleKMeans.Parameterizer.KMEANS_ID, KMeansLloyd.class);
+ params.addParameter(BestOfMultipleKMeans.Parameterizer.QUALITYMEASURE_ID, WithinClusterVarianceQualityMeasure.class);
+
+ KMeansBisecting<DoubleVector, DoubleDistance, MeanModel<DoubleVector>> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansBisecting.class, params);
+ testParameterizationOk(params);
+
+ // run KMeansBisecting on database
+ Clustering<MeanModel<DoubleVector>> result = kmeans.run(db);
+ testClusterSizes(result, new int[] { 103, 97, 100 });
+ }
+
+ /**
+ * Run KMeansBisecting with fixed parameters (k = 2) and compare the F-measure
+ * to the stored reference value.
+ */
+ @Test
+ public void testKMeansBisectingFMeasure() {
+ Database db = makeSimpleDatabase(UNITTEST + "bisecting-test.csv", 300);
+
+ // Setup algorithm: BestOfMultipleKMeans options select KMeansLloyd, 5 trials and the within-cluster variance measure
+ ListParameterization params = new ListParameterization();
+ params.addParameter(KMeans.K_ID, 2);
+ params.addParameter(BestOfMultipleKMeans.Parameterizer.TRIALS_ID, 5);
+ params.addParameter(BestOfMultipleKMeans.Parameterizer.KMEANS_ID, KMeansLloyd.class);
+ params.addParameter(BestOfMultipleKMeans.Parameterizer.QUALITYMEASURE_ID, WithinClusterVarianceQualityMeasure.class);
+
+ KMeansBisecting<DoubleVector, DoubleDistance, MeanModel<DoubleVector>> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansBisecting.class, params);
+ testParameterizationOk(params);
+
+ // run KMeansBisecting on database
+ Clustering<MeanModel<DoubleVector>> result = kmeans.run(db);
+ testFMeasure(db, result, 0.7408);
+ }
+}
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java
new file mode 100644
index 00000000..44603617
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansQualityMeasure.java
@@ -0,0 +1,110 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.KMeansQualityMeasure;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.WithinClusterMeanDistanceQualityMeasure;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.quality.WithinClusterVarianceQualityMeasure;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+
+/**
+ * Test cluster quality measure computations.
+ *
+ * @author Stephan Baier
+ */
+public class TestKMeansQualityMeasure extends AbstractSimpleAlgorithmTest implements JUnit4Test {
+ /**
+ * Run KMeansLloyd (k = 2, FirstKInitialMeans) and check the cost reported by WithinClusterVarianceQualityMeasure.
+ */
+ @Test
+ public void testVariance() {
+ Database db = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7);
+ Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
+
+ // Setup algorithm. NOTE(review): params is assigned twice below; the second assignment is redundant.
+ ListParameterization params = new ListParameterization();
+ params = new ListParameterization();
+ params.addParameter(KMeans.K_ID, 2);
+ params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
+ AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params);
+ testParameterizationOk(params);
+
+ // run KMeans on database; the cast narrows the wildcard model type to MeanModel
+ @SuppressWarnings("unchecked")
+ Clustering<MeanModel<DoubleVector>> result2 = (Clustering<MeanModel<DoubleVector>>) kmeans.run(db);
+
+ // Test Cluster Variance, using the distance function the k-means instance was configured with
+ KMeansQualityMeasure<? super DoubleVector, ? super DoubleDistance> variance = new WithinClusterVarianceQualityMeasure();
+ @SuppressWarnings("unchecked")
+ final PrimitiveDoubleDistanceFunction<NumberVector<?>> dist = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) kmeans.getDistanceFunction();
+
+ final double quality = variance.calculateCost(result2, dist, rel);
+ assertEquals("Within cluster variance incorrect", 3.16666666666, quality, 1e-10);
+ }
+
+ /**
+ * Run KMeansLloyd (k = 2, FirstKInitialMeans) and check the cost reported by WithinClusterMeanDistanceQualityMeasure.
+ */
+ @Test
+ public void testOverallDistance() {
+
+ Database db = makeSimpleDatabase(UNITTEST + "quality-measure-test.csv", 7);
+ Relation<DoubleVector> rel = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
+
+ // Setup algorithm. NOTE(review): params is assigned twice below; the second assignment is redundant.
+ ListParameterization params = new ListParameterization();
+ params = new ListParameterization();
+ params.addParameter(KMeans.K_ID, 2);
+ params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
+ AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params);
+ testParameterizationOk(params);
+
+ // run KMeans on database; the cast narrows the wildcard model type to MeanModel
+ @SuppressWarnings("unchecked")
+ Clustering<MeanModel<DoubleVector>> result = (Clustering<MeanModel<DoubleVector>>) kmeans.run(db);
+ @SuppressWarnings("unchecked")
+ final PrimitiveDoubleDistanceFunction<NumberVector<?>> dist = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) kmeans.getDistanceFunction();
+
+ // Test the within-cluster mean distance cost
+ KMeansQualityMeasure<? super DoubleVector, ? super DoubleDistance> overall = new WithinClusterMeanDistanceQualityMeasure();
+ final double quality = overall.calculateCost(result, dist, rel);
+
+ assertEquals("Avarage overall distance not as expected.", 0.8888888888888888, quality, 1e-10);
+ }
+}
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java
index bfe57052..3419352a 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/TestKMeansResults.java
@@ -61,7 +61,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
params.addParameter(KMeans.K_ID, 5);
- params.addParameter(KMeans.SEED_ID, 3);
+ params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansLloyd.class, params);
testParameterizationOk(params);
@@ -84,7 +84,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
params.addParameter(KMeans.K_ID, 5);
- params.addParameter(KMeans.SEED_ID, 3);
+ params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
AbstractKMeans<DoubleVector, DoubleDistance, ?> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeansMacQueen.class, params);
testParameterizationOk(params);
@@ -107,7 +107,7 @@ public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JU
// Setup algorithm
ListParameterization params = new ListParameterization();
params.addParameter(KMeans.K_ID, 5);
- params.addParameter(KMeans.SEED_ID, 3);
+ params.addParameter(KMeans.INIT_ID, FirstKInitialMeans.class);
AbstractKMeans<DoubleVector, DoubleDistance, ?> kmedians = ClassGenericsUtil.parameterizeOrAbort(KMediansLloyd.class, params);
testParameterizationOk(params);
diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java
index b7dde28e..bfb94ee3 100644
--- a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java
+++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java
@@ -58,7 +58,8 @@ public class TestPROCLUSResults extends AbstractSimpleAlgorithmTest implements J
ListParameterization params = new ListParameterization();
params.addParameter(PROCLUS.Parameterizer.L_ID, 1);
params.addParameter(PROCLUS.Parameterizer.K_ID, 4);
- params.addParameter(PROCLUS.Parameterizer.SEED_ID, 2);
+ // NOTE: PROCLUS result quality depends heavily on the random seed.
+ params.addParameter(PROCLUS.Parameterizer.SEED_ID, 0);
// setup algorithm
PROCLUS<DoubleVector> proclus = ClassGenericsUtil.parameterizeOrAbort(PROCLUS.class, params);
@@ -67,8 +68,8 @@ public class TestPROCLUSResults extends AbstractSimpleAlgorithmTest implements J
// run PROCLUS on database
Clustering<?> result = proclus.run(db);
- testFMeasure(db, result, 0.900947932);
- testClusterSizes(result, new int[] { 15, 35, 200, 350 });
+ testFMeasure(db, result, 0.6946958);
+ testClusterSizes(result, new int[] { 45, 151, 200, 204 });
}
/**
@@ -85,13 +86,14 @@ public class TestPROCLUSResults extends AbstractSimpleAlgorithmTest implements J
ListParameterization params = new ListParameterization();
params.addParameter(PROCLUS.Parameterizer.L_ID, 2);
params.addParameter(PROCLUS.Parameterizer.K_ID, 3);
- params.addParameter(PROCLUS.Parameterizer.SEED_ID, 0);
+ // NOTE: PROCLUS result quality depends heavily on the random seed.
+ params.addParameter(PROCLUS.Parameterizer.SEED_ID, 1);
PROCLUS<DoubleVector> proclus = ClassGenericsUtil.parameterizeOrAbort(PROCLUS.class, params);
testParameterizationOk(params);
// run PROCLUS on database
Clustering<?> result = proclus.run(db);
- testFMeasure(db, result, 0.739931511);
- testClusterSizes(result, new int[] { 146, 259, 445 });
+ testFMeasure(db, result, 0.7812455);
+ testClusterSizes(result, new int[] { 111, 269, 470 });
}
} \ No newline at end of file