diff options
author | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:28 +0000 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:28 +0000 |
commit | cde76aeb42240f7270bc6605c606ae07d2dc5a7d (patch) | |
tree | c3ebf1d7745224f524da31dbabc5d76b9ea75916 /test/de/lmu/ifi |
Import Upstream version 0.4.0~beta1
Diffstat (limited to 'test/de/lmu/ifi')
61 files changed, 4358 insertions, 0 deletions
diff --git a/test/de/lmu/ifi/dbs/elki/AllTests.java b/test/de/lmu/ifi/dbs/elki/AllTests.java new file mode 100644 index 00000000..388ad193 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/AllTests.java @@ -0,0 +1,37 @@ +package de.lmu.ifi.dbs.elki; + +import java.util.List; + +import junit.framework.JUnit4TestAdapter; +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; +import de.lmu.ifi.dbs.elki.utilities.InspectionUtil; + +/** + * Build a test suite with all tests included with ELKI + * + * @author Erich Schubert + */ +public class AllTests extends TestSuite { + /** + * Build a test suite with all tests included in ELKI. + * + * @return Test suite + */ + public static Test suite() { + TestSuite suite = new TestSuite(); + List<Class<?>> tests = InspectionUtil.findAllImplementations(TestCase.class, false); + tests.addAll(InspectionUtil.findAllImplementations(JUnit4Test.class, false)); + for(Class<?> cls : tests) { + if (cls == AllTests.class) { + continue; + } + Test test = new JUnit4TestAdapter(cls); + if(test != null) { + suite.addTest(test); + } + } + return suite; + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/JUnit4Test.java b/test/de/lmu/ifi/dbs/elki/JUnit4Test.java new file mode 100644 index 00000000..0a8126ba --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/JUnit4Test.java @@ -0,0 +1,14 @@ +package de.lmu.ifi.dbs.elki; + +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * This interface is used for test-discovery by the {@link AllTests} TestSuite. + * + * @author Erich Schubert + */ +@RunWith(JUnit4.class) +public interface JUnit4Test { + // empty marker interface +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java b/test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java new file mode 100644 index 00000000..f17f9ebf --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/AbstractSimpleAlgorithmTest.java @@ -0,0 +1,237 @@ +package de.lmu.ifi.dbs.elki.algorithm; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering; +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelHierarchicalClustering; +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.PairCountingFMeasure; +import de.lmu.ifi.dbs.elki.evaluation.roc.ComputeROCCurve; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.result.ResultUtil; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Abstract base class useful for testing various algorithms. 
+ * + * @author Erich Schubert + */ +public abstract class AbstractSimpleAlgorithmTest { + /** + * Base path for unit test files. + */ + public final static String UNITTEST = "data/testdata/unittests/"; + + /** + * Notice: this is okay for tests - don't use this for frequently used + * objects, use a static instance instead! + */ + protected Logging logger = Logging.getLogger(this.getClass()); + + /** + * Validate that parameterization succeeded: no parameters left, no + * parameterization errors. + * + * @param config Parameterization to test + */ + protected void testParameterizationOk(ListParameterization config) { + if(config.hasUnusedParameters()) { + fail("Unused parameters: " + config.getRemainingParameters()); + } + if(config.hasErrors()) { + config.logAndClearReportedErrors(); + fail("Parameterization errors."); + } + } + + /** + * Generate a simple DoubleVector database from a file. + * + * @param filename File to load + * @param expectedSize Expected size in records + * @param params Extra parameters + * @return Database + */ + protected <T> Database makeSimpleDatabase(String filename, int expectedSize, ListParameterization params, Class<?>[] filters) { + org.junit.Assert.assertTrue("Test data set not found: " + filename, (new File(filename)).exists()); + params.addParameter(FileBasedDatabaseConnection.INPUT_ID, filename); + + List<Class<?>> filterlist = new ArrayList<Class<?>>(); + filterlist.add(FixedDBIDsFilter.class); + if (filters != null) { + for (Class<?> filter : filters) { + filterlist.add(filter); + } + } + params.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filterlist); + params.addParameter(FixedDBIDsFilter.IDSTART_ID, 1); + Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params); + + testParameterizationOk(params); + + db.initialize(); + Relation<?> rel = db.getRelation(TypeUtil.ANY); + org.junit.Assert.assertEquals("Database size does not match.", expectedSize, rel.size()); + return db; + } + + /** + * 
Generate a simple DoubleVector database from a file. + * + * @param filename File to load + * @param expectedSize Expected size in records + * @return Database + */ + protected <T> Database makeSimpleDatabase(String filename, int expectedSize) { + return makeSimpleDatabase(filename, expectedSize, new ListParameterization(), null); + } + + /** + * Find a clustering result, fail if there is more than one or none. + * + * @param result Base result + * @return Clustering + */ + protected Clustering<?> findSingleClustering(Result result) { + List<Clustering<? extends Model>> clusterresults = ResultUtil.getClusteringResults(result); + assertTrue("No unique clustering found in result.", clusterresults.size() == 1); + Clustering<? extends Model> clustering = clusterresults.get(0); + return clustering; + } + + /** + * Test the clustering result by comparing the score with an expected value. + * + * @param database Database to test + * @param clustering Clustering result + * @param expected Expected score + */ + protected <O> void testFMeasure(Database database, Clustering<?> clustering, double expected) { + // Run by-label as reference + ByLabelClustering bylabel = new ByLabelClustering(); + Clustering<Model> rbl = bylabel.run(database); + + double score = PairCountingFMeasure.compareClusterings(clustering, rbl, 1.0); + if(logger.isVerbose()) { + logger.verbose(this.getClass().getSimpleName() + " score: " + score + " expect: " + expected); + } + org.junit.Assert.assertEquals(this.getClass().getSimpleName() + ": Score does not match.", expected, score, 0.0001); + } + + /** + * Test the clustering result by comparing the score with an expected value. 
+ * + * @param database Database to test + * @param clustering Clustering result + * @param expected Expected score + */ + protected <O> void testFMeasureHierarchical(Database database, Clustering<?> clustering, double expected) { + // Run by-label as reference + ByLabelHierarchicalClustering bylabel = new ByLabelHierarchicalClustering(); + Clustering<Model> rbl = bylabel.run(database); + + double score = PairCountingFMeasure.compareClusterings(clustering, rbl, 1.0, false, true); + if(logger.isVerbose()) { + logger.verbose(this.getClass().getSimpleName() + " score: " + score + " expect: " + expected); + } + org.junit.Assert.assertEquals(this.getClass().getSimpleName() + ": Score does not match.", expected, score, 0.0001); + } + + /** + * Validate the cluster sizes with an expected result. + * + * @param clustering Clustering to test + * @param expected Expected cluster sizes + */ + protected void testClusterSizes(Clustering<?> clustering, int[] expected) { + List<Integer> sizes = new java.util.ArrayList<Integer>(); + for(Cluster<?> cl : clustering.getAllClusters()) { + sizes.add(cl.size()); + } + // Sort both + Collections.sort(sizes); + Arrays.sort(expected); + // Report + // if(logger.isVerbose()) { + StringBuffer buf = new StringBuffer(); + buf.append("Cluster sizes: ["); + for(int i = 0; i < sizes.size(); i++) { + if(i > 0) { + buf.append(", "); + } + buf.append(sizes.get(i)); + } + buf.append("]"); + // } + // Test + org.junit.Assert.assertEquals("Number of clusters does not match expectations." + buf.toString(), expected.length, sizes.size()); + for(int i = 0; i < expected.length; i++) { + org.junit.Assert.assertEquals("Cluster size does not match at position " + i, expected[i], (int) sizes.get(i)); + } + } + + /** + * Test the AUC value for an outlier result. 
+ * + * @param db Database + * @param positive Positive class name + * @param result Outlier result to process + * @param expected Expected AUC value + */ + protected void testAUC(Database db, String positive, OutlierResult result, double expected) { + ListParameterization params = new ListParameterization(); + params.addParameter(ComputeROCCurve.POSITIVE_CLASS_NAME_ID, positive); + ComputeROCCurve rocCurve = ClassGenericsUtil.parameterizeOrAbort(ComputeROCCurve.class, params); + + // Ensure the result has been added to the hierarchy: + if(db.getHierarchy().getParents(result).size() < 1) { + db.getHierarchy().add(db, result); + } + + // Compute ROC and AUC: + rocCurve.processNewResult(db, result); + // Find the ROC results + Iterator<ComputeROCCurve.ROCResult> iter = ResultUtil.filteredResults(result, ComputeROCCurve.ROCResult.class); + org.junit.Assert.assertTrue("No ROC result found.", iter.hasNext()); + double auc = iter.next().getAUC(); + org.junit.Assert.assertFalse("More than one ROC result found.", iter.hasNext()); + org.junit.Assert.assertEquals("ROC value does not match.", expected, auc, 0.0001); + } + + /** + * Test the outlier score of a single object. + * + * @param result Result object to use + * @param id Object ID + * @param expected expected value + */ + protected void testSingleScore(OutlierResult result, int id, double expected) { + org.junit.Assert.assertNotNull("No outlier result", result); + org.junit.Assert.assertNotNull("No score result.", result.getScores()); + final DBID dbid = DBIDUtil.importInteger(id); + org.junit.Assert.assertNotNull("No result for ID " + id, result.getScores().get(dbid)); + double actual = result.getScores().get(dbid); + org.junit.Assert.assertEquals("Outlier score of object " + id + " doesn't match.", expected, actual, 0.0001); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java b/test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java new file mode 100644 index 00000000..c2d7f650 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/TestKNNJoin.java @@ -0,0 +1,193 @@ +package de.lmu.ifi.dbs.elki.algorithm; + +import java.util.Arrays; +import java.util.List; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; +import de.lmu.ifi.dbs.elki.database.datastore.DataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter; +import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.ManhattanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.tree.TreeIndexFactory; +import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTree; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeFactory; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTree; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeNode; +import 
de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +public class TestKNNJoin implements JUnit4Test { + // the following values depend on the data set used! + String dataset = "data/testdata/unittests/uebungsblatt-2d-mini.csv"; + + // size of the data set + int shoulds = 20; + + // mean number of 2NN + double mean2nnEuclid = 2.85; + + // variance + double var2nnEuclid = 0.87105; + + // mean number of 2NN + double mean2nnManhattan = 2.9; + + // variance + double var2nnManhattan = 0.83157894; + + @Test + public void testLinearScan() { + ListParameterization inputparams = new ListParameterization(); + inputparams.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset); + List<Class<?>> filters = Arrays.asList(new Class<?>[] { FixedDBIDsFilter.class }); + inputparams.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filters); + inputparams.addParameter(FixedDBIDsFilter.IDSTART_ID, 1); + + // get database + Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, inputparams); + inputparams.failOnErrors(); + + db.initialize(); + Relation<NumberVector<?, ?>> relation = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); + // verify data set size. 
+ org.junit.Assert.assertEquals("Database size does not match.", shoulds, relation.size()); + + // Euclidean + { + DistanceQuery<NumberVector<?, ?>, DoubleDistance> dq = db.getDistanceQuery(relation, EuclideanDistanceFunction.STATIC); + KNNQuery<NumberVector<?, ?>, DoubleDistance> knnq = QueryUtil.getLinearScanKNNQuery(dq); + + MeanVariance meansize = new MeanVariance(); + for(DBID id : relation.iterDBIDs()) { + List<DistanceResultPair<DoubleDistance>> knnlist = knnq.getKNNForDBID(id, 2); + meansize.put(knnlist.size()); + } + org.junit.Assert.assertEquals("Euclidean mean 2NN", mean2nnEuclid, meansize.getMean(), 0.00001); + org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, meansize.getSampleVariance(), 0.00001); + } + // Manhattan + { + DistanceQuery<NumberVector<?, ?>, DoubleDistance> dq = db.getDistanceQuery(relation, ManhattanDistanceFunction.STATIC); + KNNQuery<NumberVector<?, ?>, DoubleDistance> knnq = QueryUtil.getLinearScanKNNQuery(dq); + + MeanVariance meansize = new MeanVariance(); + for(DBID id : relation.iterDBIDs()) { + List<DistanceResultPair<DoubleDistance>> knnlist = knnq.getKNNForDBID(id, 2); + meansize.put(knnlist.size()); + } + org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, meansize.getMean(), 0.00001); + org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, meansize.getSampleVariance(), 0.00001); + } + } + + /** + * Test {@link RStarTree} using a file based database connection. + * + * @throws ParameterException on errors. + */ + @Test + public void testKNNJoinRtreeMini() { + ListParameterization spatparams = new ListParameterization(); + spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class); + spatparams.addParameter(TreeIndexFactory.PAGE_SIZE_ID, 200); + + doKNNJoin(spatparams); + } + + /** + * Test {@link RStarTree} using a file based database connection. + * + * @throws ParameterException on errors. 
+ */ + @Test + public void testKNNJoinRtreeMaxi() { + ListParameterization spatparams = new ListParameterization(); + spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class); + spatparams.addParameter(TreeIndexFactory.PAGE_SIZE_ID, 2000); + + doKNNJoin(spatparams); + } + + /** + * Test {@link DeLiCluTree} using a file based database connection. + * + * @throws ParameterException on errors. + */ + @Test + public void testKNNJoinDeLiCluTreeMini() { + ListParameterization spatparams = new ListParameterization(); + spatparams.addParameter(StaticArrayDatabase.INDEX_ID, DeLiCluTreeFactory.class); + spatparams.addParameter(TreeIndexFactory.PAGE_SIZE_ID, 200); + + doKNNJoin(spatparams); + } + + /** + * Actual test routine. + * + * @param inputparams + * @throws ParameterException + */ + void doKNNJoin(ListParameterization inputparams) { + inputparams.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset); + List<Class<?>> filters = Arrays.asList(new Class<?>[] { FixedDBIDsFilter.class }); + inputparams.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filters); + inputparams.addParameter(FixedDBIDsFilter.IDSTART_ID, 1); + + // get database + Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, inputparams); + inputparams.failOnErrors(); + + db.initialize(); + Relation<NumberVector<?, ?>> relation = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); + // verify data set size. 
+ org.junit.Assert.assertEquals("Database size does not match.", shoulds, relation.size()); + + // Euclidean + { + KNNJoin<DoubleVector, DoubleDistance, ?, ?> knnjoin = new KNNJoin<DoubleVector, DoubleDistance, RStarTreeNode, SpatialEntry>(EuclideanDistanceFunction.STATIC, 2); + DataStore<KNNList<DoubleDistance>> result = knnjoin.run(db); + + MeanVariance meansize = new MeanVariance(); + for(DBID id : relation.getDBIDs()) { + KNNList<DoubleDistance> knnlist = result.get(id); + meansize.put(knnlist.size()); + } + org.junit.Assert.assertEquals("Euclidean mean 2NN", mean2nnEuclid, meansize.getMean(), 0.00001); + org.junit.Assert.assertEquals("Euclidean variance 2NN", var2nnEuclid, meansize.getSampleVariance(), 0.00001); + } + // Manhattan + { + KNNJoin<DoubleVector, DoubleDistance, ?, ?> knnjoin = new KNNJoin<DoubleVector, DoubleDistance, RStarTreeNode, SpatialEntry>(ManhattanDistanceFunction.STATIC, 2); + DataStore<KNNList<DoubleDistance>> result = knnjoin.run(db); + + MeanVariance meansize = new MeanVariance(); + for(DBID id : relation.getDBIDs()) { + KNNList<DoubleDistance> knnlist = result.get(id); + meansize.put(knnlist.size()); + } + org.junit.Assert.assertEquals("Manhattan mean 2NN", mean2nnManhattan, meansize.getMean(), 0.00001); + org.junit.Assert.assertEquals("Manhattan variance 2NN", var2nnManhattan, meansize.getSampleVariance(), 0.00001); + } + } +} diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java new file mode 100644 index 00000000..e953024e --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDBSCANResults.java @@ -0,0 +1,73 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import 
de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full DBSCAN run, and compares the result with a clustering derived + * from the data set labels. This test ensures that DBSCAN performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Elke Achtert + * @author Erich Schubert + * @author Katharina Rausch + */ +public class TestDBSCANResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run DBSCAN with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException + */ + @Test + public void testDBSCANResults() { + Database db = makeSimpleDatabase(UNITTEST + "3clusters-and-noise-2d.csv", 330); + + // setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(DBSCAN.EPSILON_ID, 0.04); + params.addParameter(DBSCAN.MINPTS_ID, 20); + DBSCAN<DoubleVector, DoubleDistance> dbscan = ClassGenericsUtil.parameterizeOrAbort(DBSCAN.class, params); + testParameterizationOk(params); + + // run DBSCAN on database + Clustering<Model> result = dbscan.run(db); + + testFMeasure(db, result, 0.996413); + testClusterSizes(result, new int[] { 29, 50, 101, 150 }); + } + + /** + * Run DBSCAN with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testDBSCANOnSingleLinkDataset() { + Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(DBSCAN.EPSILON_ID, 11.5); + params.addParameter(DBSCAN.MINPTS_ID, 120); + DBSCAN<DoubleVector, DoubleDistance> dbscan = ClassGenericsUtil.parameterizeOrAbort(DBSCAN.class, params); + testParameterizationOk(params); + + // run DBSCAN on database + Clustering<Model> result = dbscan.run(db); + testFMeasure(db, result, 0.954382); + testClusterSizes(result, new int[] { 11, 200, 203, 224 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java new file mode 100644 index 00000000..0c02f6e2 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestDeLiCluResults.java @@ -0,0 +1,53 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.deliclu.DeLiCluTreeFactory; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full DeLiClu run, and compares the result with a clustering + * derived from the data set labels. This test ensures that DeLiClu's + * performance doesn't unexpectedly drop on this data set (and also ensures that + * the algorithms work, as a side effect). + * + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestDeLiCluResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run DeLiClu with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testDeLiCluResults() { + ListParameterization indexparams = new ListParameterization(); + // We need a special index for this algorithm: + indexparams.addParameter(StaticArrayDatabase.INDEX_ID, DeLiCluTreeFactory.class); + indexparams.addParameter(DeLiCluTreeFactory.PAGE_SIZE_ID, 1000); + Database db = makeSimpleDatabase(UNITTEST + "hierarchical-2d.ascii", 710, indexparams, null); + + // Setup actual algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(DeLiClu.MINPTS_ID, 18); + params.addParameter(OPTICSXi.XI_ID, 0.038); + params.addParameter(OPTICSXi.XIALG_ID, DeLiClu.class); + OPTICSXi<DoubleDistance> opticsxi = ClassGenericsUtil.parameterizeOrAbort(OPTICSXi.class, params); + testParameterizationOk(params); + + // run DeLiClu on database + Clustering<?> clustering = opticsxi.run(db); + testFMeasure(db, clustering, 0.8765283); + testClusterSizes(clustering, new int[] { 108, 121, 210, 271 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java new file mode 100644 index 00000000..e3e4c984 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestEMResults.java @@ -0,0 +1,47 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.EMModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full EM run, and compares the result with a clustering derived + * from the data set labels. This test ensures that EM's performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestEMResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run EM with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testEMResults() { + Database db = makeSimpleDatabase(UNITTEST + "hierarchical-2d.ascii", 710); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(EM.SEED_ID, 1); + params.addParameter(EM.K_ID, 5); + EM<DoubleVector> em = ClassGenericsUtil.parameterizeOrAbort(EM.class, params); + testParameterizationOk(params); + + // run EM on database + Clustering<EMModel<DoubleVector>> result = em.run(db); + testFMeasure(db, result, 0.961587); + testClusterSizes(result, new int[] { 5, 91, 98, 200, 316 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestKMeansResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestKMeansResults.java new file mode 100644 index 00000000..78cf3138 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestKMeansResults.java @@ -0,0 +1,48 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.MeanModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full KMeans run, and compares the result with a clustering derived + * from the data set labels. This test ensures that KMeans's performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestKMeansResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run KMeans with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testKMeansResults() { + Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d-no-noise.ascii", 1000); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(KMeans.K_ID, 5); + params.addParameter(KMeans.SEED_ID, 3); + KMeans<DoubleVector, DoubleDistance> kmeans = ClassGenericsUtil.parameterizeOrAbort(KMeans.class, params); + testParameterizationOk(params); + + // run KMeans on database + Clustering<MeanModel<DoubleVector>> result = kmeans.run(db); + testFMeasure(db, result, 0.998005); + testClusterSizes(result, new int[] { 199, 200, 200, 200, 201 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestOPTICSResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestOPTICSResults.java new file mode 100644 index 00000000..b9bd1067 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestOPTICSResults.java @@ -0,0 +1,48 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full OPTICS run, and compares the result with a clustering derived + * from the data set labels. This test ensures that OPTICS's performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestOPTICSResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run OPTICS with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testOPTICSResults() { + Database db = makeSimpleDatabase(UNITTEST + "hierarchical-2d.ascii", 710); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(OPTICS.MINPTS_ID, 18); + params.addParameter(OPTICSXi.XI_ID, 0.038); + params.addParameter(OPTICSXi.XIALG_ID, OPTICS.class); + OPTICSXi<DoubleDistance> opticsxi = ClassGenericsUtil.parameterizeOrAbort(OPTICSXi.class, params); + testParameterizationOk(params); + + // run OPTICS on database + Clustering<?> clustering = opticsxi.run(db); + + testFMeasure(db, clustering, 0.874062); + testClusterSizes(clustering, new int[] { 109, 121, 210, 270 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java new file mode 100644 index 00000000..b7374942 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSLINKResults.java @@ -0,0 +1,50 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full SLINK run, and compares the result with a clustering derived + * from the data set labels. This test ensures that SLINK's performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestSLINKResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + // TODO: add a test for a non-single-link dataset? + + /** + * Run SLINK with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testSLINKResults() { + Database db = makeSimpleDatabase(UNITTEST + "single-link-effect.ascii", 638); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(SLINK.SLINK_MINCLUSTERS_ID, 3); + SLINK<DoubleVector, DoubleDistance> slink = ClassGenericsUtil.parameterizeOrAbort(SLINK.class, params); + testParameterizationOk(params); + + // run SLINK on database + Result result = slink.run(db); + Clustering<?> clustering = findSingleClustering(result); + testFMeasure(db, clustering, 0.6829722); + testClusterSizes(clustering, new int[] { 0, 0, 9, 200, 429 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSNNClusteringResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSNNClusteringResults.java new file mode 100644 index 00000000..1c54e5ec --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/TestSNNClusteringResults.java @@ -0,0 +1,50 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborPreprocessor; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full SNNClustering run, and compares the result with a clustering + * derived from the data set labels. This test ensures that SNNClustering's + * performance doesn't unexpectedly drop on this data set (and also ensures that + * the algorithms work, as a side effect). + * + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestSNNClusteringResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run SNNClustering with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testSNNClusteringResults() { + Database db = makeSimpleDatabase(UNITTEST + "different-densities-2d.ascii", 1200); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(SNNClustering.EPSILON_ID, 77); + params.addParameter(SNNClustering.MINPTS_ID, 28); + params.addParameter(SharedNearestNeighborPreprocessor.Factory.NUMBER_OF_NEIGHBORS_ID, 100); + SNNClustering<DoubleVector, DoubleDistance> snn = ClassGenericsUtil.parameterizeOrAbort(SNNClustering.class, params); + testParameterizationOk(params); + + // run SNN on database + Clustering<Model> result = snn.run(db); + testFMeasure(db, result, 0.835000); + testClusterSizes(result, new int[] { 76, 213, 219, 225, 231, 236 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java new file mode 100644 index 00000000..fc099a91 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCASHResults.java @@ -0,0 +1,87 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.datasource.parser.ParameterizationFunctionLabelParser; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Perform a full CASH run, and compare the result with a clustering derived + * from the data set labels. This test ensures that CASH performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Erich Schubert + * @author Katharina Rausch + */ +public class TestCASHResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run CASH with fixed parameters and compare the result to a golden standard. + * + * @throws ParameterException on errors. 
+ */ + @Test + public void testCASHResults() { + ListParameterization inp = new ListParameterization(); + // CASH input + inp.addParameter(FileBasedDatabaseConnection.PARSER_ID, ParameterizationFunctionLabelParser.class); + // Input + Database db = makeSimpleDatabase(UNITTEST + "hierarchical-3d2d1d.csv", 600, inp, null); + + // CASH parameters + ListParameterization params = new ListParameterization(); + params.addParameter(CASH.JITTER_ID, 0.7); + params.addParameter(CASH.MINPTS_ID, 50); + params.addParameter(CASH.MAXLEVEL_ID, 25); + params.addFlag(CASH.ADJUST_ID); + + // setup algorithm + CASH cash = ClassGenericsUtil.parameterizeOrAbort(CASH.class, params); + testParameterizationOk(params); + + // run CASH on database + Clustering<Model> result = cash.run(db); + + testFMeasureHierarchical(db, result, 0.64102); + testClusterSizes(result, new int[] { 37, 71, 76, 442 }); + // Old heap scored worse? + // testFMeasureHierarchical(db, result, 0.638727); + // testClusterSizes(result, new int[] { 13, 65, 74, 75, 442 }); + } + + /** + * Run CASH with fixed parameters and compare the result to a golden standard. + * + * @throws ParameterException on errors. 
+ */ + @Test + public void testCASHEmbedded() { + // CASH input + ListParameterization inp = new ListParameterization(); + inp.addParameter(FileBasedDatabaseConnection.PARSER_ID, ParameterizationFunctionLabelParser.class); + Database db = makeSimpleDatabase(UNITTEST + "correlation-embedded-2-4d.ascii", 600, inp, null); + + // CASH parameters + ListParameterization params = new ListParameterization(); + params.addParameter(CASH.JITTER_ID, 0.7); + params.addParameter(CASH.MINPTS_ID, 160); + params.addParameter(CASH.MAXLEVEL_ID, 40); + + // setup algorithm + CASH cash = ClassGenericsUtil.parameterizeOrAbort(CASH.class, params); + testParameterizationOk(params); + + // run CASH on database + Clustering<Model> result = cash.run(db); + testFMeasure(db, result, 0.443246); + testClusterSizes(result, new int[] { 169, 196, 235 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java new file mode 100644 index 00000000..cc265c90 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestCOPACResults.java @@ -0,0 +1,91 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.localpca.KNNQueryFilteredPCAIndex; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PercentageEigenPairFilter; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.WeightedCovarianceMatrixBuilder; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ErfcWeight; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Perform a full COPAC run, and compare the result with a clustering derived + * from the data set labels. This test ensures that COPAC performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). 
+ * + * @author Erich Schubert + * @author Katharina Rausch + */ +public class TestCOPACResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run COPAC with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException on errors. + */ + @Test + public void testCOPACResults() { + Database db = makeSimpleDatabase(UNITTEST + "correlation-hierarchy.csv", 450); + + // these parameters are not picked too smartly - room for improvement. + ListParameterization params = new ListParameterization(); + params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class); + params.addParameter(DBSCAN.EPSILON_ID, 0.02); + params.addParameter(DBSCAN.MINPTS_ID, 50); + params.addParameter(COPAC.PREPROCESSOR_ID, KNNQueryFilteredPCAIndex.Factory.class); + params.addParameter(KNNQueryFilteredPCAIndex.Factory.K_ID, 15); + + COPAC<DoubleVector, DoubleDistance> copac = ClassGenericsUtil.parameterizeOrAbort(COPAC.class, params); + testParameterizationOk(params); + + // run COPAC on database + Clustering<Model> result = copac.run(db); + + testFMeasure(db, result, 0.842521); + testClusterSizes(result, new int[] { 6, 16, 32, 196, 200 }); + } + + /** + * Run COPAC with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException on errors. 
+ */ + @Test + public void testCOPACOverlap() { + Database db = makeSimpleDatabase(UNITTEST + "correlation-overlap-3-5d.ascii", 650); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class); + params.addParameter(DBSCAN.EPSILON_ID, 0.5); + params.addParameter(DBSCAN.MINPTS_ID, 20); + params.addParameter(COPAC.PREPROCESSOR_ID, KNNQueryFilteredPCAIndex.Factory.class); + params.addParameter(KNNQueryFilteredPCAIndex.Factory.K_ID, 45); + // PCA + params.addParameter(PCARunner.PCA_COVARIANCE_MATRIX, WeightedCovarianceMatrixBuilder.class); + params.addParameter(WeightedCovarianceMatrixBuilder.WEIGHT_ID, ErfcWeight.class); + params.addParameter(PCAFilteredRunner.PCA_EIGENPAIR_FILTER, PercentageEigenPairFilter.class); + params.addParameter(PercentageEigenPairFilter.ALPHA_ID, 0.8); + + COPAC<DoubleVector, DoubleDistance> copac = ClassGenericsUtil.parameterizeOrAbort(COPAC.class, params); + testParameterizationOk(params); + + Clustering<Model> result = copac.run(db); + testFMeasure(db, result, 0.84687864); + testClusterSizes(result, new int[] { 1, 22, 22, 29, 34, 158, 182, 202 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java new file mode 100644 index 00000000..06f3c723 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestERiCResults.java @@ -0,0 +1,107 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.DBSCAN; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.CorrelationModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancefunction.correlation.ERiCDistanceFunction; +import de.lmu.ifi.dbs.elki.index.preprocessed.localpca.KNNQueryFilteredPCAIndex; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PercentageEigenPairFilter; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.RelativeEigenPairFilter; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.WeightedCovarianceMatrixBuilder; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ErfcWeight; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Perform a full ERiC run, and compare the result with a clustering derived + * from the data set labels. This test ensures that ERiC performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). 
+ * + * @author Erich Schubert + * @author Katharina Rausch + */ +public class TestERiCResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run ERiC with fixed parameters and compare the result to a golden standard. + * + * @throws ParameterException on errors. + */ + @Test + public void testERiCResults() { + Database db = makeSimpleDatabase(UNITTEST + "hierarchical-3d2d1d.csv", 600); + + // ERiC + ListParameterization params = new ListParameterization(); + params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class); + params.addParameter(DBSCAN.MINPTS_ID, 30); + params.addParameter(DBSCAN.EPSILON_ID, 0); + // ERiC Distance function in DBSCAN: + params.addParameter(COPAC.PARTITION_DISTANCE_ID, ERiCDistanceFunction.class); + params.addParameter(ERiCDistanceFunction.DELTA_ID, 0.20); + params.addParameter(ERiCDistanceFunction.TAU_ID, 0.04); + // Preprocessing via Local PCA: + params.addParameter(COPAC.PREPROCESSOR_ID, KNNQueryFilteredPCAIndex.Factory.class); + params.addParameter(KNNQueryFilteredPCAIndex.Factory.K_ID, 50); + // PCA + params.addParameter(PCARunner.PCA_COVARIANCE_MATRIX, WeightedCovarianceMatrixBuilder.class); + params.addParameter(WeightedCovarianceMatrixBuilder.WEIGHT_ID, ErfcWeight.class); + params.addParameter(PCAFilteredRunner.PCA_EIGENPAIR_FILTER, RelativeEigenPairFilter.class); + params.addParameter(RelativeEigenPairFilter.EIGENPAIR_FILTER_RALPHA, 1.60); + + ERiC<DoubleVector> eric = ClassGenericsUtil.parameterizeOrAbort(ERiC.class, params); + testParameterizationOk(params); + + // run ERiC on database + Clustering<CorrelationModel<DoubleVector>> result = eric.run(db); + + testFMeasureHierarchical(db, result, 0.9204825); + testClusterSizes(result, new int[] { 109, 184, 307 }); + } + + /** + * Run ERiC with fixed parameters and compare the result to a golden standard. + * + * @throws ParameterException on errors. 
+ */ + @Test + public void testERiCOverlap() { + Database db = makeSimpleDatabase(UNITTEST + "correlation-overlap-3-5d.ascii", 650); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + // ERiC + params.addParameter(COPAC.PARTITION_ALGORITHM_ID, DBSCAN.class); + params.addParameter(DBSCAN.MINPTS_ID, 15); + params.addParameter(DBSCAN.EPSILON_ID, 0); + // ERiC Distance function in DBSCAN: + params.addParameter(COPAC.PARTITION_DISTANCE_ID, ERiCDistanceFunction.class); + params.addParameter(ERiCDistanceFunction.DELTA_ID, 1.0); + params.addParameter(ERiCDistanceFunction.TAU_ID, 1.0); + // Preprocessing via Local PCA: + params.addParameter(COPAC.PREPROCESSOR_ID, KNNQueryFilteredPCAIndex.Factory.class); + params.addParameter(KNNQueryFilteredPCAIndex.Factory.K_ID, 45); + // PCA + params.addParameter(PCARunner.PCA_COVARIANCE_MATRIX, WeightedCovarianceMatrixBuilder.class); + params.addParameter(WeightedCovarianceMatrixBuilder.WEIGHT_ID, ErfcWeight.class); + params.addParameter(PCAFilteredRunner.PCA_EIGENPAIR_FILTER, PercentageEigenPairFilter.class); + params.addParameter(PercentageEigenPairFilter.ALPHA_ID, 0.6); + + ERiC<DoubleVector> eric = ClassGenericsUtil.parameterizeOrAbort(ERiC.class, params); + testParameterizationOk(params); + + // run ERiC on database + Clustering<CorrelationModel<DoubleVector>> result = eric.run(db); + testFMeasure(db, result, 0.831136946); + testClusterSizes(result, new int[] { 29, 189, 207, 225 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestFourCResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestFourCResults.java new file mode 100644 index 00000000..a0c97d6e --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestFourCResults.java @@ -0,0 +1,75 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedDBSCAN; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Perform a full 4C run, and compare the result with a clustering derived from + * the data set labels. This test ensures that 4C performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Erich Schubert + * @author Katharina Rausch + */ +public class TestFourCResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run 4F with fixed parameters and compare the result to a golden standard. + * + * @throws ParameterException on errors. 
+ */ + @Test + public void testFourCResults() { + Database db = makeSimpleDatabase(UNITTEST + "hierarchical-3d2d1d.csv", 600); + + // Setup 4C + ListParameterization params = new ListParameterization(); + params.addParameter(AbstractProjectedDBSCAN.EPSILON_ID, 0.30); + params.addParameter(AbstractProjectedDBSCAN.MINPTS_ID, 20); + params.addParameter(AbstractProjectedDBSCAN.LAMBDA_ID, 5); + + FourC<DoubleVector> fourc = ClassGenericsUtil.parameterizeOrAbort(FourC.class, params); + testParameterizationOk(params); + + // run 4C on database + Clustering<Model> result = fourc.run(db); + + testFMeasureHierarchical(db, result, 0.79467); + testClusterSizes(result, new int[] { 5, 595 }); + } + + /** + * Run ERiC with fixed parameters and compare the result to a golden standard. + * + * @throws ParameterException on errors. + */ + @Test + public void testFourCOverlap() { + Database db = makeSimpleDatabase(UNITTEST + "correlation-overlap-3-5d.ascii", 650); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + // 4C + params.addParameter(AbstractProjectedDBSCAN.EPSILON_ID, 1.2); + params.addParameter(AbstractProjectedDBSCAN.MINPTS_ID, 5); + params.addParameter(AbstractProjectedDBSCAN.LAMBDA_ID, 3); + + FourC<DoubleVector> fourc = ClassGenericsUtil.parameterizeOrAbort(FourC.class, params); + testParameterizationOk(params); + + // run 4C on database + Clustering<Model> result = fourc.run(db); + testFMeasure(db, result, 0.48305405); + testClusterSizes(result, new int[] { 65, 70, 515 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestORCLUSResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestORCLUSResults.java new file mode 100644 index 00000000..33c29371 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/TestORCLUSResults.java @@ -0,0 +1,76 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full ORCLUS run, and compares the result with a clustering derived + * from the data set labels. This test ensures that ORCLUS performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Elke Achtert + * @author Katharina Rausch + */ +public class TestORCLUSResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run ORCLUS with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException on errors. + */ + @Test + public void testORCLUSResults() { + Database db = makeSimpleDatabase(UNITTEST + "correlation-hierarchy.csv", 450); + + ListParameterization params = new ListParameterization(); + // these parameters are not picked too smartly - room for improvement. 
+ params.addParameter(ORCLUS.K_ID, 3); + params.addParameter(ORCLUS.L_ID, 1); + params.addParameter(ORCLUS.SEED_ID, 2); + + // setup algorithm + ORCLUS<DoubleVector> orclus = ClassGenericsUtil.parameterizeOrAbort(ORCLUS.class, params); + testParameterizationOk(params); + + // run ORCLUS on database + Clustering<Model> result = orclus.run(db); + + testFMeasureHierarchical(db, result, 0.789113); + testClusterSizes(result, new int[] { 22, 27, 401 }); + } + + /** + * Run ORCLUS with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException on errors. + */ + @Test + public void testORCLUSSkewedDisjoint() { + Database db = makeSimpleDatabase(UNITTEST + "correlation-skewed-disjoint-3-5d.ascii", 601); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(ORCLUS.K_ID, 3); + params.addParameter(ORCLUS.L_ID, 4); + params.addParameter(ORCLUS.SEED_ID, 9); + + ORCLUS<DoubleVector> orclus = ClassGenericsUtil.parameterizeOrAbort(ORCLUS.class, params); + testParameterizationOk(params); + + // run ORCLUS on database + Clustering<Model> result = orclus.run(db); + testFMeasure(db, result, 0.8687866); + testClusterSizes(result, new int[] { 170, 200, 231 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestCLIQUEResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestCLIQUEResults.java new file mode 100644 index 00000000..d3d8cdbe --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestCLIQUEResults.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.data.model.SubspaceModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.PairCountingFMeasure; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full CLIQUE run, and compares the result with a clustering derived + * from the data set labels. This test ensures that CLIQUE performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Elke Achtert + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestCLIQUEResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run CLIQUE with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testCLIQUEResults() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-simple.csv", 600); + + ListParameterization params = new ListParameterization(); + params.addParameter(CLIQUE.TAU_ID, "0.1"); + params.addParameter(CLIQUE.XSI_ID, 20); + + // setup algorithm + CLIQUE<DoubleVector> clique = ClassGenericsUtil.parameterizeOrAbort(CLIQUE.class, params); + testParameterizationOk(params); + + // run CLIQUE on database + Clustering<SubspaceModel<DoubleVector>> result = clique.run(db); + // Run by-label as reference + ByLabelClustering bylabel = new ByLabelClustering(true, null); + Clustering<Model> rbl = bylabel.run(db); + + double score = PairCountingFMeasure.compareClusterings(result, rbl, 1.0); + org.junit.Assert.assertEquals(this.getClass().getSimpleName() + ": Score does not match.", 0.9882, score, 0.0001); + testClusterSizes(result, new int[] { 200, 200, 216, 400 }); + } + + /** + * Run CLIQUE with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException + */ + @Test + public void testCLIQUESubspaceOverlapping() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-3-4d.ascii", 850); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(CLIQUE.TAU_ID, 0.2); + params.addParameter(CLIQUE.XSI_ID, 6); + CLIQUE<DoubleVector> clique = ClassGenericsUtil.parameterizeOrAbort(CLIQUE.class, params); + testParameterizationOk(params); + + // run CLIQUE on database + Clustering<SubspaceModel<DoubleVector>> result = clique.run(db); + testFMeasure(db, result, 0.433661); + testClusterSizes(result, new int[] { 255, 409, 458, 458, 480 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestDiSHResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestDiSHResults.java new file mode 100644 index 00000000..2cb08fd7 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestDiSHResults.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.SubspaceModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full DiSH run, and compares the result with a clustering derived + * from the data set labels. This test ensures that DiSH performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Elke Achtert + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestDiSHResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run DiSH with fixed parameters and compare the result to a golden standard. 
+ * + * @throws ParameterException + */ + @Test + public void testDiSHResults() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-hierarchy.csv", 450); + + ListParameterization params = new ListParameterization(); + params.addParameter(DiSH.EPSILON_ID, 0.005); + params.addParameter(DiSH.MU_ID, 50); + + // setup algorithm + DiSH<DoubleVector> dish = ClassGenericsUtil.parameterizeOrAbort(DiSH.class, params); + testParameterizationOk(params); + + // run DiSH on database + Clustering<SubspaceModel<DoubleVector>> result = dish.run(db); + + testFMeasureHierarchical(db, result, 0.9991258); + testClusterSizes(result, new int[] { 51, 199, 200 }); + } + + /** + * Run DiSH with fixed parameters and compare the result to a golden standard. + * + * @throws ParameterException + */ + @Test + public void testDiSHSubspaceOverlapping() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-4-5d.ascii", 1100); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(DiSH.EPSILON_ID, 0.1); + params.addParameter(DiSH.MU_ID, 30); + DiSH<DoubleVector> dish = ClassGenericsUtil.parameterizeOrAbort(DiSH.class, params); + testParameterizationOk(params); + + // run DiSH on database + Clustering<SubspaceModel<DoubleVector>> result = dish.run(db); + testFMeasure(db, result, 0.6376870); + testClusterSizes(result, new int[] { 33, 52, 72, 109, 172, 314, 348 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java new file mode 100644 index 00000000..66e7ed0b --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPROCLUSResults.java @@ -0,0 +1,75 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full PROCLUS run, and compares the result with a clustering + * derived from the data set labels. This test ensures that PROCLUS performance + * doesn't unexpectedly drop on this data set (and also ensures that the + * algorithms work, as a side effect). + * + * @author Elke Achtert + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestPROCLUSResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run PROCLUS with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testPROCLUSResults() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-simple.csv", 600); + + ListParameterization params = new ListParameterization(); + params.addParameter(PROCLUS.L_ID, 1); + params.addParameter(PROCLUS.K_ID, 4); + params.addParameter(PROCLUS.SEED_ID, 1); + + // setup algorithm + PROCLUS<DoubleVector> proclus = ClassGenericsUtil.parameterizeOrAbort(PROCLUS.class, params); + testParameterizationOk(params); + + // run PROCLUS on database + Clustering<Model> result = proclus.run(db); + + testFMeasure(db, result, 0.68932); + testClusterSizes(result, new int[] { 78, 93, 203, 226 }); + } + + /** + * Run PROCLUS with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException + */ + @Test + public void testPROCLUSSubspaceOverlapping() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-3-4d.ascii", 850); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(PROCLUS.L_ID, 2); + params.addParameter(PROCLUS.K_ID, 3); + params.addParameter(PROCLUS.SEED_ID, 2); + PROCLUS<DoubleVector> proclus = ClassGenericsUtil.parameterizeOrAbort(PROCLUS.class, params); + testParameterizationOk(params); + + // run PROCLUS on database + Clustering<Model> result = proclus.run(db); + testFMeasure(db, result, 0.9673718); + testClusterSizes(result, new int[] { 150, 289, 411 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java new file mode 100644 index 00000000..25704194 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestPreDeConResults.java @@ -0,0 +1,87 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedDBSCAN; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.datasource.filter.ClassLabelFilter; +import de.lmu.ifi.dbs.elki.index.preprocessed.subspaceproj.PreDeConSubspaceIndex.Factory; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Perform a full PreDeCon run, and compare the result with a clustering derived + * from the data set labels. This test ensures that PreDeCon performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Erich Schubert + * @author Katharina Rausch + */ +public class TestPreDeConResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run PreDeCon with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testPreDeConResults() { + // Additional input parameters + ListParameterization inp = new ListParameterization(); + inp.addParameter(ClassLabelFilter.CLASS_LABEL_INDEX_ID, 1); + Class<?>[] filters = new Class<?>[] { ClassLabelFilter.class }; + // FIXME: makeSimpleDatabase currently does also add FILTERS, this doesn't work. + Database db = makeSimpleDatabase(UNITTEST + "axis-parallel-subspace-clusters-6d.csv.gz", 2500, inp, filters); + + ListParameterization params = new ListParameterization(); + // PreDeCon + // FIXME: These parameters do NOT work... + params.addParameter(AbstractProjectedDBSCAN.EPSILON_ID, 50); + params.addParameter(AbstractProjectedDBSCAN.MINPTS_ID, 50); + params.addParameter(AbstractProjectedDBSCAN.LAMBDA_ID, 2); + + // setup algorithm + PreDeCon<DoubleVector> predecon = ClassGenericsUtil.parameterizeOrAbort(PreDeCon.class, params); + testParameterizationOk(params); + + // run PredeCon on database + Clustering<Model> result = predecon.run(db); + + // FIXME: find working parameters... + testFMeasure(db, result, 0.40153); + testClusterSizes(result, new int[] { 2500 }); + } + + /** + * Run PreDeCon with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testPreDeConSubspaceOverlapping() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-3-4d.ascii", 850); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + // PreDeCon + params.addParameter(AbstractProjectedDBSCAN.EPSILON_ID, 2.0); + params.addParameter(AbstractProjectedDBSCAN.MINPTS_ID, 7); + params.addParameter(AbstractProjectedDBSCAN.LAMBDA_ID, 4); + params.addParameter(Factory.DELTA_ID, 0.04); + PreDeCon<DoubleVector> predecon = ClassGenericsUtil.parameterizeOrAbort(PreDeCon.class, params); + testParameterizationOk(params); + + // run PredeCon on database + Clustering<Model> result = predecon.run(db); + testFMeasure(db, result, 0.6470817); + testClusterSizes(result, new int[] { 7, 10, 10, 13, 15, 16, 16, 18, 28, 131, 586 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestSUBCLUResults.java b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestSUBCLUResults.java new file mode 100644 index 00000000..65756bbc --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/TestSUBCLUResults.java @@ -0,0 +1,73 @@ +package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.model.SubspaceModel; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Performs a full SUBCLU run, and compares the result with a clustering derived + * from the data set labels. This test ensures that SUBCLU performance doesn't + * unexpectedly drop on this data set (and also ensures that the algorithms + * work, as a side effect). + * + * @author Elke Achtert + * @author Katharina Rausch + * @author Erich Schubert + */ +public class TestSUBCLUResults extends AbstractSimpleAlgorithmTest implements JUnit4Test { + /** + * Run SUBCLU with fixed parameters and compare the result to a golden + * standard. 
+ * + * @throws ParameterException + */ + @Test + public void testSUBCLUResults() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-simple.csv", 600); + + ListParameterization params = new ListParameterization(); + params.addParameter(SUBCLU.EPSILON_ID, 0.001); + params.addParameter(SUBCLU.MINPTS_ID, 100); + + // setup algorithm + SUBCLU<DoubleVector> subclu = ClassGenericsUtil.parameterizeOrAbort(SUBCLU.class, params); + testParameterizationOk(params); + + // run SUBCLU on database + Clustering<SubspaceModel<DoubleVector>> result = subclu.run(db); + + testFMeasure(db, result, 0.9090); + testClusterSizes(result, new int[] { 191, 194, 395 }); + } + + /** + * Run SUBCLU with fixed parameters and compare the result to a golden + * standard. + * + * @throws ParameterException + */ + @Test + public void testSUBCLUSubspaceOverlapping() { + Database db = makeSimpleDatabase(UNITTEST + "subspace-overlapping-3-4d.ascii", 850); + + // Setup algorithm + ListParameterization params = new ListParameterization(); + params.addParameter(SUBCLU.EPSILON_ID, 0.04); + params.addParameter(SUBCLU.MINPTS_ID, 70); + SUBCLU<DoubleVector> subclu = ClassGenericsUtil.parameterizeOrAbort(SUBCLU.class, params); + testParameterizationOk(params); + + // run SUBCLU on database + Clustering<SubspaceModel<DoubleVector>> result = subclu.run(db); + testFMeasure(db, result, 0.49279033); + testClusterSizes(result, new int[] { 99, 247, 303, 323, 437, 459 }); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java new file mode 100644 index 00000000..4a4a635c --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestABOD.java @@ -0,0 +1,37 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the ABOD algorithm. + * + * @author Lucia Cichella + */ +public class TestABOD extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testABOD() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(ABOD.K_ID, 5); + + // setup Algorithm + ABOD<DoubleVector> abod = ClassGenericsUtil.parameterizeOrAbort(ABOD.class, params); + testParameterizationOk(params); + + // run ABOD on database + OutlierResult result = abod.run(db); + + testSingleScore(result, 945, 3.7108897864090475E-4); + testAUC(db, "Noise", result, 0.9638148148148148); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestAggarwalYuEvolutionary.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestAggarwalYuEvolutionary.java new file mode 100644 index 00000000..e5eb09d5 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestAggarwalYuEvolutionary.java @@ -0,0 +1,40 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the AggarwalYuEvolutionary algorithm. + * + * @author Lucia Cichella + */ +public class TestAggarwalYuEvolutionary extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testAggarwalYuEvolutionary() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(AggarwalYuEvolutionary.K_ID, 2); + params.addParameter(AggarwalYuEvolutionary.PHI_ID, 8); + params.addParameter(AggarwalYuEvolutionary.M_ID, 5); + params.addParameter(AggarwalYuEvolutionary.SEED_ID, 0); + + // setup Algorithm + AggarwalYuEvolutionary<DoubleVector> aggarwalYuEvolutionary = ClassGenericsUtil.parameterizeOrAbort(AggarwalYuEvolutionary.class, params); + testParameterizationOk(params); + + // run AggarwalYuEvolutionary on database + OutlierResult result = aggarwalYuEvolutionary.run(db); + + testSingleScore(result, 945, 16.6553612449883); + testAUC(db, "Noise", result, 0.5799537037037); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestAggarwalYuNaive.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestAggarwalYuNaive.java new file mode 100644 index 00000000..cf396f35 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestAggarwalYuNaive.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the AggarwalYuNaive algorithm. + * + * @author Lucia Cichella + */ +public class TestAggarwalYuNaive extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testAggarwalYuNaive() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(AggarwalYuNaive.K_ID, 2); + params.addParameter(AggarwalYuNaive.PHI_ID, 8); + + // setup Algorithm + AggarwalYuNaive<DoubleVector> aggarwalYuNaive = ClassGenericsUtil.parameterizeOrAbort(AggarwalYuNaive.class, params); + testParameterizationOk(params); + + // run AggarwalYuNaive on database + OutlierResult result = aggarwalYuNaive.run(db); + + testSingleScore(result, 945, -2.3421601750764798); + testAUC(db, "Noise", result, 0.8652037037037); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestDBOutlierDetection.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestDBOutlierDetection.java new file mode 100644 index 00000000..5fb9c983 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestDBOutlierDetection.java @@ -0,0 +1,39 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the DBOutlierDetection algorithm. + * + * @author Lucia Cichella + */ +public class TestDBOutlierDetection extends AbstractSimpleAlgorithmTest implements JUnit4Test{ + @Test + public void testDBOutlierDetection() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-fire.ascii", 1025); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(DBOutlierDetection.D_ID, 0.175); + params.addParameter(DBOutlierDetection.P_ID, 0.98); + + //setup Algorithm + DBOutlierDetection<DoubleVector, DoubleDistance> dbOutlierDetection = ClassGenericsUtil.parameterizeOrAbort(DBOutlierDetection.class, params); + testParameterizationOk(params); + + //run DBOutlierDetection on database + OutlierResult result = dbOutlierDetection.run(db); + + testSingleScore(result, 1025, 0.0); + testAUC(db, "Noise", result, 0.97487179); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestDBOutlierScore.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestDBOutlierScore.java new file mode 100644 index 00000000..6f76efe2 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestDBOutlierScore.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the DBOutlierScore algorithm. + * + * @author Lucia Cichella + */ +public class TestDBOutlierScore extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testDBOutlierScore() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-fire.ascii", 1025); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(DBOutlierScore.D_ID, 0.175); + + // setup Algorithm + DBOutlierScore<DoubleVector, DoubleDistance> dbOutlierScore = ClassGenericsUtil.parameterizeOrAbort(DBOutlierScore.class, params); + testParameterizationOk(params); + + // run DBOutlierScore on database + OutlierResult result = dbOutlierScore.run(db); + + testSingleScore(result, 1025, 0.688780487804878); + testAUC(db, "Noise", result, 0.992565641); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestGaussianModel.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestGaussianModel.java new file mode 100644 index 00000000..96107e8a --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestGaussianModel.java @@ -0,0 +1,36 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the GaussianModel algorithm. + * + * @author Lucia Cichella + */ +public class TestGaussianModel extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testGaussianModel() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-fire.ascii", 1025); + + // Parameterization + ListParameterization params = new ListParameterization(); + + // setup Algorithm + GaussianModel<DoubleVector> gaussianModel = ClassGenericsUtil.parameterizeOrAbort(GaussianModel.class, params); + testParameterizationOk(params); + + // run GaussianModel on database + OutlierResult result = gaussianModel.run(db); + + testSingleScore(result, 1025, 2.8312466458765426); + testAUC(db, "Noise", result, 0.9937641025641025); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestGaussianUniformMixture.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestGaussianUniformMixture.java new file mode 100644 index 00000000..a59c7810 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestGaussianUniformMixture.java @@ -0,0 +1,36 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the GaussianUniformMixture algorithm. + * + * @author Lucia Cichella + */ +public class TestGaussianUniformMixture extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testGaussianUniformMixture() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-fire.ascii", 1025); + + // Parameterization + ListParameterization params = new ListParameterization(); + + // setup Algorithm + GaussianUniformMixture<DoubleVector> gaussianUniformMixture = ClassGenericsUtil.parameterizeOrAbort(GaussianUniformMixture.class, params); + testParameterizationOk(params); + + // run GaussianUniformMixture on database + OutlierResult result = gaussianUniformMixture.run(db); + + testSingleScore(result, 1025, -20.2862041); + testAUC(db, "Noise", result, 0.94404102); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestINFLO.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestINFLO.java new file mode 100644 index 00000000..25092ad5 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestINFLO.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the INFLO algorithm. + * + * @author Lucia Cichella + */ +public class TestINFLO extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testINFLO() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(INFLO.K_ID, 29); + + // setup Algorithm + INFLO<DoubleVector, DoubleDistance> inflo = ClassGenericsUtil.parameterizeOrAbort(INFLO.class, params); + testParameterizationOk(params); + + // run INFLO on database + OutlierResult result = inflo.run(db); + + testSingleScore(result, 945, 2.5711647857619484); + testAUC(db, "Noise", result, 0.935222); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestKNNOutlier.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestKNNOutlier.java new file mode 100644 index 00000000..c5ba91b7 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestKNNOutlier.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the KNNOutlier algorithm. + * + * @author Lucia Cichella + */ +public class TestKNNOutlier extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testKNNOutlier() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(KNNOutlier.K_ID, 2); + + // setup Algorithm + KNNOutlier<DoubleVector, DoubleDistance> knnOutlier = ClassGenericsUtil.parameterizeOrAbort(KNNOutlier.class, params); + testParameterizationOk(params); + + // run KNNOutlier on database + OutlierResult result = knnOutlier.run(db); + + testSingleScore(result, 945, 0.4793554700168577); + testAUC(db, "Noise", result, 0.991462962962963); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestKNNWeightOutlier.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestKNNWeightOutlier.java new file mode 100644 index 00000000..e84db06c --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestKNNWeightOutlier.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the KNNWeightOutlier algorithm. + * + * @author Lucia Cichella + */ +public class TestKNNWeightOutlier extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testKNNWeightOutlier() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(KNNWeightOutlier.K_ID, 5); + + // setup Algorithm + KNNWeightOutlier<DoubleVector, DoubleDistance> knnWeightOutlier = ClassGenericsUtil.parameterizeOrAbort(KNNWeightOutlier.class, params); + testParameterizationOk(params); + + // run KNNWeightOutlier on database + OutlierResult result = knnWeightOutlier.run(db); + + testSingleScore(result, 945, 2.384117261027324); + testAUC(db, "Noise", result, 0.9912777777777778); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLDOF.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLDOF.java new file mode 100644 index 00000000..cdcbd94b --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLDOF.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the LDOF algorithm. + * + * @author Lucia Cichella + */ +public class TestLDOF extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testLDOF() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-fire.ascii", 1025); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(LDOF.K_ID, 25); + + // setup Algorithm + LDOF<DoubleVector, DoubleDistance> ldof = ClassGenericsUtil.parameterizeOrAbort(LDOF.class, params); + testParameterizationOk(params); + + // run LDOF on database + OutlierResult result = ldof.run(db); + + testSingleScore(result, 1025, 0.8976268846182947); + testAUC(db, "Noise", result, 0.9637948717948718); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLOCI.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLOCI.java new file mode 100644 index 00000000..f4e95736 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLOCI.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the LOCI algorithm. + * + * @author Lucia Cichella + */ +public class TestLOCI extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testLOCI() { + Database db = makeSimpleDatabase(UNITTEST + "3clusters-and-noise-2d.csv", 330); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(LOCI.RMAX_ID, 0.5); + + // setup Algorithm + LOCI<DoubleVector, DoubleDistance> loci = ClassGenericsUtil.parameterizeOrAbort(LOCI.class, params); + testParameterizationOk(params); + + // run LOCI on database + OutlierResult result = loci.run(db); + + testAUC(db, "Noise", result, 0.954444); + testSingleScore(result, 146, 4.14314916); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLOF.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLOF.java new file mode 100644 index 00000000..b7586344 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLOF.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the LOF algorithm. + * + * @author Lucia Cichella + */ +public class TestLOF extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testLOF() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(LOF.K_ID, 10); + + // setup Algorithm + LOF<DoubleVector, DoubleDistance> lof = ClassGenericsUtil.parameterizeOrAbort(LOF.class, params); + testParameterizationOk(params); + + // run LOF on database + OutlierResult result = lof.run(db); + + testSingleScore(result, 1293, 1.1945314199156365); + testAUC(db, "Noise", result, 0.8921680672268908); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLoOP.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLoOP.java new file mode 100644 index 00000000..0aa10999 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestLoOP.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the LoOP algorithm. + * + * @author Lucia Cichella + */ +public class TestLoOP extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testLoOP() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(LoOP.KCOMP_ID, 15); + + // setup Algorithm + LoOP<DoubleVector, DoubleDistance> loop = ClassGenericsUtil.parameterizeOrAbort(LoOP.class, params); + testParameterizationOk(params); + + // run LoOP on database + OutlierResult result = loop.run(db); + + testSingleScore(result, 945, 0.39805457858293325); + testAUC(db, "Noise", result, 0.9443796296296296); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestOPTICSOF.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestOPTICSOF.java new file mode 100644 index 00000000..218e52a8 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestOPTICSOF.java @@ -0,0 +1,39 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICS; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the OPTICS-OF algorithm. + * + * @author Lucia Cichella + */ +public class TestOPTICSOF extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testOPTICSOF() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-parabolic.ascii", 530); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(OPTICS.MINPTS_ID, 22); + + // setup Algorithm + OPTICSOF<DoubleVector, DoubleDistance> opticsof = ClassGenericsUtil.parameterizeOrAbort(OPTICSOF.class, params); + testParameterizationOk(params); + + // run OPTICSOF on database + OutlierResult result = opticsof.run(db); + + testSingleScore(result, 416, 1.6108343626651815); + testAUC(db, "Noise", result, 0.9058); + } +}
package de.lmu.ifi.dbs.elki.algorithm.outlier;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import java.util.Random;

import org.junit.Test;

import de.lmu.ifi.dbs.elki.JUnit4Test;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.HashmapDatabase;
import de.lmu.ifi.dbs.elki.database.UpdatableDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.distance.distancefunction.CosineDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;

/**
 * Tests the OnlineLOF algorithm. Compares the result of the static LOF
 * algorithm to the result of the OnlineLOF algorithm, where some insertions
 * and deletions (of the previously inserted objects) have been applied to the
 * database. Since every inserted object is deleted again, the final database
 * content is the same in both runs, so the scores must match exactly.
 *
 * @author Elke Achtert
 *
 */
public class TestOnlineLOF implements JUnit4Test {
  // the following values depend on the data set used!
  static String dataset = "data/testdata/unittests/3clusters-and-noise-2d.csv";

  // parameter k for LOF and OnlineLOF
  static int k = 5;

  // neighborhood distance function for LOF and OnlineLOF
  // (raw type: both algorithms take the same unparameterized function)
  @SuppressWarnings("rawtypes")
  static DistanceFunction neighborhoodDistanceFunction = EuclideanDistanceFunction.STATIC;

  // reachability distance function for LOF and OnlineLOF
  // (deliberately different from the neighborhood distance)
  @SuppressWarnings("rawtypes")
  static DistanceFunction reachabilityDistanceFunction = CosineDistanceFunction.STATIC;

  // seed for the generator (fixed so the inserted objects are reproducible)
  static int seed = 5;

  // number of objects to insert and then delete again
  static int size = 50;

  /**
   * First, run the {@link LOF} algorithm on the database. Second, run the
   * {@link OnlineLOF} algorithm on the database, insert new objects and
   * afterwards delete them. Then, compare the two results for equality.
   *
   * @throws UnableToComplyException
   */
  @SuppressWarnings("unchecked")
  @Test
  public void testOnlineLOF() throws UnableToComplyException {
    UpdatableDatabase db = getDatabase();

    // 1. Run LOF
    LOF<DoubleVector, DoubleDistance> lof = new LOF<DoubleVector, DoubleDistance>(k, neighborhoodDistanceFunction, reachabilityDistanceFunction);
    OutlierResult result1 = lof.run(db);

    // 2. Run OnlineLOF (with insertions and removals) on database
    OutlierResult result2 = runOnlineLOF(db);

    // 3. Compare results: after insert+delete churn the online scores must
    // equal the static ones. Double.equals is exact comparison on purpose.
    Relation<Double> scores1 = result1.getScores();
    Relation<Double> scores2 = result2.getScores();

    for(DBID id : scores1.getDBIDs()) {
      Double lof1 = scores1.get(id);
      Double lof2 = scores2.get(id);
      assertTrue("lof(" + id + ") != lof(" + id + "): " + lof1 + " != " + lof2, lof1.equals(lof2));
    }
  }

  /**
   * Run OnlineLOF (with insertions and removals) on database.
   *
   * NOTE(review): the result is obtained BEFORE the insertions/deletions;
   * OnlineLOF is presumably expected to keep it up to date under database
   * updates (e.g. via listeners) — confirm against the OnlineLOF implementation.
   */
  @SuppressWarnings("unchecked")
  private static OutlierResult runOnlineLOF(UpdatableDatabase db) throws UnableToComplyException {
    Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);

    // setup algorithm
    OnlineLOF<DoubleVector, DoubleDistance> lof = new OnlineLOF<DoubleVector, DoubleDistance>(k, neighborhoodDistanceFunction, reachabilityDistanceFunction);

    // run OnlineLOF on database
    OutlierResult result = lof.run(db);

    // insert new random objects (seeded generator, "size" many)
    ArrayList<DoubleVector> insertions = new ArrayList<DoubleVector>();
    DoubleVector o = DatabaseUtil.assumeVectorField(rep).getFactory();
    Random random = new Random(seed);
    for(int i = 0; i < size; i++) {
      DoubleVector obj = VectorUtil.randomVector(o, random);
      insertions.add(obj);
    }
    DBIDs deletions = db.insert(MultipleObjectsBundle.makeSimple(rep.getDataTypeInformation(), insertions));

    // delete exactly the objects just inserted, restoring the original content
    db.delete(deletions);

    return result;
  }

  /**
   * Returns the database, loaded from {@link #dataset} and initialized.
   */
  private static UpdatableDatabase getDatabase() {
    ListParameterization params = new ListParameterization();
    params.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);

    UpdatableDatabase db = ClassGenericsUtil.parameterizeOrAbort(HashmapDatabase.class, params);
    params.failOnErrors();
    if(params.hasUnusedParameters()) {
      fail("Unused parameters: " + params.getRemainingParameters());
    }

    // get database
    db.initialize();
    return db;
  }

}
+import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; +import de.lmu.ifi.dbs.elki.utilities.referencepoints.GridBasedReferencePoints; + +/** + * Tests the ReferenceBasedOutlierDetection algorithm. + * + * @author Lucia Cichella + */ +public class TestReferenceBasedOutlierDetection extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testReferenceBasedOutlierDetection() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-3d-3clusters.ascii", 960); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(ReferenceBasedOutlierDetection.K_ID, 11); + params.addParameter(GridBasedReferencePoints.GRID_ID, 11); + + // setup Algorithm + ReferenceBasedOutlierDetection<DoubleVector, DoubleDistance> referenceBasedOutlierDetection = ClassGenericsUtil.parameterizeOrAbort(ReferenceBasedOutlierDetection.class, params); + testParameterizationOk(params); + + // run ReferenceBasedOutlierDetection on database + OutlierResult result = referenceBasedOutlierDetection.run(db); + + testSingleScore(result, 945, 0.9260829537195538); + testAUC(db, "Noise", result, 0.9892407407407409); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestSOD.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestSOD.java new file mode 100644 index 00000000..d26ab55a --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/TestSOD.java @@ -0,0 +1,40 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.snn.SharedNearestNeighborPreprocessor; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the SOD algorithm. + * + * @author Lucia Cichella + */ +public class TestSOD extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testSOD() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(SOD.KNN_ID, 25); + params.addParameter(SharedNearestNeighborPreprocessor.Factory.NUMBER_OF_NEIGHBORS_ID, 19); + + // setup Algorithm + SOD<DoubleVector, DoubleDistance> sod = ClassGenericsUtil.parameterizeOrAbort(SOD.class, params); + testParameterizationOk(params); + + // run SOD on database + OutlierResult result = sod.run(db); + + testSingleScore(result, 1293, 1.7277777); + testAUC(db, "Noise", result, 0.94956862); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/TestFeatureBagging.java b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/TestFeatureBagging.java new file mode 100644 index 00000000..68d68f62 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/TestFeatureBagging.java @@ -0,0 +1,61 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.AbstractSimpleAlgorithmTest; +import de.lmu.ifi.dbs.elki.algorithm.outlier.LOF; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Tests the Feature Bagging algorithm. + * + * @author Erich Schubert + */ +public class TestFeatureBagging extends AbstractSimpleAlgorithmTest implements JUnit4Test { + @Test + public void testFeatureBaggingSum() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(LOF.K_ID, 10); + params.addParameter(FeatureBagging.Parameterizer.NUM_ID, 10); + params.addParameter(FeatureBagging.Parameterizer.SEED_ID, 1); + + // setup Algorithm + FeatureBagging fb = ClassGenericsUtil.parameterizeOrAbort(FeatureBagging.class, params); + testParameterizationOk(params); + + // run AggarwalYuEvolutionary on database + OutlierResult result = fb.run(db); + + testSingleScore(result, 1293, 11.8295414); + testAUC(db, "Noise", result, 0.9066106); + } + + @Test + public void testFeatureBaggingBreadth() { + Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345); + + // Parameterization + ListParameterization params = new ListParameterization(); + params.addParameter(LOF.K_ID, 10); + 
params.addParameter(FeatureBagging.Parameterizer.NUM_ID, 10); + params.addParameter(FeatureBagging.Parameterizer.SEED_ID, 5); + params.addFlag(FeatureBagging.Parameterizer.BREADTH_ID); + + // setup Algorithm + FeatureBagging fb = ClassGenericsUtil.parameterizeOrAbort(FeatureBagging.class, params); + testParameterizationOk(params); + + // run AggarwalYuEvolutionary on database + OutlierResult result = fb.run(db); + + testSingleScore(result, 1293, 1.321709879); + testAUC(db, "Noise", result, 0.88466106); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/data/spatial/TestPolygon.java b/test/de/lmu/ifi/dbs/elki/data/spatial/TestPolygon.java new file mode 100644 index 00000000..385461e7 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/data/spatial/TestPolygon.java @@ -0,0 +1,51 @@ +package de.lmu.ifi.dbs.elki.data.spatial; + +import static org.junit.Assert.*; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; + +public class TestPolygon implements JUnit4Test { + @Test + public void testPolygonContainment() { + final Polygon p1, p2, p3; + { + List<Vector> v1 = new ArrayList<Vector>(); + v1.add(new Vector(0, 0)); + v1.add(new Vector(.9, 0)); + v1.add(new Vector(0, .9)); + p1 = new Polygon(v1); + } + { + List<Vector> v2 = new ArrayList<Vector>(); + v2.add(new Vector(1, 1)); + v2.add(new Vector(1, .1)); + v2.add(new Vector(.1, 1)); + p2 = new Polygon(v2); + } + { + List<Vector> v3 = new ArrayList<Vector>(); + v3.add(new Vector(.1, .1)); + v3.add(new Vector(.1, .9)); + v3.add(new Vector(.9, .9)); + v3.add(new Vector(.9, .1)); + p3 = new Polygon(v3); + } + Vector pou = new Vector(-1, -1); + Vector p22 = new Vector(.2, .2); + assertFalse("P2 not in p1", p1.containsPoint2D(pou)); + assertFalse("P2 not in p2", p2.containsPoint2D(pou)); + assertFalse("P2 not in p3", p3.containsPoint2D(pou)); + assertTrue("P2 not in p1", p1.containsPoint2D(p22)); + assertFalse("P2 in p2", p2.containsPoint2D(p22)); + assertTrue("P2 not in p3", p3.containsPoint2D(p22)); + assertFalse("Polygons p1 and p2 must not intersect.", p1.intersects2DIncomplete(p2)); + assertTrue("Polygons p1 and p3 must intersect.", p1.intersects2DIncomplete(p3)); + assertTrue("Polygons p2 and p3 must intersect.", p2.intersects2DIncomplete(p3)); + } +} diff --git a/test/de/lmu/ifi/dbs/elki/evaluation/TestComputeROC.java b/test/de/lmu/ifi/dbs/elki/evaluation/TestComputeROC.java new file 
mode 100644 index 00000000..46adb1bb --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/evaluation/TestComputeROC.java @@ -0,0 +1,55 @@ +package de.lmu.ifi.dbs.elki.evaluation; + +import java.util.ArrayList; +import java.util.List; + +import junit.framework.Assert; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; +import de.lmu.ifi.dbs.elki.evaluation.roc.ROC; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair; +import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; + +/** + * Test to validate ROC curve computation. + * + * @author Erich Schubert + * + */ +public class TestComputeROC implements JUnit4Test { + /** + * Test ROC curve generation, including curve simplification + */ + @Test + public void testROCCurve() { + HashSetModifiableDBIDs positive = DBIDUtil.newHashSet(); + positive.add(DBIDUtil.importInteger(1)); + positive.add(DBIDUtil.importInteger(2)); + positive.add(DBIDUtil.importInteger(3)); + positive.add(DBIDUtil.importInteger(4)); + positive.add(DBIDUtil.importInteger(5)); + + ArrayList<Pair<Double, DBID>> distances = new ArrayList<Pair<Double, DBID>>(); + distances.add(new Pair<Double, DBID>(0.0, DBIDUtil.importInteger(1))); + distances.add(new Pair<Double, DBID>(1.0, DBIDUtil.importInteger(2))); + distances.add(new Pair<Double, DBID>(2.0, DBIDUtil.importInteger(6))); + distances.add(new Pair<Double, DBID>(3.0, DBIDUtil.importInteger(7))); + distances.add(new Pair<Double, DBID>(3.0, DBIDUtil.importInteger(3))); + distances.add(new Pair<Double, DBID>(4.0, DBIDUtil.importInteger(8))); + distances.add(new Pair<Double, DBID>(4.0, DBIDUtil.importInteger(4))); + distances.add(new Pair<Double, DBID>(5.0, DBIDUtil.importInteger(9))); + distances.add(new Pair<Double, DBID>(6.0, DBIDUtil.importInteger(5))); + + List<DoubleDoublePair> roccurve = ROC.materializeROC(9, positive, 
distances.iterator()); + // System.out.println(roccurve); + Assert.assertEquals("ROC curve too complex", 6, roccurve.size()); + + double auc = ROC.computeAUC(roccurve); + Assert.assertEquals("ROC AUC not right.", 0.6, auc, 0.0001); + } +} diff --git a/test/de/lmu/ifi/dbs/elki/evaluation/TestPairCountingFMeasure.java b/test/de/lmu/ifi/dbs/elki/evaluation/TestPairCountingFMeasure.java new file mode 100644 index 00000000..4944db3d --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/evaluation/TestPairCountingFMeasure.java @@ -0,0 +1,78 @@ +package de.lmu.ifi.dbs.elki.evaluation; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering; +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllInOne; +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllNoise; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.PairCountingFMeasure; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Validate {@link PairCountingFMeasure} with respect to its ability to compare + * data clusterings. + * + * @author Erich Schubert + */ +public class TestPairCountingFMeasure implements JUnit4Test { + // the following values depend on the data set used! 
+ String dataset = "data/testdata/unittests/hierarchical-3d2d1d.csv"; + + // size of the data set + int shoulds = 600; + + /** + * Validate {@link PairCountingFMeasure} with respect to its ability to + * compare data clusterings. + * + * @throws ParameterException on errors. + */ + @Test + public void testCompareDatabases() { + ListParameterization params = new ListParameterization(); + // Input + params.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset); + + // get database + Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params); + db.initialize(); + + // verify data set size. + Relation<?> rel = db.getRelation(TypeUtil.ANY); + assertTrue(rel.size() == shoulds); + + // run all-in-one + TrivialAllInOne allinone = new TrivialAllInOne(); + Clustering<Model> rai = allinone.run(db); + + // run all-in-noise + TrivialAllNoise allinnoise = new TrivialAllNoise(); + Clustering<Model> ran = allinnoise.run(db); + + // run by-label + ByLabelClustering bylabel = new ByLabelClustering(); + Clustering<?> rbl = bylabel.run(db); + + assertEquals(1.0, PairCountingFMeasure.compareClusterings(rai, rai), Double.MIN_VALUE); + assertEquals(1.0, PairCountingFMeasure.compareClusterings(ran, ran), Double.MIN_VALUE); + assertEquals(1.0, PairCountingFMeasure.compareClusterings(rbl, rbl), Double.MIN_VALUE); + + assertEquals(0.009950248756218905, PairCountingFMeasure.compareClusterings(ran, rbl, true, false), Double.MIN_VALUE); + assertEquals(0.0033277870216306157, PairCountingFMeasure.compareClusterings(rai, ran, true, false), Double.MIN_VALUE); + + assertEquals(0.5 /* 0.3834296724470135 */, PairCountingFMeasure.compareClusterings(rai, rbl), Double.MIN_VALUE); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java b/test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java new file mode 100644 index 00000000..8d30bea4 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/index/TestIndexStructures.java @@ -0,0 +1,202 @@ +package de.lmu.ifi.dbs.elki.index; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertTrue; + +import java.util.List; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanKNNQuery; +import de.lmu.ifi.dbs.elki.database.query.range.LinearScanRangeQuery; +import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.tree.TreeIndexFactory; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mtree.MTree; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mtree.MTreeFactory; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexKNNQuery; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexRangeQuery; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeFactory; +import 
de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.query.DoubleDistanceRStarTreeKNNQuery; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.query.DoubleDistanceRStarTreeRangeQuery; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTree; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeFactory; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.util.ApproximateLeastOverlapInsertionStrategy; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Test case to validate some index structures for accuracy. For a known data + * set and query point, the top 10 nearest neighbors are queried and verified. + * + * Note that the internal operation of the index structure is not tested this + * way, only whether the database object with the index still returns reasonable + * results. + * + * @author Erich Schubert + */ +public class TestIndexStructures implements JUnit4Test { + // the following values depend on the data set used! 
+ String dataset = "data/testdata/unittests/hierarchical-3d2d1d.csv"; + + // size of the data set + int shoulds = 600; + + // query point + double[] querypoint = new double[] { 0.5, 0.5, 0.5 }; + + // number of kNN to query + int k = 10; + + // the 10 next neighbors of the query point + double[][] shouldc = new double[][] { { 0.45000428746088883, 0.484504234161508, 0.5538595167151342 }, { 0.4111050036231091, 0.429204794352013, 0.4689430202460606 }, { 0.4758477631164003, 0.6021538103067177, 0.5556807408692025 }, { 0.4163288957164025, 0.49604545242979536, 0.4054361013566713 }, { 0.5819940640461848, 0.48586944418231115, 0.6289592025558619 }, { 0.4373568207802466, 0.3468650110814596, 0.49566951629699485 }, { 0.40283109564192643, 0.6301433694690401, 0.44313571161129883 }, { 0.6545840114867083, 0.4919617658889418, 0.5905461546078652 }, { 0.6011097673869055, 0.6562921241634017, 0.44830647520493694 }, { 0.5127485678175534, 0.29708449200895504, 0.561722374659424 }, }; + + // and their distances + double[] shouldd = new double[] { 0.07510351238126374, 0.11780839322826206, 0.11882371989803064, 0.1263282354232315, 0.15347043712184602, 0.1655090505771259, 0.17208323533934652, 0.17933052146586306, 0.19319066655063877, 0.21247795391113142 }; + + DoubleDistance eps = new DoubleDistance(0.21247795391113142); + + /** + * Test exact query, also to validate the test is correct. + * + * @throws ParameterException on errors. + */ + @Test + public void testExact() { + ListParameterization params = new ListParameterization(); + testFileBasedDatabaseConnection(params, LinearScanKNNQuery.class, LinearScanRangeQuery.class); + } + + /** + * Test {@link MTree} using a file based database connection. + * + * @throws ParameterException on errors. 
+ */ + @Test + public void testMetrical() { + ListParameterization metparams = new ListParameterization(); + metparams.addParameter(StaticArrayDatabase.INDEX_ID, MTreeFactory.class); + metparams.addParameter(TreeIndexFactory.PAGE_SIZE_ID, 100); + testFileBasedDatabaseConnection(metparams, MetricalIndexKNNQuery.class, MetricalIndexRangeQuery.class); + } + + /** + * Test {@link RStarTree} using a file based database connection. + * + * @throws ParameterException on errors. + */ + @Test + public void testRStarTree() { + ListParameterization spatparams = new ListParameterization(); + spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class); + spatparams.addParameter(TreeIndexFactory.PAGE_SIZE_ID, 300); + testFileBasedDatabaseConnection(spatparams, DoubleDistanceRStarTreeKNNQuery.class, DoubleDistanceRStarTreeRangeQuery.class); + } + + /** + * Test {@link RStarTree} using a file based database connection. With "fast" + * mode enabled on an extreme level (since this should only reduce + * performance, not accuracy!) + * + * @throws ParameterException on errors. + */ + @Test + public void testRStarTreeFast() { + ListParameterization spatparams = new ListParameterization(); + spatparams.addParameter(StaticArrayDatabase.INDEX_ID, RStarTreeFactory.class); + spatparams.addParameter(AbstractRStarTreeFactory.INSERTION_STRATEGY_ID, ApproximateLeastOverlapInsertionStrategy.class); + spatparams.addParameter(ApproximateLeastOverlapInsertionStrategy.Parameterizer.INSERTION_CANDIDATES_ID, 1); + spatparams.addParameter(TreeIndexFactory.PAGE_SIZE_ID, 300); + testFileBasedDatabaseConnection(spatparams, DoubleDistanceRStarTreeKNNQuery.class, DoubleDistanceRStarTreeRangeQuery.class); + } + + /** + * Test {@link XTree} using a file based database connection. 
+ * + * @throws ParameterException + */ +// @Test +// public void testXTree() { +// ListParameterization xtreeparams = new ListParameterization(); +// xtreeparams.addParameter(StaticArrayDatabase.INDEX_ID, experimentalcode.shared.index.xtree.XTreeFactory.class); +// xtreeparams.addParameter(TreeIndexFactory.PAGE_SIZE_ID, 300); +// testFileBasedDatabaseConnection(xtreeparams, DoubleDistanceRStarTreeKNNQuery.class, DoubleDistanceRStarTreeRangeQuery.class); +// } + + /** + * Actual test routine. + * + * @param inputparams + * @throws ParameterException + */ + void testFileBasedDatabaseConnection(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) { + inputparams.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset); + + // get database + Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, inputparams); + db.initialize(); + Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD); + DistanceQuery<DoubleVector, DoubleDistance> dist = db.getDistanceQuery(rep, EuclideanDistanceFunction.STATIC); + + // verify data set size. + assertTrue(rep.size() == shoulds); + + { + // get the 10 next neighbors + DoubleVector dv = new DoubleVector(querypoint); + KNNQuery<DoubleVector, DoubleDistance> knnq = db.getKNNQuery(dist, k); + assertTrue("Returned knn query is not of expected class.", expectKNNQuery.isAssignableFrom(knnq.getClass())); + List<DistanceResultPair<DoubleDistance>> ids = knnq.getKNNForObject(dv, k); + assertEquals("Result size does not match expectation!", shouldd.length, ids.size()); + + // verify that the neighbors match. 
+ int i = 0; + for(DistanceResultPair<DoubleDistance> res : ids) { + // Verify distance + assertEquals("Expected distance doesn't match.", shouldd[i], res.getDistance().doubleValue()); + // verify vector + DBID id = res.getDBID(); + DoubleVector c = rep.get(id); + DoubleVector c2 = new DoubleVector(shouldc[i]); + assertEquals("Expected vector doesn't match: " + c.toString(), 0.0, dist.distance(c, c2).doubleValue(), 0.00001); + + i++; + } + } + { + // Do a range query + DoubleVector dv = new DoubleVector(querypoint); + RangeQuery<DoubleVector, DoubleDistance> rangeq = db.getRangeQuery(dist, eps); + assertTrue("Returned range query is not of expected class.", expectRangeQuery.isAssignableFrom(rangeq.getClass())); + List<DistanceResultPair<DoubleDistance>> ids = rangeq.getRangeForObject(dv, eps); + assertEquals("Result size does not match expectation!", shouldd.length, ids.size()); + + // verify that the neighbors match. + int i = 0; + for(DistanceResultPair<DoubleDistance> res : ids) { + // Verify distance + assertEquals("Expected distance doesn't match.", shouldd[i], res.getDistance().doubleValue()); + // verify vector + DBID id = res.getDBID(); + DoubleVector c = rep.get(id); + DoubleVector c2 = new DoubleVector(shouldc[i]); + assertEquals("Expected vector doesn't match: " + c.toString(), 0.0, dist.distance(c, c2).doubleValue(), 0.00001); + + i++; + } + } + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java b/test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java new file mode 100644 index 00000000..2d95074a --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/index/preprocessed/TestMaterializedKNNAndRKNNPreprocessor.java @@ -0,0 +1,174 @@ +package de.lmu.ifi.dbs.elki.index.preprocessed; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.VectorUtil; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.HashmapDatabase; +import de.lmu.ifi.dbs.elki.database.UpdatableDatabase; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.LinearScanKNNQuery; +import de.lmu.ifi.dbs.elki.database.query.rknn.LinearScanRKNNQuery; +import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle; +import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNAndRKNNPreprocessor; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; +import 
de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * Test case to validate the dynamic updates of materialized kNN and RkNN + * preprocessors. + * + * + * some index structures for accuracy. For a known data set and query point, the + * top 10 nearest neighbors are queried and verified. + * + * + * @author Elke Achtert + */ +public class TestMaterializedKNNAndRKNNPreprocessor implements JUnit4Test { + // the following values depend on the data set used! + static String dataset = "data/testdata/unittests/3clusters-and-noise-2d.csv"; + + // number of kNN to query + int k = 10; + + // the size of objects inserted and deleted + int updatesize = 12; + + int seed = 5; + + // size of the data set + int shoulds = 330; + + /** + * Actual test routine. + * + * @throws ParameterException + * @throws UnableToComplyException + */ + @Test + public void testPreprocessor() throws ParameterException, UnableToComplyException { + ListParameterization params = new ListParameterization(); + params.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset); + + // get database + UpdatableDatabase db = ClassGenericsUtil.parameterizeOrAbort(HashmapDatabase.class, params); + db.initialize(); + Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD); + DistanceQuery<DoubleVector, DoubleDistance> distanceQuery = db.getDistanceQuery(rep, EuclideanDistanceFunction.STATIC); + + // verify data set size. 
+ assertEquals("Data set size doesn't match parameters.", shoulds, rep.size()); + + // get linear queries + LinearScanKNNQuery<DoubleVector, DoubleDistance> lin_knn_query = new LinearScanKNNQuery<DoubleVector, DoubleDistance>(distanceQuery); + LinearScanRKNNQuery<DoubleVector, DoubleDistance> lin_rknn_query = new LinearScanRKNNQuery<DoubleVector, DoubleDistance>(distanceQuery, lin_knn_query, k); + + // get preprocessed queries + ListParameterization config = new ListParameterization(); + config.addParameter(MaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, distanceQuery.getDistanceFunction()); + config.addParameter(MaterializeKNNPreprocessor.Factory.K_ID, k); + MaterializeKNNAndRKNNPreprocessor<DoubleVector, DoubleDistance> preproc = new MaterializeKNNAndRKNNPreprocessor<DoubleVector, DoubleDistance>(rep, distanceQuery.getDistanceFunction(), k); + KNNQuery<DoubleVector, DoubleDistance> preproc_knn_query = preproc.getKNNQuery(distanceQuery, k); + RKNNQuery<DoubleVector, DoubleDistance> preproc_rknn_query = preproc.getRKNNQuery(distanceQuery); + // add as index + db.addIndex(preproc); + assertTrue("Preprocessor knn query class incorrect.", !(preproc_knn_query instanceof LinearScanKNNQuery)); + assertTrue("Preprocessor rknn query class incorrect.", !(preproc_rknn_query instanceof LinearScanKNNQuery)); + + // test queries + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k); + testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k); + // also test partial queries, forward only + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k / 2); + + // insert new objects + List<DoubleVector> insertions = new ArrayList<DoubleVector>(); + DoubleVector o = DatabaseUtil.assumeVectorField(rep).getFactory(); + Random random = new Random(seed); + for(int i = 0; i < updatesize; i++) { + DoubleVector obj = VectorUtil.randomVector(o, random); + insertions.add(obj); + } + System.out.println("Insert " + insertions); + System.out.println(); + DBIDs deletions = 
db.insert(MultipleObjectsBundle.makeSimple(rep.getDataTypeInformation(), insertions)); + + // test queries + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k); + testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k); + + // delete objects + System.out.println("Delete " + deletions); + db.delete(deletions); + + // test queries + testKNNQueries(rep, lin_knn_query, preproc_knn_query, k); + testRKNNQueries(rep, lin_rknn_query, preproc_rknn_query, k); + } + + private void testKNNQueries(Relation<DoubleVector> rep, KNNQuery<DoubleVector, DoubleDistance> lin_knn_query, KNNQuery<DoubleVector, DoubleDistance> preproc_knn_query, int k) { + ArrayDBIDs sample = DBIDUtil.ensureArray(rep.getDBIDs()); + List<List<DistanceResultPair<DoubleDistance>>> lin_knn_ids = lin_knn_query.getKNNForBulkDBIDs(sample, k); + List<List<DistanceResultPair<DoubleDistance>>> preproc_knn_ids = preproc_knn_query.getKNNForBulkDBIDs(sample, k); + for(int i = 0; i < rep.size(); i++) { + List<DistanceResultPair<DoubleDistance>> lin_knn = lin_knn_ids.get(i); + List<DistanceResultPair<DoubleDistance>> pre_knn = preproc_knn_ids.get(i); + if(!lin_knn.equals(pre_knn)) { + System.out.println("LIN kNN " + lin_knn); + System.out.println("PRE kNN " + pre_knn); + } + assertEquals("kNN sizes do not agree.", lin_knn.size(), pre_knn.size()); + for(int j = 0; j < lin_knn.size(); j++) { + assertTrue("kNNs of linear scan and preprocessor do not match!", lin_knn.get(j).getDBID().equals(pre_knn.get(j).getDBID())); + assertTrue("kNNs of linear scan and preprocessor do not match!", lin_knn.get(j).getDistance().equals(pre_knn.get(j).getDistance())); + } + } + System.out.println("knns ok"); + } + + private void testRKNNQueries(Relation<DoubleVector> rep, RKNNQuery<DoubleVector, DoubleDistance> lin_rknn_query, RKNNQuery<DoubleVector, DoubleDistance> preproc_rknn_query, int k) { + ArrayDBIDs sample = DBIDUtil.ensureArray(rep.getDBIDs()); + List<List<DistanceResultPair<DoubleDistance>>> lin_rknn_ids = 
lin_rknn_query.getRKNNForBulkDBIDs(sample, k); + List<List<DistanceResultPair<DoubleDistance>>> preproc_rknn_ids = preproc_rknn_query.getRKNNForBulkDBIDs(sample, k); + for(int i = 0; i < rep.size(); i++) { + List<DistanceResultPair<DoubleDistance>> lin_rknn = lin_rknn_ids.get(i); + List<DistanceResultPair<DoubleDistance>> pre_rknn = preproc_rknn_ids.get(i); + if(!lin_rknn.equals(pre_rknn)) { + System.out.println("LIN RkNN " + lin_rknn); + System.out.println("PRE RkNN " + pre_rknn); + System.out.println(); + } + assertEquals("rkNN sizes do not agree for k=" + k, lin_rknn.size(), pre_rknn.size()); + for(int j = 0; j < lin_rknn.size(); j++) { + assertTrue("rkNNs of linear scan and preprocessor do not match!", lin_rknn.get(j).getDBID().equals(pre_rknn.get(j).getDBID())); + assertTrue("rkNNs of linear scan and preprocessor do not match!", lin_rknn.get(j).getDistance().equals(pre_rknn.get(j).getDistance())); + } + } + System.out.println("rknns ok"); + System.out.println(); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/logging/TestOutputStreamLogger.java b/test/de/lmu/ifi/dbs/elki/logging/TestOutputStreamLogger.java new file mode 100644 index 00000000..97e77867 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/logging/TestOutputStreamLogger.java @@ -0,0 +1,38 @@ +package de.lmu.ifi.dbs.elki.logging; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.Writer; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; + +/** + * Small experiment to assert the console output logger works as expected. + * + * @author Erich Schubert + * + */ +public class TestOutputStreamLogger implements JUnit4Test { + /** + * Write a couple of messages to the console output writer and compare the + * resulting characters. + * + * @throws IOException on errors. + */ + @Test + public final void testWriteString() throws IOException { + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + Writer wri = new OutputStreamLogger(buf); + wri.write("Test." + OutputStreamLogger.NEWLINE); + wri.write("\r123"); + wri.write("\r23"); + wri.write("\r3"); + wri.write("Test."); + String should = "Test." 
+ OutputStreamLogger.NEWLINE + "\r123\r \r23\r \r3" + OutputStreamLogger.NEWLINE + "Test."; + assertEquals("Output doesn't match requirements.", should, buf.toString()); + } +} diff --git a/test/de/lmu/ifi/dbs/elki/math/TestAffineTransformation.java b/test/de/lmu/ifi/dbs/elki/math/TestAffineTransformation.java new file mode 100644 index 00000000..c5616e8d --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestAffineTransformation.java @@ -0,0 +1,220 @@ +package de.lmu.ifi.dbs.elki.math; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.math.linearalgebra.AffineTransformation; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; + +/** + * JUnit Test for the class {@link AffineTransformation} + * + * @author Erich Schubert + * + */ +public class TestAffineTransformation implements JUnit4Test { + /** + * Test identity transform + */ + @Test + public void testIdentityTransform() { + int testdim = 5; + AffineTransformation t = new AffineTransformation(testdim); + assertTrue(t.getDimensionality() == testdim); + Matrix tm = t.getTransformation(); + assertEquals("initial transformation matrix should be unity", tm, Matrix.unitMatrix(testdim + 1)); + + // test application to a vector + double[] dv = new double[testdim]; + for(int i = 0; i < testdim; i++) { + dv[i] = i * i + testdim; + } + Vector v1 = new Vector(dv.clone()); + Vector v2 = new Vector(dv.clone()); + + Vector v3 = t.apply(v1); + assertEquals("identity transformation wasn't identical", v2, v3); + + Vector v4 = t.applyInverse(v2); + assertEquals("inverse of identity wasn't identity", v1, v4); + } + + /** + * Test adding translation vectors + */ + @Test + public void testTranslation() { + int testdim = 5; + AffineTransformation t = new AffineTransformation(testdim); + 
assertTrue(t.getDimensionality() == testdim); + Matrix tm = t.getTransformation(); + assertNotSame("getTransformation is expected to return a new copy", tm, t.getTransformation()); + assertEquals("initial transformation matrix should be unity", tm, Matrix.unitMatrix(testdim + 1)); + + // translation vector + double[] tv = new double[testdim]; + for(int i = 0; i < testdim; i++) { + tv[i] = i + testdim; + } + t.addTranslation(new Vector(tv)); + + Matrix tm2 = t.getTransformation(); + // Manually do the same changes to the matrix tm + for(int i = 0; i < testdim; i++) { + tm.set(i, testdim, i + testdim); + } + // Compare the results + assertEquals("Translation wasn't added correctly to matrix.", tm, tm2); + + // test application to a vector + double[] dv1 = new double[testdim]; + double[] dv2 = new double[testdim]; + for(int i = 0; i < testdim; i++) { + dv1[i] = i * i + testdim; + dv2[i] = i * i + i + 2 * testdim; + } + Vector v1 = new Vector(dv1); + Vector v2t = new Vector(dv2); + + Vector v1t = t.apply(v1); + assertEquals("Vector wasn't translated properly forward.", v2t, v1t); + Vector v2b = t.applyInverse(v2t); + assertEquals("Vector wasn't translated properly backwards.", v1, v2b); + Vector v1b = t.applyInverse(v1t); + assertEquals("Vector wasn't translated properly back and forward.", v1, v1b); + + // Translation + Vector vd = v1.minus(v2b); + Vector vtd = v1t.minus(v2t); + assertEquals("Translation changed vector difference.", vd, vtd); + + // Translation shouldn't change relative vectors. 
+ assertEquals("Relative vectors weren't left unchanged by translation!", v1, t.applyRelative(v1)); + assertEquals("Relative vectors weren't left unchanged by translation!", v2t, t.applyRelative(v2t)); + assertEquals("Relative vectors weren't left unchanged by translation!", v1t, t.applyRelative(v1t)); + assertEquals("Relative vectors weren't left unchanged by translation!", v2b, t.applyRelative(v2b)); + } + + /** + * Test direct inclusion of matrices + */ + @Test + public void testMatrix() { + int testdim = 5; + int axis1 = 1; + int axis2 = 3; + + assert (axis1 < testdim); + assert (axis2 < testdim); + // don't change the angle; we'll be using that executing the rotation + // three times will be identity (approximately) + double angle = Math.toRadians(360 / 3); + AffineTransformation t = new AffineTransformation(testdim); + assertTrue(t.getDimensionality() == testdim); + Matrix tm = t.getTransformation(); + assertNotSame("getTransformation is expected to return a new copy", tm, t.getTransformation()); + assertEquals("initial transformation matrix should be unity", tm, Matrix.unitMatrix(testdim + 1)); + + // rotation matrix + double[][] rm = new double[testdim][testdim]; + for(int i = 0; i < testdim; i++) { + rm[i][i] = 1; + } + // add the rotation + rm[axis1][axis1] = +Math.cos(angle); + rm[axis1][axis2] = -Math.sin(angle); + rm[axis2][axis1] = +Math.sin(angle); + rm[axis2][axis2] = +Math.cos(angle); + t.addMatrix(new Matrix(rm)); + Matrix tm2 = t.getTransformation(); + + // We know that we didn't do any translations and tm is the unity matrix + // so we can manually do the rotation on it, too. 
+ tm.set(axis1, axis1, +Math.cos(angle)); + tm.set(axis1, axis2, -Math.sin(angle)); + tm.set(axis2, axis1, +Math.sin(angle)); + tm.set(axis2, axis2, +Math.cos(angle)); + + // Compare the results + assertEquals("Rotation wasn't added correctly to matrix.", tm, tm2); + + // test application to a vector + double[] dv = new double[testdim]; + for(int i = 0; i < testdim; i++) { + dv[i] = i * i + testdim; + } + Vector v1 = new Vector(dv); + Vector v2 = t.apply(v1); + Vector v3 = t.applyInverse(v2); + assertTrue("Forward-Backward didn't work correctly.", v1.minus(v3).euclideanLength() < 0.0001); + Vector v4 = t.apply(t.apply(t.apply(v1))); + assertTrue("Triple-Rotation by 120 degree didn't work", v1.minus(v4).euclideanLength() < 0.0001); + + // Rotation shouldn't disagree for relative vectors. + // (they just are not affected by translation!) + assertEquals("Relative vectors were affected differently by pure rotation!", v2, t.applyRelative(v1)); + + // should do the same as built-in rotation! + AffineTransformation t2 = new AffineTransformation(testdim); + t2.addRotation(axis1, axis2, angle); + Vector t2v2 = t2.apply(v1); + assertTrue("Manual rotation and AffineTransformation.addRotation disagree.", v2.minus(t2v2).euclideanLength() < 0.0001); + } + + /** + * Test {@link AffineTransformation#reorderAxesTransformation} + */ + @Test + public void testReorder() { + Vector v = new Vector(new double[] { 3, 5, 7 }); + // all permutations + Vector p1 = new Vector(new double[] { 3, 5, 7 }); + Vector p2 = new Vector(new double[] { 3, 7, 5 }); + Vector p3 = new Vector(new double[] { 5, 3, 7 }); + Vector p4 = new Vector(new double[] { 5, 7, 3 }); + Vector p5 = new Vector(new double[] { 7, 3, 5 }); + Vector p6 = new Vector(new double[] { 7, 5, 3 }); + Vector[] ps = new Vector[] { + // with no arguments. + p1, + // with just one argument. + p1, p3, p5, + // with two arguments. 
+ p1, p2, p3, p4, p5, p6, + }; + + // index in reference array + int idx = 0; + // with 0 arguments + { + AffineTransformation aff = AffineTransformation.reorderAxesTransformation(v.getDimensionality(), new int[] {}); + Vector n = aff.apply(v).minus(ps[idx]); + assertEquals("Permutation " + idx + " doesn't match.", n.euclideanLength(), 0.0, 0.001); + idx++; + } + // with one argument + for(int d1 = 1; d1 <= 3; d1++) { + AffineTransformation aff = AffineTransformation.reorderAxesTransformation(v.getDimensionality(), new int[] { d1 }); + Vector n = aff.apply(v).minus(ps[idx]); + assertEquals("Permutation " + idx + " doesn't match.", n.euclideanLength(), 0.0, 0.001); + idx++; + } + // with two arguments + for(int d1 = 1; d1 <= 3; d1++) { + for(int d2 = 1; d2 <= 3; d2++) { + if(d1 == d2) { + continue; + } + AffineTransformation aff = AffineTransformation.reorderAxesTransformation(v.getDimensionality(), new int[] { d1, d2 }); + Vector n = aff.apply(v).minus(ps[idx]); + assertEquals("Permutation " + idx + " doesn't match.", n.euclideanLength(), 0.0, 0.001); + idx++; + } + } + } +} diff --git a/test/de/lmu/ifi/dbs/elki/math/TestFlexiHistogram.java b/test/de/lmu/ifi/dbs/elki/math/TestFlexiHistogram.java new file mode 100644 index 00000000..3ce38f8b --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestFlexiHistogram.java @@ -0,0 +1,65 @@ +package de.lmu.ifi.dbs.elki.math; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.utilities.iterator.IterableUtil; +import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; + +/** + * JUnit test to test the {@link ReplacingHistogram} class. + * + * @author Erich Schubert + */ +public class TestFlexiHistogram implements JUnit4Test { + FlexiHistogram<Double, Double> hist; + + /** + * Test that adds some data to the histogram and compares results. 
+ */ + @Test + public final void testHistogram() { + Double[] filled = { 0.0, 1.23, 4.56, 7.89, 0.0 }; + Double[] changed = { 0.0, 1.35, 8.01, 14.67, 9.01, 2.34 }; + Double[] resized = { -1.23, 1.35, 22.68, 11.35, 0.0, 0.0, -4.56 }; + Double[] expanded = { 1., 0.0, 0.0, 0.0, 0.0, 0.0, 29.59 }; + hist = FlexiHistogram.DoubleSumHistogram(5); + hist.aggregate(0.0, 0.0); + hist.aggregate(0.15, 1.23); + hist.aggregate(0.25, 4.56); + hist.aggregate(0.35, 7.89); + hist.aggregate(0.5, 0.0); + assertArrayEquals("Filled histogram doesn't match", filled, hist.getData().toArray(new Double[0])); + hist.aggregate(0.15, 0.12); + hist.aggregate(0.25, 3.45); + hist.aggregate(0.35, 6.78); + hist.aggregate(0.45, 9.01); + hist.aggregate(0.55, 2.34); + assertArrayEquals("Changed histogram doesn't match", changed, hist.getData().toArray(new Double[0])); + hist.aggregate(-.13, -1.23); + hist.aggregate(1.13, -4.56); + assertArrayEquals("Resized histogram doesn't match", resized, hist.getData().toArray(new Double[0])); + + // compare results via Iterator. + int off = 0; + for(Pair<Double, Double> pair : hist) { + assertEquals("Array iterator bin position", -0.1 + 0.2 * off, pair.getFirst(), 0.00001); + assertEquals("Array iterator bin contents", resized[off], pair.getSecond(), 0.00001); + off++; + } + // backwards... 
+ off--; + for(Pair<Double, Double> pair : IterableUtil.fromIterator(hist.reverseIterator())) { + assertEquals("Array iterator bin position", -0.1 + 0.2 * off, pair.getFirst(), 0.00001); + assertEquals("Array iterator bin contents", resized[off], pair.getSecond(), 0.00001); + off--; + } + + // totally break out of the data range + hist.aggregate(-10., 1.); + assertArrayEquals("Expanded histogram doesn't match", expanded, hist.getData().toArray(new Double[0])); + } +} diff --git a/test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java b/test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java new file mode 100644 index 00000000..cc0a1f5c --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestKernelDensityFitting.java @@ -0,0 +1,142 @@ +package de.lmu.ifi.dbs.elki.math; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.data.DoubleVector; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.FittingFunction; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.GaussianFittingFunction; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod; +import de.lmu.ifi.dbs.elki.math.statistics.GaussianKernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityEstimator; +import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; + +/** + * JUnit test that does a complete Kernel and Levenberg-Marquadt fitting. 
+ * + * @author Erich Schubert + */ +public class TestKernelDensityFitting implements JUnit4Test { + // the following values depend on the data set used! + String dataset = "data/testdata/unittests/gaussian-1d-for-fitting.csv"; + + // data set size for verification. + int realsize = 100; + + double realmean = 0.5; + + double realstdd = 1.5; + + /** + * The test will load the given data set and perform a Levenberq-Marquadt + * fitting on a kernelized density estimation. The test evaluates the fitting + * quality to ensure that the results remain stable and significantly better + * than traditional estimation. + * + * @throws ParameterException on errors. + */ + @Test + public final void testFitDoubleArray() { + ListParameterization config = new ListParameterization(); + // Input + config.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset); + // This data was generated with a mean of 0.0 and stddev 1.23, + + // get database + Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, config); + db.initialize(); + Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD); + + // verify data set size. + assertEquals("Data set size doesn't match parameters.", realsize, rep.size()); + + double splitval = 0.5; + + double[] fulldata = new double[rep.size()]; + // transform into double array + { + int i = 0; + for(DBID id : rep.iterDBIDs()) { + fulldata[i] = rep.get(id).doubleValue(1); + i++; + } + } + Arrays.sort(fulldata); + + // Check that the initial parameters match what we were expecting from the + // data. + double[] fullparams = estimateInitialParameters(fulldata); + assertEquals("Full Mean before fitting", 0.4446105, fullparams[0], 0.0001); + assertEquals("Full Stddev before fitting", 1.4012001, fullparams[1], 0.0001); + + // Do a fit using only part of the data and check the results are right. 
+ double[] fullfit = run(fulldata, fullparams); + assertEquals("Full Mean after fitting", 0.64505, fullfit[0], 0.01); + assertEquals("Full Stddev after fitting", 1.5227889, fullfit[1], 0.01); + + int splitpoint = 0; + while(fulldata[splitpoint] < splitval && splitpoint < fulldata.length) { + splitpoint++; + } + double[] halfdata = Arrays.copyOf(fulldata, splitpoint); + + // Check that the initial parameters match what we were expecting from the + // data. + double[] params = estimateInitialParameters(halfdata); + assertEquals("Mean before fitting", -0.65723044, params[0], 0.0001); + assertEquals("Stddev before fitting", 1.0112391, params[1], 0.0001); + + // Do a fit using only part of the data and check the results are right. + double[] ps = run(halfdata, params); + assertEquals("Mean after fitting", 0.45980, ps[0], 0.01); + assertEquals("Stddev after fitting", 1.320427, ps[1], 0.01); + } + + private double[] estimateInitialParameters(double[] data) { + double[] params = new double[3]; + // compute averages + MeanVariance mv = new MeanVariance(); + for(double d : data) { + mv.put(d); + } + + params[0] = mv.getMean(); + params[1] = mv.getSampleStddev(); + // guess initial amplitude for an gaussian distribution. 
+ double c1 = MathUtil.erf(Math.abs(data[0] - params[0]) / (params[1] * Math.sqrt(2))); + double c2 = MathUtil.erf(Math.abs(data[data.length - 1] - params[0]) / (params[1] * Math.sqrt(2))); + params[2] = 1.0 / Math.min(c1, c2); + // System.out.println("Mean: " + params[0] + " Stddev: " + params[1] + + // " Amp: " + params[2]); + return params; + } + + private double[] run(double[] data, double[] params) { + FittingFunction func = new GaussianFittingFunction(); + boolean[] dofit = { true, true, true }; + KernelDensityEstimator de = new KernelDensityEstimator(data, GaussianKernelDensityFunction.KERNEL); + LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(func, params, dofit, data, de.getDensity(), de.getVariance()); + // for(int i = 0; i < 100; i++) { + // fit.iterate(); + // double[] ps = fit.getParams(); + // System.out.println("Mean: " + ps[0] + " Stddev: " + ps[1] + " Amp: " + + // ps[2]+" Chi: "+fit.getChiSq()); + // } + fit.run(); + // double[] ps = fit.getParams(); + // System.out.println("Result: "+ps[0]+" "+ps[1]+" "+ps[2]+" Chi: "+fit.getChiSq()+" Iter: "+fit.maxruns); + return fit.getParams(); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/math/TestLevenbergMarquardtGaussianFitting.java b/test/de/lmu/ifi/dbs/elki/math/TestLevenbergMarquardtGaussianFitting.java new file mode 100644 index 00000000..49c84996 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestLevenbergMarquardtGaussianFitting.java @@ -0,0 +1,78 @@ +package de.lmu.ifi.dbs.elki.math; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.GaussianFittingFunction; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod; + +/** + * Test to evaluate Levenberg-Marquardt fitting on a given Gaussian + * distribution. + * + * @author Erich Schubert + * + */ +public class TestLevenbergMarquardtGaussianFitting implements JUnit4Test { + /** + * Evaluate on a symmetric Gaussian distribution. Traditional estimation + * already has the mean quite good, but is far off on the stddev. The improved + * fitting is much better on the stddev. 
+ */ + // these points were generated with mean 0.12345 and stddev 0.98765 + @Test + public void testSymmetric() { + double[] testx = { -0.787791050195, -0.738026373791, -0.688261697386, -0.638497020982, -0.588732344578, -0.538967668174, -0.489202991769, -0.439438315365, -0.389673638961, -0.339908962556, -0.290144286152, -0.240379609748, -0.190614933344, -0.140850256939, -0.0910855805352, -0.0413209041309, 0.00844377227332, 0.0582084486776, 0.107973125082, 0.157737801486, 0.20750247789, 0.257267154295, 0.307031830699, 0.356796507103, 0.406561183507, 0.456325859912, 0.506090536316, 0.55585521272, 0.605619889124, 0.655384565529, 0.705149241933, 0.754913918337, 0.804678594741, 0.854443271146, 0.90420794755, 0.953972623954, 1.00373730036, 1.05350197676, 1.10326665317 }; + double[] testy = { 0.25319163934, 0.210993032783, 0.26122946916, 0.301418618261, 0.309456448082, 0.319503735357, 0.327541565177, 0.285342958621, 0.371749629189, 0.345626682273, 0.357683427004, 0.343617224818, 0.365721256824, 0.363711799369, 0.39586311865, 0.389834746285, 0.456146842302, 0.434042810296, 0.39586311865, 0.40390094847, 0.442080640117, 0.375768544099, 0.355673969549, 0.373759086644, 0.39586311865, 0.371749629189, 0.345626682273, 0.361702341914, 0.381796916465, 0.357683427004, 0.405910405925, 0.353664512093, 0.349645597183, 0.267257841525, 0.263238926615, 0.313475362992, 0.243144352064, 0.25721055425, 0.221040320058 }; + double mean = 0.122895805963; + double stddev = 0.542856090502; + double stddevq = stddev * Math.sqrt(2); + double[] s = new double[testx.length]; + for(int i = 0; i < testx.length; i++) { + s[i] = 1.0; + } + double[] params = { mean, stddevq, 1 }; + boolean[] dofit = { true, true, false }; + LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(new GaussianFittingFunction(), params, dofit, testx, testy, s); + for(int i = 0; i < 50; i++) { + fit.iterate(); + } + double[] ps = fit.getParams(); + // compare results. 
+ double[] should = { 0.152986763079, 1.00115077, 1 }; + assertEquals("Mean doesn't match.", should[0], ps[0], 0.0001); + assertEquals("Stddev doesn't match.", should[1], ps[1], 0.0001); + assertEquals("Scaling doesn't match.", should[2], ps[2], 0.0001); + } + + /** + * Same experiment, but only with one leg of the distribution. This results in + * the traditional mean being far off. + */ + @Test + public void testAsymmetric() { + double[] testx = { 0.157737801486, 0.20750247789, 0.257267154295, 0.307031830699, 0.356796507103, 0.406561183507, 0.456325859912, 0.506090536316, 0.55585521272, 0.605619889124, 0.655384565529, 0.705149241933, 0.754913918337, 0.804678594741, 0.854443271146, 0.90420794755, 0.953972623954, 1.00373730036, 1.05350197676, 1.10326665317, 1.15303132957, 1.20279600598, 1.25256068238, 1.30232535878, 1.35209003519, 1.40185471159, 1.451619388, 1.5013840644, 1.55114874081, 1.60091341721, 1.65067809361, 1.70044277002, 1.75020744642, 1.79997212283, 1.84973679923, 1.89950147564, 1.94926615204, 1.99903082844, 2.04879550485, 2.09856018125, 2.14832485766, 2.19808953406, 2.24785421046, 2.29761888687, 2.34738356327, 2.39714823968, 2.44691291608, 2.49667759249, 2.54644226889, 2.59620694529, 2.6459716217, 2.6957362981, 2.74550097451, 2.79526565091, 2.84503032732, 2.89479500372, 2.94455968012, 2.99432435653, 3.04408903293, 3.09385370934 }; + double[] testy = { 0.40390094847, 0.442080640117, 0.375768544099, 0.355673969549, 0.373759086644, 0.39586311865, 0.371749629189, 0.345626682273, 0.361702341914, 0.381796916465, 0.357683427004, 0.405910405925, 0.353664512093, 0.349645597183, 0.267257841525, 0.263238926615, 0.313475362992, 0.243144352064, 0.25721055425, 0.221040320058, 0.247163266974, 0.219030862603, 0.267257841525, 0.186879543322, 0.184870085867, 0.160756596406, 0.202955202963, 0.132624192035, 0.150709309131, 0.158747138951, 0.100472872754, 0.124586362215, 0.116548532394, 0.132624192035, 0.078368840748, 0.0843972131132, 0.0582742661972, 0.0763593832929, 
0.100472872754, 0.052245893832, 0.0562648087421, 0.0462175214668, 0.0321513192812, 0.0421986065566, 0.026122946916, 0.0321513192812, 0.0140662021855, 0.0120567447305, 0.0241134894609, 0.0140662021855, 0.0160756596406, 0.0140662021855, 0.00803782982031, 0.00602837236523, 0.0120567447305, 0.00803782982031, 0.00803782982031, 0.00602837236523, 0.0100472872754, 0.00200945745508 }; + double mean = 0.951868470698; + double stddev = 0.571932920001; + double stddevq = stddev * Math.sqrt(2); + double[] s = new double[testx.length]; + for(int i = 0; i < testx.length; i++) { + s[i] = 1.0; + } + double[] params = { mean, stddevq, 1 }; + boolean[] dofit = { true, true, false }; + LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(new GaussianFittingFunction(), params, dofit, testx, testy, s); + for(int i = 0; i < 50; i++) { + fit.iterate(); + } + double[] ps = fit.getParams(); + // compare results. + double[] should = { 0.1557811515, 1.006463733, 1 }; + assertEquals("Mean doesn't match.", should[0], ps[0], 0.0001); + assertEquals("Stddev doesn't match.", should[1], ps[1], 0.0001); + assertEquals("Scaling doesn't match.", should[2], ps[2], 0.0001); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/math/TestMatrix.java b/test/de/lmu/ifi/dbs/elki/math/TestMatrix.java new file mode 100644 index 00000000..49042ceb --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestMatrix.java @@ -0,0 +1,66 @@ +package de.lmu.ifi.dbs.elki.math; + +import java.util.Random; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; + +public class TestMatrix implements JUnit4Test { + + @Test + public void testTransposedOperations() { + for(int i = 0; i < 100; i++) { + randomizedTransposedTest(); + randomizedTestAsymmetric(); + } + } + + private void randomizedTransposedTest() { + Random r = new Random(); + int dim = r.nextInt(30) + 10; + Matrix A = new Matrix(dim, dim); + Matrix B = new Matrix(dim, dim); + for(int i = 0; i < dim; i++) { + for(int j = 0; j < dim; j++) { + A.set(i, j, (r.nextDouble() - .5) * 10); + B.set(i, j, (r.nextDouble() - .5) * 10); + } + } + + Matrix AT_B = A.transpose().times(B); + org.junit.Assert.assertTrue("A.transposeTimes(B) does not equal A.transpose.times(B)", A.transposeTimes(B).almostEquals(AT_B)); + Matrix A_BT = A.times(B.transpose()); + org.junit.Assert.assertTrue("A.timesTranspose(B) does not equal A.times(B.transpose)", A.timesTranspose(B).almostEquals(A_BT)); + org.junit.Assert.assertTrue("Usually (!) 
AT_B != A_BT!", !AT_B.almostEquals(A_BT)); + Matrix AT_BT = A.transpose().times(B.transpose()); + org.junit.Assert.assertTrue("A.transposeTimesTranspose(B) does not equal (B.times(A)).transpose", B.times(A).transpose().almostEquals(AT_BT)); + org.junit.Assert.assertTrue("A.transposeTimesTranspose(B) does not equal A.transpose.times(B.transpose)", A.transposeTimesTranspose(B).almostEquals(AT_BT)); + } + + private void randomizedTestAsymmetric() { + Random r = new Random(); + int dim1 = r.nextInt(30) + 10; + int dim2 = r.nextInt(30) + 10; + int dim3 = r.nextInt(30) + 10; + Matrix A = new Matrix(dim1, dim2); + Matrix B = new Matrix(dim2, dim3); + for(int i = 0; i < dim1; i++) { + for(int j = 0; j < dim2; j++) { + A.set(i, j, (r.nextDouble() - .5) * 10); + } + } + for(int i = 0; i < dim2; i++) { + for(int j = 0; j < dim3; j++) { + B.set(i, j, (r.nextDouble() - .5) * 10); + } + } + + Matrix A_B = A.times(B); + Matrix BT_AT = B.transpose().times(A.transpose()); + Matrix BT_AT2 = B.transposeTimesTranspose(A); + org.junit.Assert.assertTrue("B.transposeTimesTranspose(A) does not equal (A.times(B)).transpose", A_B.transpose().almostEquals(BT_AT)); + org.junit.Assert.assertTrue("B.transposeTimesTranspose(A) does not equal B.transpose.times(A.transpose)", BT_AT2.almostEquals(BT_AT)); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/math/TestQuickSelect.java b/test/de/lmu/ifi/dbs/elki/math/TestQuickSelect.java new file mode 100644 index 00000000..36ed4521 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestQuickSelect.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.math; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Random; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.math.statistics.QuickSelect; + +/** + * Test the QuickSelect math class. + * + * @author Erich Schubert + */ +public class TestQuickSelect implements JUnit4Test { + /** + * Array size to use. + */ + final int SIZE = 10000; + + @Test + public void testRandomDoubles() { + for(int i = 1; i < 10; i++) { + testQuickSelect(i); + } + testQuickSelect(SIZE); + testQuickSelect(SIZE + 1); + } + + private void testQuickSelect(int size) { + double[] data = new double[size]; + double[] test; + + // Make a random generator, but remember the seed for debugging. + Random r = new Random(); + long seed = r.nextLong(); + r = new Random(seed); + + // Produce data + for(int i = 0; i < size; i++) { + data[i] = r.nextDouble(); + } + // Duplicate for reference, sort. + test = Arrays.copyOf(data, size); + Arrays.sort(test); + + // Run QuickSelect and compare with full sort + // After a few iterations, the array will be largely sorted. + // Better just run the whole test a few times, than doing too many + // iterations here. + for(int j = 0; j < 20; j++) { + int q = r.nextInt(size); + double r1 = QuickSelect.quickSelect(data, q); + double r2 = test[q]; + assertEquals("QuickSelect returned incorrect element. Seed=" + seed, r2, r1, Double.MIN_VALUE); + } + double med = QuickSelect.median(data); + if(size % 2 == 1) { + double met = test[(size - 1) / 2]; + assertEquals("QuickSelect returned incorrect median. 
Seed=" + seed, met, med, Double.MIN_VALUE); + } + else { + double met = (test[(size - 1) / 2] + test[(size + 1) / 2]) / 2; + assertEquals("QuickSelect returned incorrect median. Seed=" + seed, med, met, Double.MIN_VALUE); + } + double qua = QuickSelect.quantile(data, 0.5); + assertEquals("Median and 0.5 quantile do not agree. Seed=" + seed, med, qua, 1E-15); + } + + @Test + public void testPartialArray() { + double data[] = new double[] { 0.1, 0.2, 1, 2, 3, 0.9, 0.95 }; + assertEquals("Partial median incorrect.", 1, QuickSelect.median(data, 2, 2), Double.MIN_VALUE); + assertEquals("Partial median incorrect.", 3, QuickSelect.median(data, 4, 4), Double.MIN_VALUE); + // Note: do not change the order, since this modifies the array. + assertEquals("Partial median incorrect.", 2, QuickSelect.median(data, 2, 4), Double.MIN_VALUE); + // Note: do not change the order, since this modifies the array. + assertEquals("Full median incorrect.", 0.95, QuickSelect.median(data), Double.MIN_VALUE); + } + + + @Test + public void testTies() { + double data[] = new double[] { 0.1, 0.1, 0.9, 0.9, 0.5, 0.9, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1 }; + assertEquals("Full median incorrect.", 0.5, QuickSelect.median(data), Double.MIN_VALUE); + } +}
\ No newline at end of file diff --git a/test/de/lmu/ifi/dbs/elki/math/TestReplacingHistogram.java b/test/de/lmu/ifi/dbs/elki/math/TestReplacingHistogram.java new file mode 100644 index 00000000..e9e505d2 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestReplacingHistogram.java @@ -0,0 +1,51 @@ +package de.lmu.ifi.dbs.elki.math; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; + +/** + * JUnit test to test the {@link ReplacingHistogram} class. + * @author Erich Schubert + */ +public class TestReplacingHistogram implements JUnit4Test { + ReplacingHistogram<Double> hist; + + /** + * Test that adds some data to the histogram and compares results. + */ + @Test + public final void testHistogram() { + Double[] initial = { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + Double[] filled = { 0.0, 1.23, 4.56, 7.89, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; + Double[] changed = { 0.0, 1.35, 8.01, 14.67, 9.01, 2.34, 0.0, 0.0, 0.0, 0.0 }; + Double[] resized = { -1.23, 0.0, 0.0, 1.35, 8.01, 14.67, 9.01, 2.34, 0.0, 0.0, 0.0, 0.0, 0.0, -4.56 }; + hist = ReplacingHistogram.DoubleHistogram(10, 0.0, 1.0); + assertArrayEquals("Empty histogram doesn't match", initial, hist.getData().toArray(new Double[0])); + hist.replace(0.15, 1.23); + hist.replace(0.25, 4.56); + hist.replace(0.35, 7.89); + assertArrayEquals("Filled histogram doesn't match", filled, hist.getData().toArray(new Double[0])); + hist.replace(0.15, 0.12 + hist.get(0.15)); + hist.replace(0.25, 3.45 + hist.get(0.25)); + hist.replace(0.35, 6.78 + hist.get(0.35)); + hist.replace(0.45, 9.01 + hist.get(0.45)); + hist.replace(0.50, 2.34 + hist.get(0.50)); + assertArrayEquals("Changed histogram doesn't match", changed, hist.getData().toArray(new Double[0])); + hist.replace(-.13, -1.23 + hist.get(-.13)); + hist.replace(1.13, -4.56 + hist.get(1.13)); + 
assertArrayEquals("Resized histogram doesn't match", resized, hist.getData().toArray(new Double[0])); + + // compare results via Iterator. + int off = 0; + for (Pair<Double, Double> pair : hist) { + assertEquals("Array iterator bin position", -0.15 + 0.1 * off, pair.getFirst(), 0.00001); + assertEquals("Array iterator bin contents", resized[off], pair.getSecond(), 0.00001); + off++; + } + } +} diff --git a/test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java b/test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java new file mode 100644 index 00000000..8164e28e --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/math/TestWeightFunctions.java @@ -0,0 +1,55 @@ +package de.lmu.ifi.dbs.elki.math; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ConstantWeight; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ErfcStddevWeight; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ErfcWeight; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ExponentialWeight; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.GaussStddevWeight; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.GaussWeight; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.LinearWeight; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.WeightFunction; + +/** + * JUnit test to assert consistency of a couple of Weight functions + * @author Erich Schubert + * + */ +public class TestWeightFunctions implements JUnit4Test { + /** + * Just a 'boring' value test for completeness. 
+ */ + @Test + public void testGetWeight() { + WeightFunction[] wf = {new ConstantWeight(), new ErfcWeight(), new ErfcStddevWeight(), + new GaussWeight(), new GaussStddevWeight(), new LinearWeight(), new ExponentialWeight()}; + double[] at0 = {1.0, 1.0, 1.0, 1.0, 0.3989422804014327, 1.0, 1.0}; + double[] at01 = {1.0, 0.8693490686884612, 0.920344325445942, 0.9772372209558107, + 0.3969525474770118, 0.91, 0.7943282347242815}; + double[] at09 = {1.0, 0.13877499454059491, 0.36812025069351895, 0.15488166189124816, + 0.2660852498987548, 0.18999999999999995, 0.12589254117941673}; + double[] at10 = {1.0, 0.10000000000000016, 0.317310507862914, 0.10000000000000002, + 0.24197072451914337, 0.09999999999999998, 0.10000000000000002}; + + assert(wf.length == at0.length); + assert(wf.length == at01.length); + assert(wf.length == at09.length); + assert(wf.length == at10.length); + + for (int i=0; i < wf.length; i++) { + double val0 = wf[i].getWeight(0, 1, 1); + double val01 = wf[i].getWeight(0.1, 1, 1); + double val09 = wf[i].getWeight(0.9, 1, 1); + double val10 = wf[i].getWeight(1.0, 1, 1); + assertEquals(wf[i].getClass().getSimpleName()+" at 0.0", at0[i], val0, Double.MIN_VALUE); + assertEquals(wf[i].getClass().getSimpleName()+" at 0.1", at01[i], val01, Double.MIN_VALUE); + assertEquals(wf[i].getClass().getSimpleName()+" at 0.9", at09[i], val09, Double.MIN_VALUE); + assertEquals(wf[i].getClass().getSimpleName()+" at 1.0", at10[i], val10, Double.MIN_VALUE); + } + } + +} diff --git a/test/de/lmu/ifi/dbs/elki/persistent/TestByteArrayUtil.java b/test/de/lmu/ifi/dbs/elki/persistent/TestByteArrayUtil.java new file mode 100644 index 00000000..7cd14c14 --- /dev/null +++ b/test/de/lmu/ifi/dbs/elki/persistent/TestByteArrayUtil.java @@ -0,0 +1,87 @@ +package de.lmu.ifi.dbs.elki.persistent; + +import java.nio.ByteBuffer; + +import org.junit.Assert; +import org.junit.Test; + +import de.lmu.ifi.dbs.elki.JUnit4Test; + +/** + * Test some of the varint functions. 
package de.lmu.ifi.dbs.elki.persistent;

import java.nio.ByteBuffer;

import org.junit.Assert;
import org.junit.Test;

import de.lmu.ifi.dbs.elki.JUnit4Test;

/**
 * Test some of the varint functions.
 *
 * @author Erich Schubert
 */
public class TestByteArrayUtil implements JUnit4Test {
  /**
   * Test the Varint functions: write boundary values as unsigned varints,
   * check the encoded length of each, then rewind and read them back.
   */
  @Test
  public void dotestUnsignedVarint32() {
    int[] vals = { 0, 1, 127, 128, 16383, 16384, 2097151, 2097152, 268435455, 268435456, Integer.MAX_VALUE, 0xFFFFFFFF };
    int[] lens = { 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5 };
    ByteBuffer buffer = ByteBuffer.allocate(100);
    Assert.assertEquals("Test incorrectly set up.", vals.length, lens.length);

    // Write phase: each value must advance the buffer by its expected length.
    int expectedPos = 0;
    for(int i = 0; i < vals.length; i++) {
      ByteArrayUtil.writeUnsignedVarint(buffer, vals[i]);
      expectedPos += lens[i];
      Assert.assertEquals("len(Varint(" + vals[i] + ")) != " + lens[i], expectedPos, buffer.position());
    }
    // Read phase: rewind, decode each value and re-verify the positions.
    buffer.position(0);
    expectedPos = 0;
    for(int i = 0; i < vals.length; i++) {
      int read = ByteArrayUtil.readUnsignedVarint(buffer);
      Assert.assertEquals("Varint read failed.", vals[i], read);
      expectedPos += lens[i];
      Assert.assertEquals("len(Varint(" + vals[i] + ")) != " + lens[i], expectedPos, buffer.position());
    }
  }

  /**
   * Test the Varint functions: same round-trip as above, but for the signed
   * (zig-zag style) encoding, covering both boundary signs at each length.
   */
  @Test
  public void dotestSignedVarint32() {
    int[] vals = { 0, 1, -1, 63, -64, 64, -65, 8191, -8192, 8192, -8193, 1048575, -1048576, 1048576, -1048577, 134217727, -134217728, 134217728, -134217729, Integer.MAX_VALUE, Integer.MIN_VALUE };
    int[] lens = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 };
    ByteBuffer buffer = ByteBuffer.allocate(100);
    Assert.assertEquals("Test incorrectly set up.", vals.length, lens.length);

    // Write phase: verify encoded lengths as we go.
    int expectedPos = 0;
    for(int i = 0; i < vals.length; i++) {
      ByteArrayUtil.writeSignedVarint(buffer, vals[i]);
      expectedPos += lens[i];
      Assert.assertEquals("len(Varint(" + vals[i] + ")) != " + lens[i], expectedPos, buffer.position());
    }
    // Read phase: rewind and decode.
    buffer.position(0);
    expectedPos = 0;
    for(int i = 0; i < vals.length; i++) {
      int read = ByteArrayUtil.readSignedVarint(buffer);
      Assert.assertEquals("Varint read failed.", vals[i], read);
      expectedPos += lens[i];
      Assert.assertEquals("len(Varint(" + vals[i] + ")) != " + lens[i], expectedPos, buffer.position());
    }
  }

  /**
   * Official examples: decode a fixed byte sequence of known varint
   * encodings and check the decoded values and full consumption.
   */
  @Test
  public void dotestVarintExamples() {
    byte[] test = { 0x03, (byte) 0x8E, 0x02, (byte) 0x9E, (byte) 0xA7, 0x05, (byte) 0x96, 0x01 };
    int[] expect = { 3, 270, 86942, 150 };
    ByteBuffer buffer = ByteBuffer.wrap(test);
    for(int i = 0; i < expect.length; i++) {
      Assert.assertEquals(expect[i], ByteArrayUtil.readUnsignedVarint(buffer));
    }
    Assert.assertEquals("Not all data processed.", 0, buffer.remaining());
  }
}
package de.lmu.ifi.dbs.elki.persistent;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import de.lmu.ifi.dbs.elki.JUnit4Test;

/**
 * Test to validate proper OnDiskArray operation.
 *
 * @author Erich Schubert
 */
// TODO: also test with a static sample file.
public class TestOnDiskArray implements JUnit4Test {
  File file = new File("OnDiskArrayTestFile.test.dat");

  /**
   * Check that we don't overwrite any file.
   *
   * @throws Exception on errors.
   */
  @Before
  public void safetyCheck() throws Exception {
    if(file.exists()) {
      Assert.fail("Could not run test - test file already exists.");
    }
  }

  /**
   * Clean up afterwards
   *
   * @throws Exception on errors.
   */
  @After
  public void cleanup() throws Exception {
    if(file != null && file.exists()) {
      if(!file.delete()) {
        Assert.fail("Error cleaning up: can't remove test file.");
      }
    }
  }

  /**
   * Test the OnDiskArray class: write records, resize the file, then reopen
   * it read-only and verify file size, record count, header and contents.
   *
   * @throws IOException on errors.
   */
  @Test
  public void dotestOnDiskArray() throws IOException {
    final int extraheadersize = 2;
    final int recsize = 3;
    int numrec = 4;
    // Fixed header size of the on-disk format version under test.
    final int ODR_HEADER_SIZE = 4 * 4;
    OnDiskArray writer = new OnDiskArray(file, 1, extraheadersize, recsize, numrec);
    byte[] header = { 42, 23 };
    writer.getExtraHeader().put(header);
    byte[] record1 = { 31, 41, 59 };
    byte[] record2 = { 26, 53, 58 };
    writer.getRecordBuffer(0).put(record1);
    writer.getRecordBuffer(1).put(record2);
    writer.getRecordBuffer(2).put(record2);
    writer.getRecordBuffer(3).put(record1);
    // Grow the array by one record and fill the new slot.
    writer.resizeFile(5);
    numrec = 5;
    writer.getRecordBuffer(4).put(record1);
    writer.close();

    // The file must be exactly header + extra header + all records.
    Assert.assertEquals("File size doesn't match.", ODR_HEADER_SIZE + extraheadersize + recsize * numrec, file.length());

    // Reopen read-only and verify everything written above.
    OnDiskArray reader = new OnDiskArray(file, 1, 2, 3, false);
    Assert.assertEquals("Number of records incorrect.", numrec, reader.getNumRecords());

    byte[] buf = new byte[recsize];
    ByteBuffer hbuf = reader.getExtraHeader();
    for(int i = 0; i < header.length; i++) {
      Assert.assertEquals("Header doesn't match.", header[i], hbuf.get());
    }
    reader.getRecordBuffer(0).get(buf);
    Assert.assertArrayEquals("Record 0 doesn't match.", record1, buf);
    reader.getRecordBuffer(4).get(buf);
    Assert.assertArrayEquals("Record 4 doesn't match.", record1, buf);
    reader.getRecordBuffer(1).get(buf);
    Assert.assertArrayEquals("Record 1 doesn't match.", record2, buf);
    reader.getRecordBuffer(2).get(buf);
    Assert.assertArrayEquals("Record 2 doesn't match.", record2, buf);
    reader.getRecordBuffer(3).get(buf);
    Assert.assertArrayEquals("Record 3 doesn't match.", record1, buf);
  }
}
package de.lmu.ifi.dbs.elki.persistent;

import java.io.File;
import java.io.IOException;

import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import de.lmu.ifi.dbs.elki.JUnit4Test;

/**
 * Test the on-disk OnDiskUpperTriangleMatrix class.
 *
 * @author Erich Schubert
 */
// TODO: also test with a static sample file.
public class TestOnDiskUpperTriangleMatrix implements JUnit4Test {
  static File file = new File("UpperTriangleTestFile.test.dat");

  /**
   * Check that we don't overwrite any file.
   *
   * @throws Exception on errors.
   */
  @Before
  public void safetyCheck() throws Exception {
    if(file.exists()) {
      Assert.fail("Could not run test - test file already exists.");
    }
  }

  /**
   * Clean up afterwards
   *
   * @throws Exception on errors.
   */
  @After
  public void cleanup() throws Exception {
    if(file != null && file.exists()) {
      if(!file.delete()) {
        Assert.fail("Error cleaning up: can't remove test file.");
      }
    }
  }

  /**
   * Test the ondisk triangle matrix: fill a 2x2 matrix, resize it to 3x3,
   * then reopen read-only and verify file size and all cells - including a
   * mirrored (1,0) lookup for the symmetric storage.
   *
   * @throws IOException on errors.
   */
  @Test
  public void testUpperTriangleMatrix() throws IOException {
    final int extraheadersize = 2;
    final int recsize = 3;
    int matsize = 2;
    // Fixed header size of the on-disk format version under test.
    final int ODR_HEADER_SIZE = 4 * 4 + 4;
    OnDiskUpperTriangleMatrix writer = new OnDiskUpperTriangleMatrix(file, 1, extraheadersize, recsize, matsize);
    byte[] record1 = { 31, 41, 59 };
    byte[] record2 = { 26, 53, 58 };
    byte[] record3 = { 97, 93, 1 };
    writer.getRecordBuffer(0, 0).put(record1);
    writer.getRecordBuffer(0, 1).put(record2);
    writer.getRecordBuffer(1, 1).put(record3);
    // Test resizing: grow to a 3x3 matrix and fill the new column.
    matsize = 3;
    writer.resizeMatrix(3);
    writer.getRecordBuffer(0, 2).put(record3);
    writer.getRecordBuffer(1, 2).put(record2);
    writer.getRecordBuffer(2, 2).put(record1);
    writer.close();

    // Only the upper triangle (matsize * (matsize + 1) / 2 cells) is stored.
    Assert.assertEquals("File size doesn't match.", ODR_HEADER_SIZE + extraheadersize + recsize * matsize * (matsize + 1) / 2, file.length());

    OnDiskUpperTriangleMatrix reader = new OnDiskUpperTriangleMatrix(file, 1, extraheadersize, recsize, false);
    Assert.assertEquals("Number of records incorrect.", matsize, reader.getMatrixSize());

    byte[] buf = new byte[recsize];
    reader.getRecordBuffer(0, 0).get(buf);
    Assert.assertArrayEquals("Record 0,0 doesn't match.", record1, buf);
    reader.getRecordBuffer(0, 1).get(buf);
    Assert.assertArrayEquals("Record 0,1 doesn't match.", record2, buf);
    reader.getRecordBuffer(1, 1).get(buf);
    Assert.assertArrayEquals("Record 1,1 doesn't match.", record3, buf);
    // (1,0) mirrors (0,1) in the triangular storage.
    reader.getRecordBuffer(1, 0).get(buf);
    Assert.assertArrayEquals("Record 1,0 doesn't match.", record2, buf);
    reader.getRecordBuffer(0, 2).get(buf);
    Assert.assertArrayEquals("Record 0,2 doesn't match.", record3, buf);
    reader.getRecordBuffer(1, 2).get(buf);
    Assert.assertArrayEquals("Record 1,2 doesn't match.", record2, buf);
    reader.getRecordBuffer(2, 2).get(buf);
    Assert.assertArrayEquals("Record 2,2 doesn't match.", record1, buf);
  }
}
package de.lmu.ifi.dbs.elki.utilities.datastructures;

import static org.junit.Assert.assertEquals;

import java.util.Collections;

import org.junit.Test;

import de.lmu.ifi.dbs.elki.JUnit4Test;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;

/**
 * Test the in-memory heap class.
 *
 * @author Erich Schubert
 */
// FIX: implement the JUnit4Test marker interface - the AllTests suite
// discovers JUnit 4 tests via InspectionUtil.findAllImplementations
// (JUnit4Test.class, ...), so without it this test was never run there.
public class TestHeap implements JUnit4Test {
  /**
   * Puts 10 integers into both an ascending and a descending heap and verifies
   * they come out in sequence.
   */
  @Test
  public void testHeap() {
    Integer[] data = { 5, 3, 4, 2, 7, 1, 9, 8, 10, 6 };
    Integer[] asc = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    Integer[] desc = { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 };
    // Default comparator yields ascending order; reverseOrder descending.
    Heap<Integer> hasc = new Heap<Integer>();
    Heap<Integer> hdesc = new Heap<Integer>(Collections.reverseOrder());
    for(Integer i : data) {
      hasc.add(i);
      hdesc.add(i);
    }
    for(int i = 0; i < asc.length; i++) {
      final Integer gota = hasc.poll();
      assertEquals("Objects sorted incorrectly at ascending position " + i, asc[i], gota);
    }
    for(int i = 0; i < desc.length; i++) {
      final Integer gotd = hdesc.poll();
      assertEquals("Objects sorted incorrectly at descending position " + i, desc[i], gotd);
    }
  }
}
package de.lmu.ifi.dbs.elki.utilities.datastructures;

import static org.junit.Assert.assertEquals;

import java.util.Collections;

import org.junit.Test;

import de.lmu.ifi.dbs.elki.JUnit4Test;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;

/**
 * Test the in-memory bounded heap class.
 *
 * @author Erich Schubert
 */
// FIX: implement the JUnit4Test marker interface - the AllTests suite
// discovers JUnit 4 tests via InspectionUtil.findAllImplementations
// (JUnit4Test.class, ...), so without it this test was never run there.
public class TestTiedTopBoundedHeap implements JUnit4Test {
  /**
   * Test bounded heap: a duplicate of the boundary element must be retained,
   * so the heaps end up one element larger than their nominal bound.
   */
  @Test
  public void testTiedTopBoundedHeap() {
    Integer[] data = { 5, 3, 4, 2, 7, 1, 9, 8, 10, 6, 5 };
    Integer[] asc = { 5, 5, 6, 7, 8, 9, 10 };
    Integer[] desc = { 5, 5, 4, 3, 2, 1 };
    Heap<Integer> hasc = new TiedTopBoundedHeap<Integer>(asc.length - 1);
    Heap<Integer> hdesc = new TiedTopBoundedHeap<Integer>(desc.length - 1, Collections.reverseOrder());
    for(Integer i : data) {
      hasc.add(i);
      hdesc.add(i);
    }
    //LoggingUtil.warning("Heap: "+hasc.toString()+ " -- "+hdesc.toString());
    assertEquals("Ascending heap size doesn't match", asc.length, hasc.size());
    assertEquals("Descending heap size doesn't match", desc.length, hdesc.size());
    for(int i = 0; i < asc.length; i++) {
      final Integer gota = hasc.poll();
      assertEquals("Objects sorted incorrectly at ascending position " + i, asc[i], gota);
    }
    for(int i = 0; i < desc.length; i++) {
      final Integer gotd = hdesc.poll();
      assertEquals("Objects sorted incorrectly at descending position " + i, desc[i], gotd);
    }
  }

  /**
   * Test bounded heap: degenerate bound of 1, with ties inserted before,
   * between and after a run of equal elements.
   */
  @Test
  public void testTiedTopBoundedHeapTrival() {
    Heap<Integer> heap1 = new TiedTopBoundedHeap<Integer>(1);
    Heap<Integer> heap2 = new TiedTopBoundedHeap<Integer>(1);
    Heap<Integer> heap3 = new TiedTopBoundedHeap<Integer>(1);
    Heap<Integer> heap4 = new TiedTopBoundedHeap<Integer>(1);
    Heap<Integer> heap5 = new TiedTopBoundedHeap<Integer>(1);
    heap2.add(2);
    heap4.add(0);
    for(int i = 0; i < 10; i++) {
      heap1.add(1);
      heap2.add(1);
      heap3.add(1);
      heap4.add(1);
      heap5.add(1);
    }
    heap3.add(2);
    heap5.add(0);
    assertEquals("First heap size doesn't match", 10, heap1.size());
    assertEquals("Second heap size doesn't match", 1, heap2.size());
    assertEquals("Third heap size doesn't match", 1, heap3.size());
    assertEquals("Fourth heap size doesn't match", 10, heap4.size());
    assertEquals("Fifth heap size doesn't match", 10, heap5.size());
  }
}

/* ---- next file in this import: TestTopBoundedHeap.java ---- */

package de.lmu.ifi.dbs.elki.utilities.datastructures;

import static org.junit.Assert.assertEquals;

import java.util.Collections;

import org.junit.Test;

import de.lmu.ifi.dbs.elki.JUnit4Test;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;

/**
 * Test the in-memory bounded heap class.
 *
 * @author Erich Schubert
 */
// FIX: implement the JUnit4Test marker interface for AllTests discovery,
// consistent with the other JUnit 4 tests in this import.
public class TestTopBoundedHeap implements JUnit4Test {
  /**
   * Test bounded heap: only the top-k elements (by the heap's order) survive.
   */
  @Test
  public void testBoundedHeap() {
    Integer[] data = { 5, 3, 4, 2, 7, 1, 9, 8, 10, 6 };
    Integer[] asc = { 5, 6, 7, 8, 9, 10 };
    Integer[] desc = { 6, 5, 4, 3, 2, 1 };
    Heap<Integer> hasc = new TopBoundedHeap<Integer>(asc.length);
    Heap<Integer> hdesc = new TopBoundedHeap<Integer>(desc.length, Collections.reverseOrder());
    for(Integer i : data) {
      hasc.add(i);
      hdesc.add(i);
    }
    for(int i = 0; i < asc.length; i++) {
      final Integer gota = hasc.poll();
      assertEquals("Objects sorted incorrectly at ascending position " + i, asc[i], gota);
    }
    for(int i = 0; i < desc.length; i++) {
      final Integer gotd = hdesc.poll();
      assertEquals("Objects sorted incorrectly at descending position " + i, desc[i], gotd);
    }
  }
}
de.lmu.ifi.dbs.elki.visualization.scales.LinearScale; + + +/** + * Test class to test rounding of the linear scale. + * + * @author Erich Schubert + * + */ +public class TestLinearScale implements JUnit4Test { + + /** + * Produces a simple linear scale and verifies the tick lines are placed as expected. + */ + @Test + public final void testLinearScale() { + LinearScale a = new LinearScale(3,97); + assertEquals("Minimum for scale 3-97 not as expected.", 0.0, a.getMin(), Double.MIN_VALUE); + assertEquals("Maximum for scale 3-97 not as expected.", 100.0, a.getMax(), Double.MIN_VALUE); + + LinearScale b = new LinearScale(-97, -3); + assertEquals("Minimum for scale -97 : -3 not as expected.", -100.0, b.getMin(), Double.MIN_VALUE); + assertEquals("Maximum for scale -97 : -3 not as expected.", 0.0, b.getMax(), Double.MIN_VALUE); + + LinearScale c = new LinearScale(-3, 37); + assertEquals("Minimum for scale -3 : 37 not as expected.", -10.0, c.getMin(), Double.MIN_VALUE); + assertEquals("Maximum for scale -3 : 37 not as expected.", 40.0, c.getMax(), Double.MIN_VALUE); + + LinearScale d = new LinearScale(-37, 3); + assertEquals("Minimum for scale -37 : 3 not as expected.", -40.0, d.getMin(), Double.MIN_VALUE); + assertEquals("Maximum for scale -37 : 3 not as expected.", 10.0, d.getMax(), Double.MIN_VALUE); + } + +} |