package de.lmu.ifi.dbs.elki.algorithm; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering; import de.lmu.ifi.dbs.elki.data.Cluster; import de.lmu.ifi.dbs.elki.data.Clustering; import de.lmu.ifi.dbs.elki.data.model.Model; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection; import de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter; import de.lmu.ifi.dbs.elki.evaluation.clustering.ClusterContingencyTable; import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierROCCurve; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.result.Result; import de.lmu.ifi.dbs.elki.result.ResultUtil; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; /** * Abstract base class useful for testing various algorithms. * * @author Erich Schubert */ public abstract class AbstractSimpleAlgorithmTest { /** * Base path for unit test files. */ public final static String UNITTEST = "data/testdata/unittests/"; /** * Notice: this is okay for tests - don't use this for frequently used * objects, use a static instance instead! */ protected Logging logger = Logging.getLogger(this.getClass()); /** * Validate that parameterization succeeded: no parameters left, no * parameterization errors. * * @param config Parameterization to test */ protected void testParameterizationOk(ListParameterization config) { if (config.hasUnusedParameters()) { fail("Unused parameters: " + config.getRemainingParameters()); } if (config.hasErrors()) { config.logAndClearReportedErrors(); fail("Parameterization errors."); } } /** * Generate a simple DoubleVector database from a file. * * @param filename File to load * @param expectedSize Expected size in records * @param params Extra parameters * @return Database */ protected Database makeSimpleDatabase(String filename, int expectedSize, ListParameterization params, Class[] filters) { org.junit.Assert.assertTrue("Test data set not found: " + filename, (new File(filename)).exists()); params.addParameter(FileBasedDatabaseConnection.INPUT_ID, filename); List> filterlist = new ArrayList<>(); filterlist.add(FixedDBIDsFilter.class); if (filters != null) { for (Class filter : filters) { filterlist.add(filter); } } params.addParameter(FileBasedDatabaseConnection.FILTERS_ID, filterlist); params.addParameter(FixedDBIDsFilter.IDSTART_ID, 1); Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params); testParameterizationOk(params); db.initialize(); Relation rel = db.getRelation(TypeUtil.ANY); org.junit.Assert.assertEquals("Database size does not match.", expectedSize, rel.size()); return db; } /** * Generate a simple DoubleVector database from a file. * * @param filename File to load * @param expectedSize Expected size in records * @return Database */ protected Database makeSimpleDatabase(String filename, int expectedSize) { return makeSimpleDatabase(filename, expectedSize, new ListParameterization(), null); } /** * Find a clustering result, fail if there is more than one or none. * * @param result Base result * @return Clustering */ protected Clustering findSingleClustering(Result result) { List> clusterresults = ResultUtil.getClusteringResults(result); assertTrue("No unique clustering found in result.", clusterresults.size() == 1); Clustering clustering = clusterresults.get(0); return clustering; } /** * Test the clustering result by comparing the score with an expected value. * * @param database Database to test * @param clustering Clustering result * @param expected Expected score */ protected void testFMeasure(Database database, Clustering clustering, double expected) { // Run by-label as reference ByLabelClustering bylabel = new ByLabelClustering(); Clustering rbl = bylabel.run(database); ClusterContingencyTable ct = new ClusterContingencyTable(true, false); ct.process(clustering, rbl); double score = ct.getPaircount().f1Measure(); if (logger.isVerbose()) { logger.verbose(this.getClass().getSimpleName() + " score: " + score + " expect: " + expected); } org.junit.Assert.assertEquals(this.getClass().getSimpleName() + ": Score does not match.", expected, score, 0.0001); } /** * Validate the cluster sizes with an expected result. * * @param clustering Clustering to test * @param expected Expected cluster sizes */ protected void testClusterSizes(Clustering clustering, int[] expected) { List sizes = new java.util.ArrayList<>(); for (Cluster cl : clustering.getAllClusters()) { sizes.add(cl.size()); } // Sort both Collections.sort(sizes); Arrays.sort(expected); // Report // if(logger.isVerbose()) { StringBuilder buf = new StringBuilder(); buf.append("Cluster sizes: ["); for (int i = 0; i < sizes.size(); i++) { if (i > 0) { buf.append(", "); } buf.append(sizes.get(i)); } buf.append("]"); // } // Test org.junit.Assert.assertEquals("Number of clusters does not match expectations. " + buf.toString(), expected.length, sizes.size()); for (int i = 0; i < expected.length; i++) { org.junit.Assert.assertEquals("Cluster size does not match at position " + i + " in " + buf.toString(), expected[i], (int) sizes.get(i)); } } /** * Test the AUC value for an outlier result. * * @param db Database * @param positive Positive class name * @param result Outlier result to process * @param expected Expected AUC value */ protected void testAUC(Database db, String positive, OutlierResult result, double expected) { ListParameterization params = new ListParameterization(); params.addParameter(OutlierROCCurve.POSITIVE_CLASS_NAME_ID, positive); OutlierROCCurve rocCurve = ClassGenericsUtil.parameterizeOrAbort(OutlierROCCurve.class, params); // Ensure the result has been added to the hierarchy: if (db.getHierarchy().numParents(result) < 1) { db.getHierarchy().add(db, result); } // Compute ROC and AUC: rocCurve.processNewResult(db, result); // Find the ROC results Collection rocs = ResultUtil.filterResults(result, OutlierROCCurve.ROCResult.class); org.junit.Assert.assertTrue("No ROC result found.", !rocs.isEmpty()); double auc = rocs.iterator().next().getAUC(); org.junit.Assert.assertFalse("More than one ROC result found.", rocs.size() > 1); org.junit.Assert.assertEquals("ROC value does not match.", expected, auc, 0.0001); } /** * Test the outlier score of a single object. * * @param result Result object to use * @param id Object ID * @param expected expected value */ protected void testSingleScore(OutlierResult result, int id, double expected) { org.junit.Assert.assertNotNull("No outlier result", result); org.junit.Assert.assertNotNull("No score result.", result.getScores()); final DBID dbid = DBIDUtil.importInteger(id); org.junit.Assert.assertNotNull("No result for ID " + id, result.getScores().get(dbid)); double actual = result.getScores().get(dbid); org.junit.Assert.assertEquals("Outlier score of object " + id + " doesn't match.", expected, actual, 0.0001); } }