diff options
Diffstat (limited to 'test/de/lmu/ifi/dbs/elki/evaluation')
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/evaluation/TestComputeROC.java | 55 | ||||
-rw-r--r-- | test/de/lmu/ifi/dbs/elki/evaluation/TestPairCountingFMeasure.java | 78 |
2 files changed, 133 insertions, 0 deletions
diff --git a/test/de/lmu/ifi/dbs/elki/evaluation/TestComputeROC.java b/test/de/lmu/ifi/dbs/elki/evaluation/TestComputeROC.java
new file mode 100644
index 00000000..46adb1bb
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/evaluation/TestComputeROC.java
@@ -0,0 +1,55 @@
+package de.lmu.ifi.dbs.elki.evaluation;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Test to validate ROC curve computation.
+ *
+ * Checks both the materialized curve and the area under it.
+ *
+ * @author Erich Schubert
+ */
+public class TestComputeROC implements JUnit4Test {
+  /**
+   * Test ROC curve generation, including curve simplification, and the AUC.
+   */
+  @Test
+  public void testROCCurve() {
+    HashSetModifiableDBIDs positive = DBIDUtil.newHashSet(); // ids 1..5 are the positives
+    positive.add(DBIDUtil.importInteger(1));
+    positive.add(DBIDUtil.importInteger(2));
+    positive.add(DBIDUtil.importInteger(3));
+    positive.add(DBIDUtil.importInteger(4));
+    positive.add(DBIDUtil.importInteger(5));
+
+    List<Pair<Double, DBID>> distances = new ArrayList<Pair<Double, DBID>>(); // ranking, ascending distance
+    distances.add(new Pair<Double, DBID>(0.0, DBIDUtil.importInteger(1)));
+    distances.add(new Pair<Double, DBID>(1.0, DBIDUtil.importInteger(2)));
+    distances.add(new Pair<Double, DBID>(2.0, DBIDUtil.importInteger(6)));
+    distances.add(new Pair<Double, DBID>(3.0, DBIDUtil.importInteger(7)));
+    distances.add(new Pair<Double, DBID>(3.0, DBIDUtil.importInteger(3)));
+    distances.add(new Pair<Double, DBID>(4.0, DBIDUtil.importInteger(8)));
+    distances.add(new Pair<Double, DBID>(4.0, DBIDUtil.importInteger(4)));
+    distances.add(new Pair<Double, DBID>(5.0, DBIDUtil.importInteger(9)));
+    distances.add(new Pair<Double, DBID>(6.0, DBIDUtil.importInteger(5)));
+
+    List<DoubleDoublePair> roccurve = ROC.materializeROC(9, positive, distances.iterator());
+    // Ties (3.0, 4.0) and collinear points presumably collapse: expect 6 points, not 10.
+    Assert.assertEquals("ROC curve too complex", 6, roccurve.size());
+
+    double auc = ROC.computeAUC(roccurve);
+    Assert.assertEquals("ROC AUC not right.", 0.6, auc, 0.0001);
+  }
+}
diff --git a/test/de/lmu/ifi/dbs/elki/evaluation/TestPairCountingFMeasure.java b/test/de/lmu/ifi/dbs/elki/evaluation/TestPairCountingFMeasure.java
new file mode 100644
index 00000000..4944db3d
--- /dev/null
+++ b/test/de/lmu/ifi/dbs/elki/evaluation/TestPairCountingFMeasure.java
@@ -0,0 +1,78 @@
+package de.lmu.ifi.dbs.elki.evaluation;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+import de.lmu.ifi.dbs.elki.JUnit4Test;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllInOne;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.TrivialAllNoise;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.datasource.FileBasedDatabaseConnection;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.PairCountingFMeasure;
+import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+
+/**
+ * Validate {@link PairCountingFMeasure} with respect to its ability to compare
+ * data clusterings.
+ *
+ * @author Erich Schubert
+ */
+public class TestPairCountingFMeasure implements JUnit4Test {
+  // the following values depend on the data set used!
+  String dataset = "data/testdata/unittests/hierarchical-3d2d1d.csv";
+
+  // expected size of the data set
+  int shoulds = 600;
+
+  /**
+   * Validate {@link PairCountingFMeasure} with respect to its ability to
+   * compare data clusterings.
+   *
+   * @throws ParameterException on errors.
+   */
+  @Test
+  public void testCompareDatabases() {
+    ListParameterization params = new ListParameterization();
+    // Input
+    params.addParameter(FileBasedDatabaseConnection.INPUT_ID, dataset);
+
+    // get database
+    Database db = ClassGenericsUtil.parameterizeOrAbort(StaticArrayDatabase.class, params);
+    db.initialize();
+
+    // verify data set size; report the actual size when it does not match.
+    Relation<?> rel = db.getRelation(TypeUtil.ANY);
+    assertTrue("Unexpected data set size: " + rel.size(), rel.size() == shoulds);
+
+    // run all-in-one
+    TrivialAllInOne allinone = new TrivialAllInOne();
+    Clustering<Model> rai = allinone.run(db);
+
+    // run all-in-noise
+    TrivialAllNoise allinnoise = new TrivialAllNoise();
+    Clustering<Model> ran = allinnoise.run(db);
+
+    // run by-label
+    ByLabelClustering bylabel = new ByLabelClustering();
+    Clustering<?> rbl = bylabel.run(db);
+    // comparing a clustering with itself must yield a perfect score of 1.0
+    assertEquals(1.0, PairCountingFMeasure.compareClusterings(rai, rai), Double.MIN_VALUE);
+    assertEquals(1.0, PairCountingFMeasure.compareClusterings(ran, ran), Double.MIN_VALUE);
+    assertEquals(1.0, PairCountingFMeasure.compareClusterings(rbl, rbl), Double.MIN_VALUE);
+
+    assertEquals(0.009950248756218905, PairCountingFMeasure.compareClusterings(ran, rbl, true, false), Double.MIN_VALUE);
+    assertEquals(0.0033277870216306157, PairCountingFMeasure.compareClusterings(rai, ran, true, false), Double.MIN_VALUE);
+
+    assertEquals(0.5 /* 0.3834296724470135 */, PairCountingFMeasure.compareClusterings(rai, rbl), Double.MIN_VALUE);
+  }
+}
\ No newline at end of file |