summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java 179
1 files changed, 179 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java
new file mode 100644
index 00000000..43de1cb7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java
@@ -0,0 +1,179 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.JudgeOutlierScores;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.result.CollectionResult;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Triple;
+
+/**
+ * Evaluate a clustering result by comparing it to a reference clustering
+ * (by default a by-label clustering) using the pair counting F-measure.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.has PairCountingFMeasure
+ * @apiviz.has EvaluatePairCountingFMeasure.ScoreResult oneway - - «create»
+ */
+public class EvaluatePairCountingFMeasure implements Evaluator {
+  /**
+   * Logger for debug output.
+   */
+  protected static final Logging logger = Logging.getLogger(EvaluatePairCountingFMeasure.class);
+
+  /**
+   * Parameter to obtain the reference clustering. Defaults to a flat label
+   * clustering.
+   */
+  public static final OptionID REFERENCE_ID = OptionID.getOrCreateOptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering.");
+
+  /**
+   * Parameter flag for special noise handling.
+   */
+  public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("paircounting.noisespecial", "Use special handling for noise clusters.");
+
+  /**
+   * Algorithm that produces the reference clustering.
+   */
+  private final ClusteringAlgorithm<?> referencealg;
+
+  /**
+   * Apply special handling to noise "clusters".
+   */
+  private final boolean noiseSpecialHandling;
+
+  /**
+   * Constructor.
+   *
+   * @param referencealg Algorithm producing the reference clustering
+   * @param noiseSpecialHandling Noise handling flag
+   */
+  public EvaluatePairCountingFMeasure(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling) {
+    this.referencealg = referencealg;
+    this.noiseSpecialHandling = noiseSpecialHandling;
+  }
+
+  /**
+   * Compare every clustering found in {@code result} against the reference
+   * clustering and attach one {@link ScoreResult} to each of them in the
+   * database hierarchy.
+   *
+   * The reference algorithm is run on the database located via
+   * {@code baseResult}; if it yields several clusterings, only the first one
+   * is used (a warning is logged).
+   *
+   * @param baseResult Result used to locate the database
+   * @param result Newly added result that is searched for clusterings
+   */
+  @Override
+  public void processNewResult(HierarchicalResult baseResult, Result result) {
+    Database db = ResultUtil.findDatabase(baseResult);
+    List<Clustering<?>> crs = ResultUtil.getClusteringResults(result);
+    if(crs == null || crs.isEmpty()) {
+      // Nothing to evaluate. Intentionally silent: no warning is emitted
+      // when a result simply contains no clustering.
+      return;
+    }
+    // Compute the reference clustering
+    Result refres = referencealg.run(db);
+    List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres);
+    // Same null guard as for crs above, since both lists come from the
+    // same helper.
+    if(refcrs == null || refcrs.isEmpty()) {
+      logger.warning("Reference algorithm did not return a clustering result!");
+      return;
+    }
+    if(refcrs.size() > 1) {
+      logger.warning("Reference algorithm returned more than one result!");
+    }
+    Clustering<?> refc = refcrs.get(0);
+    for(Clustering<?> c : crs) {
+      // NOTE(review): the reference generator is rebuilt for every clustering;
+      // presumably generators are consumed by countPairs - confirm before
+      // hoisting it out of the loop.
+      PairSortedGeneratorInterface first = PairCountingFMeasure.getPairGenerator(c, noiseSpecialHandling, false);
+      PairSortedGeneratorInterface second = PairCountingFMeasure.getPairGenerator(refc, noiseSpecialHandling, false);
+      Triple<Integer, Integer, Integer> countedPairs = PairCountingFMeasure.countPairs(first, second);
+      // Widen to double BEFORE adding: summing the three counts as int could
+      // overflow for large data sets. If no pairs were counted at all, sum is
+      // 0 and the fractions below evaluate to NaN.
+      double sum = countedPairs.first.doubleValue() + countedPairs.second + countedPairs.third;
+      double inboth = countedPairs.first / sum;
+      double infirst = countedPairs.second / sum;
+      double insecond = countedPairs.third / sum;
+      double fmeasure = PairCountingFMeasure.fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, 1.0);
+      // The result collection holds exactly one score vector.
+      List<Vector> s = new ArrayList<Vector>(1);
+      s.add(new Vector(new double[] { fmeasure, inboth, infirst, insecond }));
+      db.getHierarchy().add(c, new ScoreResult(s));
+    }
+  }
+
+  /**
+   * Result object holding the pair counting F-measure scores.
+   *
+   * @author Erich Schubert
+   */
+  public static class ScoreResult extends CollectionResult<Vector> {
+    /**
+     * Constructor.
+     *
+     * @param col score result
+     */
+    public ScoreResult(Collection<Vector> col) {
+      super("Pair Counting F-Measure", "pair-fmeasure", col);
+    }
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * Reference clustering algorithm; stays {@code null} unless the
+     * parameter could be grabbed from the configuration.
+     */
+    protected ClusteringAlgorithm<?> referencealg = null;
+
+    /**
+     * Noise handling flag; {@code false} unless the flag could be grabbed.
+     */
+    protected boolean noiseSpecialHandling = false;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class);
+      if(config.grab(referencealgP)) {
+        referencealg = referencealgP.instantiateClass(config);
+      }
+
+      Flag noiseSpecialHandlingF = new Flag(NOISE_ID);
+      if(config.grab(noiseSpecialHandlingF)) {
+        noiseSpecialHandling = noiseSpecialHandlingF.getValue();
+      }
+    }
+
+    @Override
+    protected EvaluatePairCountingFMeasure makeInstance() {
+      return new EvaluatePairCountingFMeasure(referencealg, noiseSpecialHandling);
+    }
+  }
+}
\ No newline at end of file