1 files changed, 179 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java
new file mode 100644
index 00000000..43de1cb7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java
@@ -0,0 +1,179 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.JudgeOutlierScores;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.result.CollectionResult;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Triple;
+
+/**
+ * Evaluate clustering results by pair counting against a reference clustering.
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.landmark
+ * @apiviz.has PairCountingFMeasure
+ * @apiviz.has EvaluatePairCountingFMeasure.ScoreResult oneway - - «create»
+ */
+public class EvaluatePairCountingFMeasure implements Evaluator {
+  /**
+   * Logger for warnings raised during evaluation.
+   */
+  protected static final Logging logger = Logging.getLogger(EvaluatePairCountingFMeasure.class);
+
+  /**
+   * Parameter to obtain the reference clustering. Defaults to a flat label
+   * clustering.
+   */
+  public static final OptionID REFERENCE_ID = OptionID.getOrCreateOptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering.");
+
+  /**
+   * Parameter flag for special noise handling.
+   */
+  public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("paircounting.noisespecial", "Use special handling for noise clusters.");
+
+  /**
+   * Reference algorithm, run on the database to obtain the reference clustering.
+   */
+  private final ClusteringAlgorithm<?> referencealg;
+
+  /**
+   * Apply special handling to noise "clusters".
+   */
+  private final boolean noiseSpecialHandling;
+
+  /**
+   * Constructor.
+   * 
+   * @param referencealg Algorithm producing the reference clustering
+   * @param noiseSpecialHandling Noise handling flag
+   */
+  public EvaluatePairCountingFMeasure(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling) {
+    super();
+    this.referencealg = referencealg;
+    this.noiseSpecialHandling = noiseSpecialHandling;
+  }
+
+  @Override
+  public void processNewResult(HierarchicalResult baseResult, Result result) {
+    Database db = ResultUtil.findDatabase(baseResult);
+    List<Clustering<?>> crs = ResultUtil.getClusteringResults(result);
+    if(crs == null || crs.size() < 1) {
+      // No clusterings in this result, so there is nothing to evaluate.
+      return;
+    }
+    // Compute the reference clustering; only the first one returned is used.
+    Result refres = referencealg.run(db);
+    List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres);
+    if(refcrs == null || refcrs.size() == 0) {
+      logger.warning("Reference algorithm did not return a clustering result!");
+      return;
+    }
+    if(refcrs.size() > 1) {
+      logger.warning("Reference algorithm returned more than one result!");
+    }
+    Clustering<?> refc = refcrs.get(0);
+    for(Clustering<?> c : crs) {
+      PairSortedGeneratorInterface first = PairCountingFMeasure.getPairGenerator(c, noiseSpecialHandling, false);
+      PairSortedGeneratorInterface second = PairCountingFMeasure.getPairGenerator(refc, noiseSpecialHandling, false);
+      Triple<Integer, Integer, Integer> countedPairs = PairCountingFMeasure.countPairs(first, second);
+      // Sum in double arithmetic: adding the int counts first could overflow.
+      double sum = countedPairs.first.doubleValue() + countedPairs.second + countedPairs.third;
+      double inboth = countedPairs.first / sum;
+      double infirst = countedPairs.second / sum;
+      double insecond = countedPairs.third / sum;
+      double fmeasure = PairCountingFMeasure.fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, 1.0);
+      List<Vector> s = new ArrayList<Vector>(1);
+      s.add(new Vector(new double[] { fmeasure, inboth, infirst, insecond }));
+      db.getHierarchy().add(c, new ScoreResult(s));
+    }
+  }
+
+  /**
+   * Result object holding the pair counting F-measure scores of a clustering.
+   * 
+   * @author Erich Schubert
+   */
+  public static class ScoreResult extends CollectionResult<Vector> {
+    /**
+     * Constructor.
+     * 
+     * @param col score result
+     */
+    public ScoreResult(Collection<Vector> col) {
+      super("Pair Counting F-Measure", "pair-fmeasure", col);
+    }
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    // Parsed option values; filled in by makeOptions, consumed by makeInstance.
+    protected ClusteringAlgorithm<?> referencealg = null;
+    protected boolean noiseSpecialHandling = false;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class);
+      if(config.grab(referencealgP)) {
+        referencealg = referencealgP.instantiateClass(config);
+      }
+
+      Flag noiseSpecialHandlingF = new Flag(NOISE_ID);
+      if(config.grab(noiseSpecialHandlingF)) {
+        noiseSpecialHandling = noiseSpecialHandlingF.getValue();
+      }
+    }
+
+    @Override
+    protected EvaluatePairCountingFMeasure makeInstance() {
+      return new EvaluatePairCountingFMeasure(referencealg, noiseSpecialHandling);
+    }
+  }
+}
+\ No newline at end of file