1 files changed, 288 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
new file mode 100644
index 00000000..c82f5cf5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
@@ -0,0 +1,288 @@
+package de.lmu.ifi.dbs.elki.evaluation.clustering;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.result.BasicResult;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Evaluate a clustering result by comparing it to an existing cluster label.
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.landmark
+ * @apiviz.uses ClusterContingencyTable
+ * @apiviz.has EvaluateClustering.ScoreResult oneway - - «create»
+ */
+public class EvaluateClustering implements Evaluator {
+  /**
+   * Logger for debug output.
+   */
+  protected static final Logging logger = Logging.getLogger(EvaluateClustering.class);
+
+  /**
+   * Parameter to obtain the reference clustering. Defaults to a flat label
+   * clustering.
+   */
+  public static final OptionID REFERENCE_ID = OptionID.getOrCreateOptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering.");
+
+  /**
+   * Parameter flag for special noise handling.
+   */
+  public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("paircounting.noisespecial", "Use special handling for noise clusters.");
+
+  /**
+   * Parameter flag to disable self-pairing
+   */
+  public static final OptionID SELFPAIR_ID = OptionID.getOrCreateOptionID("paircounting.selfpair", "Enable self-pairing for cluster comparison.");
+
+  /**
+   * Reference algorithm.
+   */
+  private ClusteringAlgorithm<?> referencealg;
+
+  /**
+   * Apply special handling to noise "clusters".
+   */
+  private boolean noiseSpecialHandling;
+
+  /**
+   * Use self-pairing in pair-counting measures
+   */
+  private boolean selfPairing;
+
+  /**
+   * Constructor.
+   * 
+   * @param referencealg Reference clustering
+   * @param noiseSpecialHandling Noise handling flag
+   * @param selfPairing Self-pairing flag
+   */
+  public EvaluateClustering(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling, boolean selfPairing) {
+    super();
+    this.referencealg = referencealg;
+    this.noiseSpecialHandling = noiseSpecialHandling;
+    this.selfPairing = selfPairing;
+  }
+
+  @Override
+  public void processNewResult(HierarchicalResult baseResult, Result result) {
+    Database db = ResultUtil.findDatabase(baseResult);
+    List<Clustering<?>> crs = ResultUtil.getClusteringResults(result);
+    if(crs == null || crs.size() < 1) {
+      return;
+    }
+    // Compute the reference clustering
+    Clustering<?> refc = null;
+    // Try to find an existing reference clustering (globally)
+    if(refc == null) {
+      Iterator<Clustering<?>> cs = ResultUtil.filteredResults(baseResult, Clustering.class);
+      while(cs.hasNext()) {
+        Clustering<?> test = cs.next();
+        if(isReferenceResult(test)) {
+          refc = test;
+          break;
+        }
+      }
+    }
+    // Try to find an existing reference clustering (locally)
+    if(refc == null) {
+      Iterator<Clustering<?>> cs = ResultUtil.filteredResults(result, Clustering.class);
+      while(cs.hasNext()) {
+        Clustering<?> test = cs.next();
+        if(isReferenceResult(test)) {
+          refc = test;
+          break;
+        }
+      }
+    }
+    if(refc == null) {
+      logger.debug("Generating a new reference clustering.");
+      Result refres = referencealg.run(db);
+      List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres);
+      if(refcrs.size() == 0) {
+        logger.warning("Reference algorithm did not return a clustering result!");
+        return;
+      }
+      if(refcrs.size() > 1) {
+        logger.warning("Reference algorithm returned more than one result!");
+      }
+      refc = refcrs.get(0);
+    }
+    else {
+      logger.debug("Using existing clustering: " + refc.getLongName() + " " + refc.getShortName());
+    }
+    for(Clustering<?> c : crs) {
+      if(c == refc) {
+        continue;
+      }
+      ClusterContingencyTable contmat = new ClusterContingencyTable(selfPairing, noiseSpecialHandling);
+      contmat.process(refc, c);
+
+      db.getHierarchy().add(c, new ScoreResult(contmat));
+    }
+  }
+
+  private boolean isReferenceResult(Clustering<?> t) {
+    // FIXME: don't hard-code strings
+    if(t.getShortName().startsWith("bylabel-")) {
+      return true;
+    }
+    if(t.getShortName().startsWith("bymodel-")) {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Result object for outlier score judgements.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.composedOf ClusterContingencyTable
+   */
+  public static class ScoreResult extends BasicResult implements TextWriteable {
+    /**
+     * Cluster contingency table
+     */
+    protected ClusterContingencyTable contmat;
+
+    /**
+     * Constructor.
+     * 
+     * @param contmat score result
+     */
+    public ScoreResult(ClusterContingencyTable contmat) {
+      super("Cluster-Evalation", "cluster-evaluation");
+      this.contmat = contmat;
+    }
+
+    /**
+     * Get the contingency table
+     * 
+     * @return the contingency table
+     */
+    public ClusterContingencyTable getContingencyTable() {
+      return contmat;
+    }
+
+    @Override
+    public void writeToText(TextWriterStream out, String label) {
+      out.commentPrint("Pair-F1, ");
+      out.commentPrint("Pair-Precision, ");
+      out.commentPrint("Pair-Recall, ");
+      out.commentPrint("Pair-Rand, ");
+      out.commentPrint("Pair-AdjustedRand, ");
+      out.commentPrint("Pair-FowlkesMallows, ");
+      out.commentPrint("Pair-Jaccard, ");
+      out.commentPrint("Pair-Mirkin, ");
+      out.commentPrint("Entropy-VI, ");
+      out.commentPrint("Entropy-NormalizedVI, ");
+      out.commentPrint("Entropy-F1, ");
+      out.commentPrint("Edit-F1, ");
+      out.commentPrint("SM-InvPurity, ");
+      out.commentPrint("SM-Purity, ");
+      out.commentPrint("SM-F1, ");
+      out.commentPrint("BCubed-Precision, ");
+      out.commentPrint("BCubed-Recall, ");
+      out.commentPrint("BCubed-F1");
+      out.flush();
+      out.inlinePrint(contmat.getPaircount().f1Measure());
+      out.inlinePrint(contmat.getPaircount().precision());
+      out.inlinePrint(contmat.getPaircount().recall());
+      out.inlinePrint(contmat.getPaircount().randIndex());
+      out.inlinePrint(contmat.getPaircount().adjustedRandIndex());
+      out.inlinePrint(contmat.getPaircount().fowlkesMallows());
+      out.inlinePrint(contmat.getPaircount().jaccard());
+      out.inlinePrint(contmat.getPaircount().mirkin());
+      out.inlinePrint(contmat.getEntropy().variationOfInformation());
+      out.inlinePrint(contmat.getEntropy().normalizedVariationOfInformation());
+      out.inlinePrint(contmat.getEdit().f1Measure());
+      out.inlinePrint(contmat.getSetMatching().inversePurity());
+      out.inlinePrint(contmat.getSetMatching().purity());
+      out.inlinePrint(contmat.getSetMatching().f1Measure());
+      out.inlinePrint(contmat.getBCubed().precision());
+      out.inlinePrint(contmat.getBCubed().recall());
+      out.inlinePrint(contmat.getBCubed().f1Measure());
+      out.flush();
+    }
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    protected ClusteringAlgorithm<?> referencealg = null;
+
+    protected boolean noiseSpecialHandling = false;
+
+    protected boolean selfPairing = false;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class);
+      if(config.grab(referencealgP)) {
+        referencealg = referencealgP.instantiateClass(config);
+      }
+
+      Flag noiseSpecialHandlingF = new Flag(NOISE_ID);
+      if(config.grab(noiseSpecialHandlingF)) {
+        noiseSpecialHandling = noiseSpecialHandlingF.getValue();
+      }
+
+      Flag selfPairingF = new Flag(SELFPAIR_ID);
+      if(config.grab(selfPairingF)) {
+        selfPairing = selfPairingF.getValue();
+      }
+    }
+
+    @Override
+    protected EvaluateClustering makeInstance() {
+      return new EvaluateClustering(referencealg, noiseSpecialHandling, !selfPairing);
+    }
+  }
+}
+\ No newline at end of file