diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java new file mode 100644 index 00000000..43de1cb7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java @@ -0,0 +1,179 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.evaluation.Evaluator; +import de.lmu.ifi.dbs.elki.evaluation.outlier.JudgeOutlierScores; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.result.CollectionResult; +import de.lmu.ifi.dbs.elki.result.HierarchicalResult; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.result.ResultUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.utilities.pairs.Triple; + +/** + * Evaluate a clustering result by comparing it to an existing cluster label. + * + * @author Erich Schubert + * + * @apiviz.landmark + * @apiviz.has PairCountingFMeasure + * @apiviz.has EvaluatePairCountingFMeasure.ScoreResult oneway - - «create» + */ +public class EvaluatePairCountingFMeasure implements Evaluator { + /** + * Logger for debug output. + */ + protected static final Logging logger = Logging.getLogger(JudgeOutlierScores.class); + + /** + * Parameter to obtain the reference clustering. Defaults to a flat label + * clustering. + */ + public static final OptionID REFERENCE_ID = OptionID.getOrCreateOptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering."); + + /** + * Parameter flag for special noise handling. + */ + public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("paircounting.noisespecial", "Use special handling for noise clusters."); + + /** + * Reference algorithm. + */ + private ClusteringAlgorithm<?> referencealg; + + /** + * Apply special handling to noise "clusters". + */ + private boolean noiseSpecialHandling; + + /** + * Constructor. + * + * @param referencealg Reference clustering + * @param noiseSpecialHandling Noise handling flag + */ + public EvaluatePairCountingFMeasure(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling) { + super(); + this.referencealg = referencealg; + this.noiseSpecialHandling = noiseSpecialHandling; + } + + @Override + public void processNewResult(HierarchicalResult baseResult, Result result) { + Database db = ResultUtil.findDatabase(baseResult); + List<Clustering<?>> crs = ResultUtil.getClusteringResults(result); + if(crs == null || crs.size() < 1) { + // logger.warning("No clustering results found - nothing to evaluate!"); + return; + } + // Compute the reference clustering + Result refres = referencealg.run(db); + List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres); + if(refcrs.size() == 0) { + logger.warning("Reference algorithm did not return a clustering result!"); + return; + } + if(refcrs.size() > 1) { + logger.warning("Reference algorithm returned more than one result!"); + } + Clustering<?> refc = refcrs.get(0); + for(Clustering<?> c : crs) { + PairSortedGeneratorInterface first = PairCountingFMeasure.getPairGenerator(c, noiseSpecialHandling, false); + PairSortedGeneratorInterface second = PairCountingFMeasure.getPairGenerator(refc, noiseSpecialHandling, false); + Triple<Integer, Integer, Integer> countedPairs = PairCountingFMeasure.countPairs(first, second); + // Use double, since we want double results at the end! + double sum = countedPairs.first + countedPairs.second + countedPairs.third; + double inboth = countedPairs.first / sum; + double infirst = countedPairs.second / sum; + double insecond = countedPairs.third / sum; + double fmeasure = PairCountingFMeasure.fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, 1.0); + ArrayList<Vector> s = new ArrayList<Vector>(4); + s.add(new Vector(new double[] { fmeasure, inboth, infirst, insecond })); + db.getHierarchy().add(c, new ScoreResult(s)); + } + } + + /** + * Result object for outlier score judgements. + * + * @author Erich Schubert + */ + public static class ScoreResult extends CollectionResult<Vector> { + /** + * Constructor. + * + * @param col score result + */ + public ScoreResult(Collection<Vector> col) { + super("Pair Counting F-Measure", "pair-fmeasure", col); + } + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + protected ClusteringAlgorithm< ?> referencealg = null; + + protected boolean noiseSpecialHandling = false; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class); + if(config.grab(referencealgP)) { + referencealg = referencealgP.instantiateClass(config); + } + + Flag noiseSpecialHandlingF = new Flag(NOISE_ID); + if(config.grab(noiseSpecialHandlingF)) { + noiseSpecialHandling = noiseSpecialHandlingF.getValue(); + } + } + + @Override + protected EvaluatePairCountingFMeasure makeInstance() { + return new EvaluatePairCountingFMeasure(referencealg, noiseSpecialHandling); + } + } +}
\ No newline at end of file |