diff options
author | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:28 +0000 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:28 +0000 |
commit | cde76aeb42240f7270bc6605c606ae07d2dc5a7d (patch) | |
tree | c3ebf1d7745224f524da31dbabc5d76b9ea75916 /src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java |
Import Upstream version 0.4.0~beta1
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java new file mode 100644 index 00000000..aae966fe --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java @@ -0,0 +1,240 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface; +import de.lmu.ifi.dbs.elki.utilities.pairs.Triple; + +/** + * Compare two clustering results using a pair-counting F-Measure. + * + * A pair are any two objects that belong to the same cluster. + * + * Two clusterings are compared by comparing their pairs; if two clusterings + * completely agree, they also agree on every pair; even when the clusters and + * points are ordered differently. + * + * An empty clustering will of course have no pairs, the trivial all-in-one + * clustering of course has n^2 pairs. Therefore neither recall nor precision + * itself are useful, however their combination -- the F-Measure -- is useful. + * + * @author Erich Schubert + * + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge + */ +public class PairCountingFMeasure { + /** + * Get a pair generator for the given Clustering + * + * @param <R> Clustering result class + * @param <M> Model type + * @param clusters Clustering result + * @param noiseSpecial Special handling for "noise clusters" + * @param hierarchicalSpecial Special handling for hierarchical clusters + * @return Sorted pair generator + */ + public static <R extends Clustering<M>, M extends Model> PairSortedGeneratorInterface getPairGenerator(R clusters, boolean noiseSpecial, boolean hierarchicalSpecial) { + // collect all clusters into a flat list. + Collection<Cluster<M>> allclusters = clusters.getAllClusters(); + + // Make generators for each cluster + PairSortedGeneratorInterface[] gens = new PairSortedGeneratorInterface[allclusters.size()]; + int i = 0; + for(Cluster<?> c : allclusters) { + if(noiseSpecial && c.isNoise()) { + gens[i] = new PairGeneratorNoise(c); + } + else { + gens[i] = new PairGeneratorSingleCluster(c, hierarchicalSpecial); + } + i++; + } + return new PairGeneratorMerge(gens); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @param beta Beta value for the F-Measure + * @param noiseSpecial Noise receives special treatment + * @param hierarchicalSpecial Special handling for hierarchical clusters + * @return Pair counting F-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta, boolean noiseSpecial, boolean hierarchicalSpecial) { + PairSortedGeneratorInterface first = getPairGenerator(result1, noiseSpecial, hierarchicalSpecial); + PairSortedGeneratorInterface second = getPairGenerator(result2, noiseSpecial, hierarchicalSpecial); + Triple<Integer, Integer, Integer> countedPairs = countPairs(first, second); + return fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, beta); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @param beta Beta value for the F-Measure + * @return Pair counting F-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta) { + return compareClusterings(result1, result2, beta, false, false); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @param noiseSpecial Noise receives special treatment + * @return Pair counting F-1-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, boolean noiseSpecial, boolean hierarchicalSpecial) { + return compareClusterings(result1, result2, 1.0, noiseSpecial, hierarchicalSpecial); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @return Pair counting F-1-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2) { + return compareClusterings(result1, result2, 1.0, false, false); + } + + /** + * Compare two sets of generated pairs. It determines how many objects of the + * first set are in both sets, just in the first set or just in the second + * set.</p> + * + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @return Returns a {@link Triple} that contains the number of objects that + * are in both sets (FIRST), the number of objects that are just in + * the first set (SECOND) and the number of object that are just in + * the second set (THIRD). + * + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> Triple<Integer, Integer, Integer> countPairs(R result1, S result2) { + PairSortedGeneratorInterface first = getPairGenerator(result1, false, false); + PairSortedGeneratorInterface second = getPairGenerator(result2, false, false); + return countPairs(first, second); + } + + /** + * Compare two sets of generated pairs. It determines how many objects of the + * first set are in both sets, just in the first set or just in the second + * set.</p> + * + * @param first first set + * @param second second set + * @return Returns a {@link Triple} that contains the number of objects that + * are in both sets (FIRST), the number of objects that are just in + * the first set (SECOND) and the number of object that are just in + * the second set (THIRD). + */ + public static Triple<Integer, Integer, Integer> countPairs(PairSortedGeneratorInterface first, PairSortedGeneratorInterface second) { + int inboth = 0; + int infirst = 0; + int insecond = 0; + + while(first.current() != null && second.current() != null) { + int cmp = first.current().compareTo(second.current()); + if(cmp == 0) { + inboth++; + first.next(); + second.next(); + } + else if(cmp < 0) { + infirst++; + first.next(); + } + else { + insecond++; + second.next(); + } + } + while(first.current() != null) { + infirst++; + first.next(); + } + while(second.current() != null) { + insecond++; + second.next(); + } + return new Triple<Integer, Integer, Integer>(inboth, infirst, insecond); + } + + /** + * Computes the F-measure of the given parameters.</p> + * <p> + * Returns + * <code>((1+beta*beta) * inBoth) / ((1+beta*beta) * inBoth + (beta*beta)*inFirst + inSecond)</code> + * </p> + * + * @param inBoth The number of objects that are in both sets. + * @param inFirst The number of objects that are in the first set. + * @param inSecond The number of objects that are in the second set. + * @param beta The beta values for the f-measure. + * @return The F-measure. + */ + public static double fMeasure(int inBoth, int inFirst, int inSecond, double beta) { + // System.out.println("Both: "+inboth+" First: "+infirst+" Second: "+insecond); + double fmeasure = ((1 + beta * beta) * inBoth) / ((1 + beta * beta) * inBoth + (beta * beta) * inFirst + inSecond); + return fmeasure; + } +} |