summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:28 +0000
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:28 +0000
commitcde76aeb42240f7270bc6605c606ae07d2dc5a7d (patch)
treec3ebf1d7745224f524da31dbabc5d76b9ea75916 /src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java
Import Upstream version 0.4.0~beta1
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java240
1 files changed, 240 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java
new file mode 100644
index 00000000..aae966fe
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java
@@ -0,0 +1,240 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.Collection;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Triple;
+
+/**
+ * Compare two clustering results using a pair-counting F-Measure.
+ *
+ * A pair are any two objects that belong to the same cluster.
+ *
+ * Two clusterings are compared by comparing their pairs; if two clusterings
+ * completely agree, they also agree on every pair; even when the clusters and
+ * points are ordered differently.
+ *
+ * An empty clustering will of course have no pairs, the trivial all-in-one
+ * clustering of course has n^2 pairs. Therefore neither recall nor precision
+ * itself are useful, however their combination -- the F-Measure -- is useful.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge
+ */
+public class PairCountingFMeasure {
+ /**
+ * Get a pair generator for the given Clustering
+ *
+ * @param <R> Clustering result class
+ * @param <M> Model type
+ * @param clusters Clustering result
+ * @param noiseSpecial Special handling for "noise clusters"
+ * @param hierarchicalSpecial Special handling for hierarchical clusters
+ * @return Sorted pair generator
+ */
+ public static <R extends Clustering<M>, M extends Model> PairSortedGeneratorInterface getPairGenerator(R clusters, boolean noiseSpecial, boolean hierarchicalSpecial) {
+ // collect all clusters into a flat list.
+ Collection<Cluster<M>> allclusters = clusters.getAllClusters();
+
+ // Make generators for each cluster
+ PairSortedGeneratorInterface[] gens = new PairSortedGeneratorInterface[allclusters.size()];
+ int i = 0;
+ for(Cluster<?> c : allclusters) {
+ if(noiseSpecial && c.isNoise()) {
+ gens[i] = new PairGeneratorNoise(c);
+ }
+ else {
+ gens[i] = new PairGeneratorSingleCluster(c, hierarchicalSpecial);
+ }
+ i++;
+ }
+ return new PairGeneratorMerge(gens);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @param beta Beta value for the F-Measure
+ * @param noiseSpecial Noise receives special treatment
+ * @param hierarchicalSpecial Special handling for hierarchical clusters
+ * @return Pair counting F-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta, boolean noiseSpecial, boolean hierarchicalSpecial) {
+ PairSortedGeneratorInterface first = getPairGenerator(result1, noiseSpecial, hierarchicalSpecial);
+ PairSortedGeneratorInterface second = getPairGenerator(result2, noiseSpecial, hierarchicalSpecial);
+ Triple<Integer, Integer, Integer> countedPairs = countPairs(first, second);
+ return fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, beta);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @param beta Beta value for the F-Measure
+ * @return Pair counting F-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta) {
+ return compareClusterings(result1, result2, beta, false, false);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @param noiseSpecial Noise receives special treatment
+ * @return Pair counting F-1-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, boolean noiseSpecial, boolean hierarchicalSpecial) {
+ return compareClusterings(result1, result2, 1.0, noiseSpecial, hierarchicalSpecial);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @return Pair counting F-1-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2) {
+ return compareClusterings(result1, result2, 1.0, false, false);
+ }
+
+ /**
+ * Compare two sets of generated pairs. It determines how many objects of the
+ * first set are in both sets, just in the first set or just in the second
+ * set.</p>
+ *
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @return Returns a {@link Triple} that contains the number of objects that
+ * are in both sets (FIRST), the number of objects that are just in
+ * the first set (SECOND) and the number of object that are just in
+ * the second set (THIRD).
+ *
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> Triple<Integer, Integer, Integer> countPairs(R result1, S result2) {
+ PairSortedGeneratorInterface first = getPairGenerator(result1, false, false);
+ PairSortedGeneratorInterface second = getPairGenerator(result2, false, false);
+ return countPairs(first, second);
+ }
+
+ /**
+ * Compare two sets of generated pairs. It determines how many objects of the
+ * first set are in both sets, just in the first set or just in the second
+ * set.</p>
+ *
+ * @param first first set
+ * @param second second set
+ * @return Returns a {@link Triple} that contains the number of objects that
+ * are in both sets (FIRST), the number of objects that are just in
+ * the first set (SECOND) and the number of object that are just in
+ * the second set (THIRD).
+ */
+ public static Triple<Integer, Integer, Integer> countPairs(PairSortedGeneratorInterface first, PairSortedGeneratorInterface second) {
+ int inboth = 0;
+ int infirst = 0;
+ int insecond = 0;
+
+ while(first.current() != null && second.current() != null) {
+ int cmp = first.current().compareTo(second.current());
+ if(cmp == 0) {
+ inboth++;
+ first.next();
+ second.next();
+ }
+ else if(cmp < 0) {
+ infirst++;
+ first.next();
+ }
+ else {
+ insecond++;
+ second.next();
+ }
+ }
+ while(first.current() != null) {
+ infirst++;
+ first.next();
+ }
+ while(second.current() != null) {
+ insecond++;
+ second.next();
+ }
+ return new Triple<Integer, Integer, Integer>(inboth, infirst, insecond);
+ }
+
+ /**
+ * Computes the F-measure of the given parameters.</p>
+ * <p>
+ * Returns
+ * <code>((1+beta*beta) * inBoth) / ((1+beta*beta) * inBoth + (beta*beta)*inFirst + inSecond)</code>
+ * </p>
+ *
+ * @param inBoth The number of objects that are in both sets.
+ * @param inFirst The number of objects that are in the first set.
+ * @param inSecond The number of objects that are in the second set.
+ * @param beta The beta values for the f-measure.
+ * @return The F-measure.
+ */
+ public static double fMeasure(int inBoth, int inFirst, int inSecond, double beta) {
+ // System.out.println("Both: "+inboth+" First: "+infirst+" Second: "+insecond);
+ double fmeasure = ((1 + beta * beta) * inBoth) / ((1 + beta * beta) * inBoth + (beta * beta) * inFirst + inSecond);
+ return fmeasure;
+ }
+}