summaryrefslogtreecommitdiff
path: root/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:41 +0000
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:41 +0000
commit38212b3127e90751fb39cda34250bc11be62b76c (patch)
treedc1397346030e9695bd763dddc93b3be527cd643 /elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java
parent337087b668d3a54f3afee3a9adb597a32e9f7e94 (diff)
Import Upstream version 0.7.0
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java')
-rw-r--r--elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java195
1 files changed, 195 insertions, 0 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java
new file mode 100644
index 00000000..bbef4b5f
--- /dev/null
+++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java
@@ -0,0 +1,195 @@
+package de.lmu.ifi.dbs.elki.evaluation.clustering;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2015
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.evaluation.clustering.ClusterContingencyTable.Util;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Set matching purity measures.
+ *
+ * References:
+ * <p>
+ * Zhao, Y. and Karypis, G.<br />
+ * Criterion functions for document clustering: Experiments and analysis<br />
+ * University of Minnesota, Department of Computer Science, Technical Report
+ * 01-40, 2001
+ * </p>
+ * <p>
+ * Meilă, M<br />
+ * Comparing clusterings<br />
+ * University of Washington, Seattle, Technical Report 418, 2002
+ * </p>
+ * <p>
+ * Steinbach, M. and Karypis, G. and Kumar, V.<br />
+ * A comparison of document clustering techniques<br />
+ * KDD workshop on text mining, 2000
+ * </p>
+ * <p>
+ * E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br />
+ * A comparison of extrinsic clustering evaluation metrics based on formal
+ * constraints<br />
+ * Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009
+ * </p>
+ *
+ * @author Sascha Goldhofer
+ */
+@Reference(authors = "Meilă, M", //
+title = "Comparing clusterings", //
+booktitle = "University of Washington, Seattle, Technical Report 418, 2002", //
+url = "http://www.stat.washington.edu/mmp/Papers/compare-colt.pdf")
+public class SetMatchingPurity {
+ /**
+ * Result cache
+ */
+ protected double smPurity = -1.0, smInversePurity = -1.0, smFFirst = -1.0,
+ smFSecond = -1.0;
+
+ /**
+ * Constructor.
+ *
+ * @param table Contingency table
+ */
+ protected SetMatchingPurity(ClusterContingencyTable table) {
+ super();
+ int numobj = table.contingency[table.size1][table.size2];
+ {
+ smPurity = 0.0;
+ smFFirst = 0.0;
+ // iterate first clustering
+ for(int i1 = 0; i1 < table.size1; i1++) {
+ double precisionMax = 0.0;
+ double fMax = 0.0;
+ for(int i2 = 0; i2 < table.size2; i2++) {
+ precisionMax = Math.max(precisionMax, (1.0 * table.contingency[i1][i2]));
+ fMax = Math.max(fMax, (2.0 * table.contingency[i1][i2]) / (table.contingency[i1][table.size2] + table.contingency[table.size1][i2]));
+ // / numobj));
+ }
+ smPurity += (precisionMax / numobj);
+ smFFirst += (table.contingency[i1][table.size2] / (double) table.contingency[table.size1][table.size2]) * fMax;
+ // * contingency[i1][size2]/numobj;
+ }
+ }
+ {
+ smInversePurity = 0.0;
+ smFSecond = 0.0;
+ // iterate second clustering
+ for(int i2 = 0; i2 < table.size2; i2++) {
+ double recallMax = 0.0;
+ double fMax = 0.0;
+ for(int i1 = 0; i1 < table.size1; i1++) {
+ recallMax = Math.max(recallMax, (1.0 * table.contingency[i1][i2]));
+ fMax = Math.max(fMax, (2.0 * table.contingency[i1][i2]) / (table.contingency[i1][table.size2] + table.contingency[table.size1][i2]));
+ // / numobj));
+ }
+ smInversePurity += (recallMax / numobj);
+ smFSecond += (table.contingency[table.size1][i2] / (double) table.contingency[table.size1][table.size2]) * fMax;
+ // * contingency[i1][size2]/numobj;
+ }
+ }
+ }
+
+ /**
+ * Get the set matchings purity (first:second clustering) (normalized, 1 =
+ * equal)
+ *
+ * @return purity
+ */
+ @Reference(authors = "Zhao, Y. and Karypis, G.", //
+ title = "Criterion functions for document clustering: Experiments and analysis", //
+ booktitle = "University of Minnesota, Department of Computer Science, Technical Report 01-40, 2001", //
+ url = "http://www-users.cs.umn.edu/~karypis/publications/Papers/PDF/vscluster.pdf")
+ public double purity() {
+ return smPurity;
+ }
+
+ /**
+ * Get the set matchings inverse purity (second:first clustering) (normalized,
+ * 1 = equal)
+ *
+ * @return Inverse purity
+ */
+ public double inversePurity() {
+ return smInversePurity;
+ }
+
+ /**
+ * Get the set matching F1-Measure
+ *
+ * <p>
+ * Steinbach, M. and Karypis, G. and Kumar, V.<br />
+ * A comparison of document clustering techniques<br />
+ * KDD workshop on text mining, 2000
+ * </p>
+ *
+ * @return Set Matching F1-Measure
+ */
+ @Reference(authors = "Steinbach, M. and Karypis, G. and Kumar, V.", //
+ title = "A comparison of document clustering techniques", //
+ booktitle = "KDD workshop on text mining, 2000", //
+ url = "http://www-users.itlabs.umn.edu/~karypis/publications/Papers/PDF/doccluster.pdf")
+ public double f1Measure() {
+ return Util.f1Measure(purity(), inversePurity());
+ }
+
+ /**
+ * Get the Van Rijsbergen’s F measure (asymmetric) for first clustering
+ *
+ * <p>
+ * E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br />
+ * A comparison of extrinsic clustering evaluation metrics based on formal
+ * constraints<br />
+ * Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009
+ * </p>
+ *
+ * @return Set Matching F-Measure of first clustering
+ */
+ @Reference(authors = "E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo", //
+ title = "A comparison of extrinsic clustering evaluation metrics based on formal constraints", //
+ booktitle = "Inf. Retrieval, vol. 12, no. 5", //
+ url = "http://dx.doi.org/10.1007/s10791-009-9106-z")
+ public double fMeasureFirst() {
+ return smFFirst;
+ }
+
+ /**
+ * Get the Van Rijsbergen’s F measure (asymmetric) for second clustering
+ *
+ * <p>
+ * E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br />
+ * A comparison of extrinsic clustering evaluation metrics based on formal
+ * constraints<br />
+ * Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009
+ * </p>
+ *
+ * @return Set Matching F-Measure of second clustering
+ */
+ @Reference(authors = "E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo", //
+ title = "A comparison of extrinsic clustering evaluation metrics based on formal constraints", //
+ booktitle = "Inf. Retrieval, vol. 12, no. 5", //
+ url = "http://dx.doi.org/10.1007/s10791-009-9106-z")
+ public double fMeasureSecond() {
+ return smFSecond;
+ }
+} \ No newline at end of file