summaryrefslogtreecommitdiff
path: root/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluatePBMIndex.java
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:41 +0000
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:41 +0000
commit38212b3127e90751fb39cda34250bc11be62b76c (patch)
treedc1397346030e9695bd763dddc93b3be527cd643 /elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluatePBMIndex.java
parent337087b668d3a54f3afee3a9adb597a32e9f7e94 (diff)
Import Upstream version 0.7.0
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluatePBMIndex.java')
-rw-r--r--elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluatePBMIndex.java275
1 files changed, 275 insertions, 0 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluatePBMIndex.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluatePBMIndex.java
new file mode 100644
index 00000000..2c3a20e0
--- /dev/null
+++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluatePBMIndex.java
@@ -0,0 +1,275 @@
+package de.lmu.ifi.dbs.elki.evaluation.clustering.internal;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2015
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
+import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
+import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.result.EvaluationResult;
+import de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultHierarchy;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ *
+ * Compute the PBM of a data set
+ *
+ * Reference:
+ * <p>
+ * M. K. Pakhira, and S. Bandyopadhyay, and U. Maulik<br />
+ * Validity index for crisp and fuzzy clusters<br />
+ * Pattern recognition, 37(3)
+ * </p>
+ *
+ * @author Stephan Baier
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf NoiseHandling
+ */
+@Reference(authors = "M. K. Pakhira, and S. Bandyopadhyay, and U. Maulik", //
+title = "Validity index for crisp and fuzzy clusters",//
+booktitle = "Pattern recognition, 37(3)",//
+url = "http://dx.doi.org/10.1016/j.patcog.2003.06.005")
+public class EvaluatePBMIndex implements Evaluator {
+ /**
+ * Logger for debug output.
+ */
+ private static final Logging LOG = Logging.getLogger(EvaluatePBMIndex.class);
+
+ /**
+ * Option for noise handling.
+ */
+ private NoiseHandling noiseHandling;
+
+ /**
+ * Distance function to use.
+ */
+ private NumberVectorDistanceFunction<?> distanceFunction;
+
+ /**
+ * Key for logging statistics.
+ */
+ private String key = EvaluatePBMIndex.class.getName();
+
+ /**
+ * Constructor.
+ *
+ * @param distance Distance function
+ * @param noiseOpt Flag to control noise handling
+ */
+ public EvaluatePBMIndex(NumberVectorDistanceFunction<?> distance, NoiseHandling noiseOpt) {
+ super();
+ this.distanceFunction = distance;
+ this.noiseHandling = noiseOpt;
+ }
+
+ /**
+ * Evaluate a single clustering.
+ *
+ * @param db Database
+ * @param rel Data relation
+ * @param c Clustering
+ * @return PBM
+ */
+ public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
+ List<? extends Cluster<?>> clusters = c.getAllClusters();
+ NumberVector[] centroids = new NumberVector[clusters.size()];
+ int ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseHandling);
+
+ // Build global centroid and cluster count:
+ final int dim = RelationUtil.dimensionality(rel);
+ Centroid overallCentroid = new Centroid(dim);
+ EvaluateVarianceRatioCriteria.globalCentroid(overallCentroid, rel, clusters, centroids, noiseHandling);
+
+ // Maximum distance between centroids:
+ double max = 0;
+ for(int i = 0; i < centroids.length; i++) {
+ if(centroids[i] == null && noiseHandling != NoiseHandling.TREAT_NOISE_AS_SINGLETONS) {
+ continue;
+ }
+ for(int j = i + 1; j < centroids.length; j++) {
+ if(centroids[j] == null && noiseHandling != NoiseHandling.TREAT_NOISE_AS_SINGLETONS) {
+ continue;
+ }
+ if(centroids[i] == null && centroids[j] == null) {
+ // Need to compute pairwise distances of noise clusters.
+ for(DBIDIter iti = clusters.get(i).getIDs().iter(); iti.valid(); iti.advance()) {
+ for(DBIDIter itj = clusters.get(j).getIDs().iter(); itj.valid(); itj.advance()) {
+ double dist = distanceFunction.distance(rel.get(iti), rel.get(itj));
+ max = dist > max ? dist : max;
+ }
+ }
+ }
+ else if(centroids[i] == null) {
+ for(DBIDIter iti = clusters.get(i).getIDs().iter(); iti.valid(); iti.advance()) {
+ double dist = distanceFunction.distance(rel.get(iti), centroids[j]);
+ max = dist > max ? dist : max;
+ }
+ }
+ else if(centroids[j] == null) {
+ for(DBIDIter itj = clusters.get(j).getIDs().iter(); itj.valid(); itj.advance()) {
+ double dist = distanceFunction.distance(centroids[i], rel.get(itj));
+ max = dist > max ? dist : max;
+ }
+ }
+ else {
+ double dist = distanceFunction.distance(centroids[i], centroids[j]);
+ max = dist > max ? dist : max;
+ }
+ }
+ }
+
+ // a: Distance to own centroid
+ // b: Distance to overall centroid
+ double a = 0, b = 0;
+ Iterator<? extends Cluster<?>> ci = clusters.iterator();
+ for(int i = 0; ci.hasNext(); i++) {
+ Cluster<?> cluster = ci.next();
+ if(cluster.size() <= 1 || cluster.isNoise()) {
+ switch(noiseHandling){
+ case IGNORE_NOISE:
+ continue; // Ignored
+ case TREAT_NOISE_AS_SINGLETONS:
+ // Singletons: a = 0 by definition.
+ for(DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
+ b += SquaredEuclideanDistanceFunction.STATIC.distance(overallCentroid, rel.get(it));
+ }
+ continue; // with NEXT cluster.
+ case MERGE_NOISE:
+ break; // Treat like a cluster below:
+ }
+ }
+
+ for(DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
+ NumberVector obj = rel.get(it);
+ a += distanceFunction.distance(centroids[i], obj);
+ b += distanceFunction.distance(overallCentroid, obj);
+ }
+ }
+
+ final double pbm = Math.pow((1. / centroids.length) * (b / a) * max, 2.);
+
+ if(LOG.isStatistics()) {
+ LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
+ if(ignorednoise > 0) {
+ LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
+ }
+ LOG.statistics(new DoubleStatistic(key + ".pbm", pbm));
+ }
+
+ EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
+ MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
+ g.addMeasure("PBM-Index", pbm, 0., Double.POSITIVE_INFINITY, 0., false);
+ db.getHierarchy().resultChanged(ev);
+ return pbm;
+ }
+
+ @Override
+ public void processNewResult(ResultHierarchy hier, Result result) {
+ List<Clustering<?>> crs = ResultUtil.getClusteringResults(result);
+ if(crs.size() < 1) {
+ return;
+ }
+ Database db = ResultUtil.findDatabase(hier);
+ Relation<? extends NumberVector> rel = db.getRelation(this.distanceFunction.getInputTypeRestriction());
+
+ for(Clustering<?> c : crs) {
+ evaluateClustering(db, (Relation<? extends NumberVector>) rel, c);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Stephan Baier
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter for choosing the distance function.
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("pbm.distance", "Distance function to use for computing PBM.");
+
+ /**
+ * Parameter for the option, how noise should be treated.
+ */
+ public static final OptionID NOISE_ID = new OptionID("pbm.noisehandling", "Control how noise should be treated.");
+
+ /**
+ * Distance function to use.
+ */
+ private NumberVectorDistanceFunction<?> distance;
+
+ /**
+ * Option, how noise should be treated.
+ */
+ private NoiseHandling noiseHandling;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ ObjectParameter<NumberVectorDistanceFunction<?>> distanceFunctionP = new ObjectParameter<>(DISTANCE_ID, NumberVectorDistanceFunction.class, EuclideanDistanceFunction.class);
+ if(config.grab(distanceFunctionP)) {
+ distance = distanceFunctionP.instantiateClass(config);
+ }
+
+ EnumParameter<NoiseHandling> noiseP = new EnumParameter<NoiseHandling>(NOISE_ID, NoiseHandling.class, NoiseHandling.TREAT_NOISE_AS_SINGLETONS);
+ if(config.grab(noiseP)) {
+ noiseHandling = noiseP.getValue();
+ }
+ }
+
+ @Override
+ protected EvaluatePBMIndex makeInstance() {
+ return new EvaluatePBMIndex(distance, noiseHandling);
+ }
+ }
+
+}