summary | refs | log | tree | commit | diff
path: root/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java
diff options
context:
space:
mode:
author Andrej Shadura <andrewsh@debian.org> 2019-03-09 22:30:32 +0000
committer Andrej Shadura <andrewsh@debian.org> 2019-03-09 22:30:32 +0000
commit c36aa2a8fd31ca5e225ff30278e910070cd2c8c1 (patch)
tree bdfe1a5ccb57999d4d664a2a44121a78c88b19d4 /src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java
parent 89aa1958dbaf9052da0c24706308a2ef8cefa96e (diff)
Import Upstream version 0.5.0~beta2
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java')
-rw-r--r-- src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java 104
1 files changed, 104 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java b/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java
new file mode 100644
index 00000000..c28c22d3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java
@@ -0,0 +1,104 @@
+package de.lmu.ifi.dbs.elki.evaluation.index;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.index.tree.Node;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.CollectionResult;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+
+/**
+ * Compute the purity of index pages as a naive measure for performance
+ * capabilities, using a Gini-style index.
+ *
+ * For every leaf page of a spatial index tree, the sum of the squared relative
+ * label frequencies is computed: 1.0 if all entries of the page share a single
+ * label, 1/k if k labels occur equally often. This is the complement of the
+ * Gini impurity (1 - sum of p_i^2), so larger values mean purer pages. The
+ * mean and sample standard deviation over all leaf pages are attached to the
+ * index as a result.
+ *
+ * @author Erich Schubert
+ */
+public class IndexPurity implements Evaluator {
+  /**
+   * Constructor.
+   */
+  public IndexPurity() {
+    super();
+  }
+
+  /**
+   * Evaluate every spatial index tree found in the new result.
+   *
+   * For each index, one two-dimensional vector (mean purity, sample standard
+   * deviation of the purity) is added to the database's result hierarchy as a
+   * child of the index. Nothing happens when no spatial index tree is found.
+   *
+   * @param baseResult Result hierarchy used to look up the database
+   * @param newResult Newly added result, searched for spatial index trees
+   */
+  @Override
+  public void processNewResult(HierarchicalResult baseResult, Result newResult) {
+    Database database = ResultUtil.findDatabase(baseResult);
+    final ArrayList<SpatialIndexTree<?, ?>> indexes = ResultUtil.filterResults(newResult, SpatialIndexTree.class);
+    if(indexes == null || indexes.isEmpty()) {
+      return;
+    }
+    // Label relation as chosen by DatabaseUtil; shared by all indexes.
+    Relation<String> lblrel = DatabaseUtil.guessLabelRepresentation(database);
+    for(SpatialIndexTree<?, ?> index : indexes) {
+      List<? extends SpatialEntry> leaves = index.getLeaves();
+      MeanVariance mv = new MeanVariance();
+      for(SpatialEntry e : leaves) {
+        // getLeaves() yields the entries pointing to leaf pages; resolve each
+        // one to its node. The cast fails with a ClassCastException if the
+        // index hands out a different entry type.
+        SpatialDirectoryEntry leaf = (SpatialDirectoryEntry) e;
+        Node<?> n = index.getNode(leaf.getEntryID());
+        mv.put(leafPurity(n, lblrel));
+      }
+      // A single-element result only needs a plain list, not a synchronized
+      // legacy Vector.
+      Collection<DoubleVector> col = new ArrayList<DoubleVector>(1);
+      col.add(new DoubleVector(new double[] { mv.getMean(), mv.getSampleStddev() }));
+      database.getHierarchy().add((Result) index, new CollectionResult<DoubleVector>("Gini coefficient of index", "index-gini", col));
+    }
+  }
+
+  /**
+   * Compute the purity of a single leaf page.
+   *
+   * @param node Leaf node; each of its entries is cast to
+   *        {@link SpatialPointLeafEntry} to obtain the object id
+   * @param labels Relation queried for the label of each object id
+   * @return Sum of the squared relative label frequencies of the page; 0.0 for
+   *         a node without entries
+   */
+  private static double leafPurity(Node<?> node, Relation<String> labels) {
+    final int total = node.getNumEntries();
+    // Count how often each label occurs on this page.
+    HashMap<String, Integer> counts = new HashMap<String, Integer>(total);
+    for(int i = 0; i < total; i++) {
+      DBID id = ((SpatialPointLeafEntry) node.getEntry(i)).getDBID();
+      String label = labels.get(id);
+      Integer count = counts.get(label);
+      counts.put(label, (count == null) ? 1 : count + 1);
+    }
+    // With total == 0 the map is empty, so no division by zero can occur.
+    double purity = 0.0;
+    for(Entry<String, Integer> ent : counts.entrySet()) {
+      double rel = ent.getValue() / (double) total;
+      purity += rel * rel;
+    }
+    return purity;
+  }
+}