diff options
author | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:32 +0000 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:32 +0000 |
commit | c36aa2a8fd31ca5e225ff30278e910070cd2c8c1 (patch) | |
tree | bdfe1a5ccb57999d4d664a2a44121a78c88b19d4 /src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java | |
parent | 89aa1958dbaf9052da0c24706308a2ef8cefa96e (diff) |
Import Upstream version 0.5.0~beta2
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java | 104 |
1 files changed, 104 insertions, 0 deletions
package de.lmu.ifi.dbs.elki.evaluation.index;

/*
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures

 Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;

import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.index.tree.Node;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree;
import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.CollectionResult;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;

/**
 * Compute the purity of index pages as a naive measure for performance
 * capabilities using the Gini index.
 *
 * For every leaf page, the labels of the stored objects are counted and the
 * sum of the squared label fractions is computed (i.e. one minus the Gini
 * impurity: 1.0 for a page holding a single label, smaller for mixed pages).
 * The mean and sample standard deviation of this value over all leaf pages
 * are attached to the index as a result.
 *
 * @author Erich Schubert
 */
public class IndexPurity implements Evaluator {
  /**
   * Constructor.
   */
  public IndexPurity() {
    super();
  }

  /**
   * Evaluate every spatial index tree found in the new result.
   *
   * Does nothing when the new result contains no spatial index tree.
   * Otherwise, for each index a two-dimensional vector (mean, sample standard
   * deviation) of the per-page purity is added to the result hierarchy of the
   * database, below the index itself.
   *
   * @param baseResult Base result, used to locate the database
   * @param newResult Newly added result, searched for spatial index trees
   */
  @Override
  public void processNewResult(HierarchicalResult baseResult, Result newResult) {
    Database database = ResultUtil.findDatabase(baseResult);
    final ArrayList<SpatialIndexTree<?, ?>> indexes = ResultUtil.filterResults(newResult, SpatialIndexTree.class);
    if(indexes == null || indexes.isEmpty()) {
      return;
    }
    Relation<String> lblrel = DatabaseUtil.guessLabelRepresentation(database);
    for(SpatialIndexTree<?, ?> index : indexes) {
      List<? extends SpatialEntry> leaves = index.getLeaves();
      MeanVariance mv = new MeanVariance();
      for(SpatialEntry e : leaves) {
        // The entries returned by getLeaves() reference the leaf pages; they
        // are resolved to the actual nodes via their entry ID.
        SpatialDirectoryEntry leaf = (SpatialDirectoryEntry) e;
        Node<?> n = index.getNode(leaf.getEntryID());
        mv.put(pagePurity(n, lblrel));
      }
      // Plain ArrayList: the collection is filled once and never shared
      // between threads, so the synchronized legacy Vector is not needed.
      Collection<DoubleVector> col = new ArrayList<DoubleVector>(1);
      col.add(new DoubleVector(new double[] { mv.getMean(), mv.getSampleStddev() }));
      database.getHierarchy().add((Result) index, new CollectionResult<DoubleVector>("Gini coefficient of index", "index-gini", col));
    }
  }

  /**
   * Compute the label purity of a single leaf page.
   *
   * @param node Leaf page; its entries are expected to be point leaf entries
   * @param labels Relation providing the label of each object
   * @return Sum of the squared relative label frequencies on the page; 0.0
   *         for a page without entries
   */
  private static double pagePurity(Node<?> node, Relation<String> labels) {
    final int total = node.getNumEntries();
    // Count how often each label occurs on this page.
    HashMap<String, Integer> counts = new HashMap<String, Integer>(total);
    for(int i = 0; i < total; i++) {
      DBID id = ((SpatialPointLeafEntry) node.getEntry(i)).getDBID();
      String label = labels.get(id);
      Integer seen = counts.get(label);
      counts.put(label, (seen == null) ? 1 : seen + 1);
    }
    double purity = 0.0;
    for(Entry<String, Integer> ent : counts.entrySet()) {
      double rel = ent.getValue() / (double) total;
      purity += rel * rel;
    }
    return purity;
  }
}