package de.lmu.ifi.dbs.elki.evaluation.index; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map.Entry; import de.lmu.ifi.dbs.elki.data.DoubleVector; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.evaluation.Evaluator; import de.lmu.ifi.dbs.elki.index.tree.Node; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialDirectoryEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.CollectionResult; import de.lmu.ifi.dbs.elki.result.HierarchicalResult; import de.lmu.ifi.dbs.elki.result.Result; import de.lmu.ifi.dbs.elki.result.ResultUtil; import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; /** * Compute the purity of index pages as a naive measure for performance * capabilities using the Gini index. * * @author Erich Schubert */ public class IndexPurity implements Evaluator { /** * Constructor. */ public IndexPurity() { super(); } @Override public void processNewResult(HierarchicalResult baseResult, Result newResult) { Database database = ResultUtil.findDatabase(baseResult); final ArrayList> indexes = ResultUtil.filterResults(newResult, SpatialIndexTree.class); if(indexes == null || indexes.size() <= 0) { return; } Relation lblrel = DatabaseUtil.guessLabelRepresentation(database); for(SpatialIndexTree index : indexes) { List leaves = index.getLeaves(); MeanVariance mv = new MeanVariance(); for(SpatialEntry e : leaves) { SpatialDirectoryEntry leaf = (SpatialDirectoryEntry) e; Node n = index.getNode(leaf.getEntryID()); final int total = n.getNumEntries(); HashMap map = new HashMap(total); for(int i = 0; i < total; i++) { DBID id = ((SpatialPointLeafEntry) n.getEntry(i)).getDBID(); String label = lblrel.get(id); Integer val = map.get(label); if(val == null) { val = 1; } else { val += 1; } map.put(label, val); } double gini = 0.0; for(Entry ent : map.entrySet()) { double rel = ent.getValue() / (double) total; gini += rel * rel; } mv.put(gini); } Collection col = new java.util.Vector(); col.add(new DoubleVector(new double[] { mv.getMean(), mv.getSampleStddev() })); database.getHierarchy().add((Result) index, new CollectionResult("Gini coefficient of index", "index-gini", col)); } } }