package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.ArrayList; import java.util.Collections; import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.tree.BreadthFirstEnumeration; import de.lmu.ifi.dbs.elki.index.tree.IndexTreePath; import de.lmu.ifi.dbs.elki.index.tree.TreeIndexPathComponent; import de.lmu.ifi.dbs.elki.index.tree.metrical.MetricalIndexTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.strategies.split.Assignments; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.strategies.split.DistanceEntry; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.statistics.Counter; import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; /** * Abstract super class for all M-Tree variants. * * @author Elke Achtert * * @apiviz.composedOf MTreeSettings * @apiviz.composedOf Statistics * @apiviz.has AbstractMTreeNode oneway - - contains * @apiviz.excludeSubtypes * * @param the type of DatabaseObject to be stored in the metrical index * @param the type of Distance used in the metrical index * @param the type of MetricalNode used in the metrical index * @param the type of MetricalEntry used in the metrical index * @param the type to store settings in. */ public abstract class AbstractMTree, N extends AbstractMTreeNode, E extends MTreeEntry, S extends MTreeSettings> extends MetricalIndexTree { /** * Debugging flag: do extra integrity checks. */ protected static final boolean EXTRA_INTEGRITY_CHECKS = false; /** * Tree settings. */ protected S settings; /** * For counting the number of distance computations. */ public Statistics statistics = new Statistics(); /** * Constructor. * * @param pagefile Page file * @param settings Tree settings */ public AbstractMTree(PageFile pagefile, S settings) { super(pagefile); this.settings = settings; } @Override public final DistanceFunction getDistanceFunction() { return settings.distanceFunction; } /** * Get the distance factory. * * @return the distance factory used */ public final D getDistanceFactory() { return settings.distanceFunction.getDistanceFactory(); } /** * Returns a string representation of this M-Tree by performing a breadth * first enumeration on the tree and adding the string representation of the * visited nodes and their entries to the result. * * @return a string representation of this M-Tree */ @Override public String toString() { StringBuilder result = new StringBuilder(); int dirNodes = 0; int leafNodes = 0; int objects = 0; int levels = 0; N node = getRoot(); while (!node.isLeaf()) { if (node.getNumEntries() > 0) { E entry = node.getEntry(0); node = getNode(entry); levels++; } } BreadthFirstEnumeration enumeration = new BreadthFirstEnumeration<>(this, getRootPath()); while (enumeration.hasMoreElements()) { IndexTreePath path = enumeration.nextElement(); E entry = path.getLastPathComponent().getEntry(); if (entry.isLeafEntry()) { objects++; result.append("\n ").append(entry.toString()); } else { node = getNode(entry); result.append("\n\n").append(node).append(", numEntries = ").append(node.getNumEntries()); result.append("\n").append(entry.toString()); if (node.isLeaf()) { leafNodes++; } else { dirNodes++; } } } result.append(getClass().getName()).append(" hat ").append((levels + 1)).append(" Ebenen \n"); result.append("DirCapacity = ").append(dirCapacity).append("\n"); result.append("LeafCapacity = ").append(leafCapacity).append("\n"); result.append(dirNodes).append(" Directory Nodes \n"); result.append(leafNodes).append(" Leaf Nodes \n"); result.append(objects).append(" Objects \n"); // PageFileUtil.appendPageFileStatistics(result, getPageFileStatistics()); return result.toString(); } /** * Inserts the specified object into this M-Tree. * * @param entry the entry to be inserted * @param withPreInsert if this flag is true, the preInsert method will be * called before inserting the object */ // todo: implement a bulk load for M-Tree and remove this method public void insert(E entry, boolean withPreInsert) { if (getLogger().isDebugging()) { getLogger().debugFine("insert " + entry.getRoutingObjectID() + "\n"); } if (!initialized) { initialize(entry); } // choose subtree for insertion IndexTreePath subtree = settings.insertStrategy.choosePath(this, entry); if (getLogger().isDebugging()) { getLogger().debugFine("insertion-subtree " + subtree + "\n"); } // determine parent distance E parentEntry = subtree.getLastPathComponent().getEntry(); double parentDistance = distance(parentEntry.getRoutingObjectID(), entry.getRoutingObjectID()).doubleValue(); entry.setParentDistance(parentDistance); // create leaf entry and do pre insert if (withPreInsert) { preInsert(entry); } // get parent node N parent = getNode(parentEntry); parent.addLeafEntry(entry); writeNode(parent); // adjust the tree from subtree to root adjustTree(subtree); // test if (EXTRA_INTEGRITY_CHECKS) { if (withPreInsert) { getRoot().integrityCheck(this, getRootEntry()); } } } /** * Bulk insert. * * @param entries Entries to insert */ public void insertAll(List entries) { if (!initialized && entries.size() > 0) { initialize(entries.get(0)); } for (E entry : entries) { insert(entry, false); } } @Override protected final void createEmptyRoot(E exampleLeaf) { N root = createNewLeafNode(); writeNode(root); } /** * Sorts the entries of the specified node according to their minimum distance * to the specified object. * * @param node the node * @param q the id of the object * @return a list of the sorted entries */ protected final List getSortedEntries(N node, DBID q) { List result = new ArrayList<>(); for (int i = 0; i < node.getNumEntries(); i++) { E entry = node.getEntry(i); double distance = distance(entry.getRoutingObjectID(), q).doubleValue(); double radius = entry.getCoveringRadius(); double minDist = (radius > distance) ? 0.0 : distance - radius; result.add(new DoubleIntPair(minDist, i)); } Collections.sort(result); return result; } /** * Returns the distance between the two specified ids. * * @param id1 the first id * @param id2 the second id * @return the distance between the two specified ids */ public abstract D distance(DBIDRef id1, DBIDRef id2); /** * Returns the distance between the routing object of two entries. * * @param e1 First entry * @param e2 Second entry * @return the distance between the two routing objects */ public final D distance(E e1, E e2) { return distance(e1.getRoutingObjectID(), e2.getRoutingObjectID()); } /** * Creates a new directory entry representing the specified node. * * @param node the node to be represented by the new entry * @param routingObjectID the id of the routing object of the node * @param parentDistance the distance from the routing object of the node to * the routing object of the parent node * @return the newly created directory entry */ protected abstract E createNewDirectoryEntry(N node, DBID routingObjectID, double parentDistance); /** * Adjusts the tree after insertion of some nodes. * * @param subtree the subtree to be adjusted */ private void adjustTree(IndexTreePath subtree) { if (getLogger().isDebugging()) { getLogger().debugFine("Adjust tree " + subtree + "\n"); } // get the root of the subtree Integer nodeIndex = subtree.getLastPathComponent().getIndex(); N node = getNode(subtree.getLastPathComponent().getEntry()); // overflow in node; split the node if (hasOverflow(node)) { // do the split Assignments assignments = settings.splitStrategy.split(this, node); final N newNode = node.isLeaf() ? createNewLeafNode() : createNewDirectoryNode(); List entries1 = new ArrayList<>(assignments.getFirstAssignments().size()); List entries2 = new ArrayList<>(assignments.getSecondAssignments().size()); // Store final parent distances: for (DistanceEntry ent : assignments.getFirstAssignments()) { final E e = ent.getEntry(); e.setParentDistance(ent.getDistance()); entries1.add(e); } for (DistanceEntry ent : assignments.getSecondAssignments()) { final E e = ent.getEntry(); e.setParentDistance(ent.getDistance()); entries2.add(e); } node.splitTo(newNode, entries1, entries2); // write changes to file writeNode(node); writeNode(newNode); if (getLogger().isDebugging()) { String msg = "Split Node " + node.getPageID() + " (" + this.getClass() + ")\n" + " newNode " + newNode.getPageID() + "\n" + " firstPromoted " + assignments.getFirstRoutingObject() + "\n" + " firstAssignments(" + node.getPageID() + ") " + assignments.getFirstAssignments() + "\n" + " firstCR " + assignments.getFirstCoveringRadius() + "\n" + " secondPromoted " + assignments.getSecondRoutingObject() + "\n" + " secondAssignments(" + newNode.getPageID() + ") " + assignments.getSecondAssignments() + "\n" + " secondCR " + assignments.getSecondCoveringRadius() + "\n"; getLogger().debugFine(msg); } // if root was split: create a new root that points the two split // nodes if (isRoot(node)) { // FIXME: stimmen die parentDistance der Kinder in node & splitNode? IndexTreePath newRootPath = createNewRoot(node, newNode, assignments.getFirstRoutingObject(), assignments.getSecondRoutingObject()); adjustTree(newRootPath); } // node is not root else { // get the parent and add the new split node E parentEntry = subtree.getParentPath().getLastPathComponent().getEntry(); N parent = getNode(parentEntry); if (getLogger().isDebugging()) { getLogger().debugFine("parent " + parent); } double parentDistance2 = distance(parentEntry.getRoutingObjectID(), assignments.getSecondRoutingObject()).doubleValue(); // logger.warning("parent: "+parent.toString()+" split: " + // splitNode.toString()+ " dist:"+parentDistance2); parent.addDirectoryEntry(createNewDirectoryEntry(newNode, assignments.getSecondRoutingObject(), parentDistance2)); // adjust the entry representing the (old) node, that has been split double parentDistance1 = distance(parentEntry.getRoutingObjectID(), assignments.getFirstRoutingObject()).doubleValue(); // logger.warning("parent: "+parent.toString()+" node: " + // node.toString()+ " dist:"+parentDistance1); node.adjustEntry(parent.getEntry(nodeIndex), assignments.getFirstRoutingObject(), parentDistance1, this); // write changes in parent to file writeNode(parent); adjustTree(subtree.getParentPath()); } } // no overflow, only adjust parameters of the entry representing the // node else { if (!isRoot(node)) { E parentEntry = subtree.getParentPath().getLastPathComponent().getEntry(); N parent = getNode(parentEntry); int index = subtree.getLastPathComponent().getIndex(); E entry = parent.getEntry(index); node.adjustEntry(entry, entry.getRoutingObjectID(), entry.getParentDistance(), this); // write changes in parent to file writeNode(parent); adjustTree(subtree.getParentPath()); } // root level is reached else { E rootEntry = getRootEntry(); node.adjustEntry(rootEntry, rootEntry.getRoutingObjectID(), rootEntry.getParentDistance(), this); } } } /** * Returns true if in the specified node an overflow has occurred, false * otherwise. * * @param node the node to be tested for overflow * @return true if in the specified node an overflow has occurred, false * otherwise */ private boolean hasOverflow(N node) { if (node.isLeaf()) { return node.getNumEntries() == leafCapacity; } return node.getNumEntries() == dirCapacity; } /** * Creates a new root node that points to the two specified child nodes and * return the path to the new root. * * @param oldRoot the old root of this RTree * @param newNode the new split node * @param firstRoutingObjectID the id of the routing objects of the first * child node * @param secondRoutingObjectID the id of the routing objects of the second * child node * @return the path to the new root node that points to the two specified * child nodes */ private IndexTreePath createNewRoot(final N oldRoot, final N newNode, DBID firstRoutingObjectID, DBID secondRoutingObjectID) { N root = createNewDirectoryNode(); writeNode(root); // switch the ids oldRoot.setPageID(root.getPageID()); if (!oldRoot.isLeaf()) { // FIXME: what is happening here? for (int i = 0; i < oldRoot.getNumEntries(); i++) { N node = getNode(oldRoot.getEntry(i)); writeNode(node); } } root.setPageID(getRootID()); // FIXME: doesn't the root by definition not have a routing object? // D parentDistance1 = distance(getRootEntry().getRoutingObjectID(), // firstRoutingObjectID); // D parentDistance2 = distance(getRootEntry().getRoutingObjectID(), // secondRoutingObjectID); E oldRootEntry = createNewDirectoryEntry(oldRoot, firstRoutingObjectID, 0.); E newRootEntry = createNewDirectoryEntry(newNode, secondRoutingObjectID, 0.); root.addDirectoryEntry(oldRootEntry); root.addDirectoryEntry(newRootEntry); // logger.warning("new root: " + getRootEntry().toString() + " childs: " + // oldRootEntry.toString() + "," + newRootEntry.toString() + " dists: " + // parentDistance1 + ", " + parentDistance2); writeNode(root); writeNode(oldRoot); writeNode(newNode); if (getLogger().isDebugging()) { String msg = "Create new Root: ID=" + root.getPageID(); msg += "\nchild1 " + oldRoot; msg += "\nchild2 " + newNode; getLogger().debugFine(msg); } return new IndexTreePath<>(new TreeIndexPathComponent<>(getRootEntry(), null)); } @Override public List getLeaves() { List result = new ArrayList<>(); BreadthFirstEnumeration enumeration = new BreadthFirstEnumeration<>(this, getRootPath()); while (enumeration.hasMoreElements()) { IndexTreePath path = enumeration.nextElement(); E entry = path.getLastPathComponent().getEntry(); if (!entry.isLeafEntry()) { // TODO: any way to skip unnecessary reads? N node = getNode(entry); if (node.isLeaf()) { result.add(entry); } } } return result; } /** * FIXME: expensive depth computation by following a path. * * @return depth */ public int getHeight() { int levels = 0; N node = getRoot(); while (!node.isLeaf()) { if (node.getNumEntries() > 0) { E entry = node.getEntry(0); node = getNode(entry); levels++; } } return levels; } @Override public void logStatistics() { super.logStatistics(); Logging log = getLogger(); if (log.isStatistics()) { log.statistics(new LongStatistic(this.getClass().getName() + ".height", getHeight())); statistics.logStatistics(); } } /** * Class for tracking some statistics. * * @author Erich Schubert * * @apiviz.composedOf Counter */ public class Statistics { /** * For counting the number of distance computations. */ protected final Counter distanceCalcs; /** * For counting the number of knn queries answered. */ protected final Counter knnQueries; /** * For counting the number of range queries answered. */ protected final Counter rangeQueries; /** * Constructor. */ public Statistics() { super(); Logging log = getLogger(); distanceCalcs = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".distancecalcs") : null; knnQueries = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".knnqueries") : null; rangeQueries = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".rangequeries") : null; } /** * Count a distance computation. */ public void countDistanceCalculation() { if (distanceCalcs != null) { distanceCalcs.increment(); } } /** * Count a knn query invocation. */ public void countKNNQuery() { if (knnQueries != null) { knnQueries.increment(); } } /** * Count a range query invocation. */ public void countRangeQuery() { if (rangeQueries != null) { rangeQueries.increment(); } } /** * Log the statistics. */ public void logStatistics() { Logging log = getLogger(); if (statistics.distanceCalcs != null) { log.statistics(statistics.distanceCalcs); } if (statistics.knnQueries != null) { log.statistics(statistics.knnQueries); } if (statistics.rangeQueries != null) { log.statistics(statistics.rangeQueries); } } } }