diff options
author | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:36 +0000 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:36 +0000 |
commit | 8300861dc4c62c5567a4e654976072f854217544 (patch) | |
tree | 0a148df3698efedd37839f6aca0e21b2f12f1b52 /src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java | |
parent | b7b404fd7a726774d442562d11659d7b5368cdb9 (diff) |
Import Upstream version 0.6.0~beta2
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java | 373 |
1 files changed, 171 insertions, 202 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java index a7a063bd..61ca9116 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,73 +28,67 @@ import java.util.Collections; import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.DBID; -import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.distance.DistanceUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.tree.BreadthFirstEnumeration; -import de.lmu.ifi.dbs.elki.index.tree.DistanceEntry; import de.lmu.ifi.dbs.elki.index.tree.IndexTreePath; import de.lmu.ifi.dbs.elki.index.tree.TreeIndexPathComponent; import de.lmu.ifi.dbs.elki.index.tree.metrical.MetricalIndexTree; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.Assignments; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.MLBDistSplit; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.MTreeSplit; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.strategies.split.Assignments; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.strategies.split.DistanceEntry; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.statistics.Counter; +import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; import de.lmu.ifi.dbs.elki.persistent.PageFile; -import de.lmu.ifi.dbs.elki.persistent.PageFileUtil; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; /** * Abstract super class for all M-Tree variants. * * @author Elke Achtert * - * @apiviz.has SplitResult oneway - - computes + * @apiviz.composedOf MTreeSettings + * @apiviz.composedOf Statistics * @apiviz.has AbstractMTreeNode oneway - - contains + * @apiviz.excludeSubtypes * * @param <O> the type of DatabaseObject to be stored in the metrical index * @param <D> the type of Distance used in the metrical index * @param <N> the type of MetricalNode used in the metrical index * @param <E> the type of MetricalEntry used in the metrical index + * @param <S> the type to store settings in. */ -public abstract class AbstractMTree<O, D extends Distance<D>, N extends AbstractMTreeNode<O, D, N, E>, E extends MTreeEntry<D>> extends MetricalIndexTree<O, D, N, E> { +public abstract class AbstractMTree<O, D extends NumberDistance<D, ?>, N extends AbstractMTreeNode<O, D, N, E>, E extends MTreeEntry, S extends MTreeSettings<O, D, N, E>> extends MetricalIndexTree<O, D, N, E> { /** * Debugging flag: do extra integrity checks. */ protected static final boolean EXTRA_INTEGRITY_CHECKS = false; /** - * Holds the instance of the trees distance function. + * Tree settings. */ - protected DistanceFunction<O, D> distanceFunction; + protected S settings; /** - * The distance query. + * For counting the number of distance computations. */ - protected DistanceQuery<O, D> distanceQuery; + public Statistics statistics = new Statistics(); /** * Constructor. * * @param pagefile Page file - * @param distanceQuery Distance query - * @param distanceFunction Distance function + * @param settings Tree settings */ - public AbstractMTree(PageFile<N> pagefile, DistanceQuery<O, D> distanceQuery, DistanceFunction<O, D> distanceFunction) { + public AbstractMTree(PageFile<N> pagefile, S settings) { super(pagefile); - this.distanceQuery = distanceQuery; - this.distanceFunction = distanceFunction; + this.settings = settings; } @Override - public final DistanceFunction<O, D> getDistanceFunction() { - return distanceFunction; - } - - @Override - public final DistanceQuery<O, D> getDistanceQuery() { - return distanceQuery; + public final DistanceFunction<? super O, D> getDistanceFunction() { + return settings.distanceFunction; } /** @@ -103,7 +97,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * @return the distance factory used */ public final D getDistanceFactory() { - return distanceFunction.getDistanceFactory(); + return settings.distanceFunction.getDistanceFactory(); } /** @@ -131,7 +125,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } } - BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<N, E>(this, getRootPath()); + BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<>(this, getRootPath()); while (enumeration.hasMoreElements()) { IndexTreePath<E> path = enumeration.nextElement(); E entry = path.getLastPathComponent().getEntry(); @@ -158,7 +152,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract result.append(leafNodes).append(" Leaf Nodes \n"); result.append(objects).append(" Objects \n"); - PageFileUtil.appendPageFileStatistics(result, getPageFileStatistics()); + // PageFileUtil.appendPageFileStatistics(result, getPageFileStatistics()); return result.toString(); } @@ -180,14 +174,14 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } // choose subtree for insertion - IndexTreePath<E> subtree = choosePath(entry, getRootPath()); + IndexTreePath<E> subtree = settings.insertStrategy.choosePath(this, entry); if (getLogger().isDebugging()) { getLogger().debugFine("insertion-subtree " + subtree + "\n"); } // determine parent distance E parentEntry = subtree.getLastPathComponent().getEntry(); - D parentDistance = distance(parentEntry.getRoutingObjectID(), entry.getRoutingObjectID()); + double parentDistance = distance(parentEntry.getRoutingObjectID(), entry.getRoutingObjectID()).doubleValue(); entry.setParentDistance(parentDistance); // create leaf entry and do pre insert @@ -232,63 +226,6 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } /** - * Chooses the best path of the specified subtree for insertion of the given - * object. - * - * @param object the entry to search - * @param subtree the subtree to be tested for insertion - * @return the path of the appropriate subtree to insert the given object - */ - private IndexTreePath<E> choosePath(E object, IndexTreePath<E> subtree) { - N node = getNode(subtree.getLastPathComponent().getEntry()); - - // leaf - if (node.isLeaf()) { - return subtree; - } - - DistanceEntry<D, E> bestCandidate; - D enlarge; // Track best enlargement - null for no enlargement needed. - // Initialize from first: - { - E entry = node.getEntry(0); - D distance = distance(object.getRoutingObjectID(), entry.getRoutingObjectID()); - bestCandidate = new DistanceEntry<D, E>(entry, distance, 0); - if (distance.compareTo(entry.getCoveringRadius()) <= 0) { - enlarge = null; - } else { - enlarge = distance.minus(entry.getCoveringRadius()); - } - } - - // Iterate over remaining - for (int i = 1; i < node.getNumEntries(); i++) { - E entry = node.getEntry(i); - D distance = distance(object.getRoutingObjectID(), entry.getRoutingObjectID()); - - if (distance.compareTo(entry.getCoveringRadius()) <= 0) { - if (enlarge != null || distance.compareTo(bestCandidate.getDistance()) < 0) { - bestCandidate = new DistanceEntry<D, E>(entry, distance, i); - enlarge = null; - } - } else if (enlarge != null) { - D enlrg = distance.minus(entry.getCoveringRadius()); - if (enlrg.compareTo(enlarge) < 0) { - bestCandidate = new DistanceEntry<D, E>(entry, distance, i); - enlarge = enlrg; - } - } - } - - // Apply enlargement - if (enlarge != null) { - bestCandidate.getEntry().setCoveringRadius(enlarge); - } - - return choosePath(object, subtree.pathByAddingChild(new TreeIndexPathComponent<E>(bestCandidate.getEntry(), bestCandidate.getIndex()))); - } - - /** * Sorts the entries of the specified node according to their minimum distance * to the specified object. * @@ -296,44 +233,16 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * @param q the id of the object * @return a list of the sorted entries */ - protected final List<DistanceEntry<D, E>> getSortedEntries(N node, DBID q) { - List<DistanceEntry<D, E>> result = new ArrayList<DistanceEntry<D, E>>(); - - for (int i = 0; i < node.getNumEntries(); i++) { - E entry = node.getEntry(i); - D distance = distance(entry.getRoutingObjectID(), q); - D radius = entry.getCoveringRadius(); - D minDist = radius.compareTo(distance) > 0 ? getDistanceFactory().nullDistance() : distance.minus(radius); - - result.add(new DistanceEntry<D, E>(entry, minDist, i)); - } - - Collections.sort(result); - return result; - } - - /** - * Sorts the entries of the specified node according to their minimum distance - * to the specified objects. - * - * @param node the node - * @param ids the ids of the objects - * @return a list of the sorted entries - */ - protected final List<DistanceEntry<D, E>> getSortedEntries(N node, DBIDs ids) { - List<DistanceEntry<D, E>> result = new ArrayList<DistanceEntry<D, E>>(); + protected final List<DoubleIntPair> getSortedEntries(N node, DBID q) { + List<DoubleIntPair> result = new ArrayList<>(); for (int i = 0; i < node.getNumEntries(); i++) { E entry = node.getEntry(i); - D radius = entry.getCoveringRadius(); + double distance = distance(entry.getRoutingObjectID(), q).doubleValue(); + double radius = entry.getCoveringRadius(); + double minDist = (radius > distance) ? 0.0 : distance - radius; - D minMinDist = getDistanceFactory().infiniteDistance(); - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - D distance = distanceQuery.distance(entry.getRoutingObjectID(), iter); - D minDist = radius.compareTo(distance) > 0 ? getDistanceFactory().nullDistance() : distance.minus(radius); - minMinDist = DistanceUtil.min(minMinDist, minDist); - } - result.add(new DistanceEntry<D, E>(entry, minMinDist, i)); + result.add(new DoubleIntPair(minDist, i)); } Collections.sort(result); @@ -347,11 +256,17 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * @param id2 the second id * @return the distance between the two specified ids */ - protected final D distance(DBID id1, DBID id2) { - if (id1 == null || id2 == null) { - return getDistanceFactory().undefinedDistance(); - } - return distanceQuery.distance(id1, id2); + public abstract D distance(DBIDRef id1, DBIDRef id2); + + /** + * Returns the distance between the routing object of two entries. + * + * @param e1 First entry + * @param e2 Second entry + * @return the distance between the two routing objects + */ + public final D distance(E e1, E e2) { + return distance(e1.getRoutingObjectID(), e2.getRoutingObjectID()); } /** @@ -363,38 +278,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * the routing object of the parent node * @return the newly created directory entry */ - protected abstract E createNewDirectoryEntry(N node, DBID routingObjectID, D parentDistance); - - /** - * Splits the specified node and returns the split result. - * - * @param node the node to be split - * @return the split result - */ - private SplitResult split(N node) { - // do the split - // todo split stratgey - MTreeSplit<O, D, N, E> split = new MLBDistSplit<O, D, N, E>(node, distanceQuery); - Assignments<D, E> assignments = split.getAssignments(); - final N newNode; - if (node.isLeaf()) { - newNode = createNewLeafNode(); - } else { - newNode = createNewDirectoryNode(); - } - node.splitTo(newNode, assignments.getFirstAssignments(), assignments.getSecondAssignments()); - - // write changes to file - writeNode(node); - writeNode(newNode); - - if (getLogger().isDebugging()) { - String msg = "Split Node " + node.getPageID() + " (" + this.getClass() + ")\n" + " newNode " + newNode.getPageID() + "\n" + " firstPromoted " + assignments.getFirstRoutingObject() + "\n" + " firstAssignments(" + node.getPageID() + ") " + assignments.getFirstAssignments() + "\n" + " firstCR " + assignments.getFirstCoveringRadius() + "\n" + " secondPromoted " + assignments.getSecondRoutingObject() + "\n" + " secondAssignments(" + newNode.getPageID() + ") " + assignments.getSecondAssignments() + "\n" + " secondCR " + assignments.getSecondCoveringRadius() + "\n"; - getLogger().debugFine(msg); - } - - return new SplitResult(split, newNode); - } + protected abstract E createNewDirectoryEntry(N node, DBID routingObjectID, double parentDistance); /** * Adjusts the tree after insertion of some nodes. @@ -412,15 +296,39 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract // overflow in node; split the node if (hasOverflow(node)) { - SplitResult splitResult = split(node); - N splitNode = splitResult.newNode; - Assignments<D, E> assignments = splitResult.split.getAssignments(); + // do the split + Assignments<E> assignments = settings.splitStrategy.split(this, node); + final N newNode = node.isLeaf() ? createNewLeafNode() : createNewDirectoryNode(); + + List<E> entries1 = new ArrayList<>(assignments.getFirstAssignments().size()); + List<E> entries2 = new ArrayList<>(assignments.getSecondAssignments().size()); + // Store final parent distances: + for (DistanceEntry<E> ent : assignments.getFirstAssignments()) { + final E e = ent.getEntry(); + e.setParentDistance(ent.getDistance()); + entries1.add(e); + } + for (DistanceEntry<E> ent : assignments.getSecondAssignments()) { + final E e = ent.getEntry(); + e.setParentDistance(ent.getDistance()); + entries2.add(e); + } + node.splitTo(newNode, entries1, entries2); + + // write changes to file + writeNode(node); + writeNode(newNode); + + if (getLogger().isDebugging()) { + String msg = "Split Node " + node.getPageID() + " (" + this.getClass() + ")\n" + " newNode " + newNode.getPageID() + "\n" + " firstPromoted " + assignments.getFirstRoutingObject() + "\n" + " firstAssignments(" + node.getPageID() + ") " + assignments.getFirstAssignments() + "\n" + " firstCR " + assignments.getFirstCoveringRadius() + "\n" + " secondPromoted " + assignments.getSecondRoutingObject() + "\n" + " secondAssignments(" + newNode.getPageID() + ") " + assignments.getSecondAssignments() + "\n" + " secondCR " + assignments.getSecondCoveringRadius() + "\n"; + getLogger().debugFine(msg); + } // if root was split: create a new root that points the two split // nodes if (isRoot(node)) { // FIXME: stimmen die parentDistance der Kinder in node & splitNode? - IndexTreePath<E> newRootPath = createNewRoot(node, splitNode, assignments.getFirstRoutingObject(), assignments.getSecondRoutingObject()); + IndexTreePath<E> newRootPath = createNewRoot(node, newNode, assignments.getFirstRoutingObject(), assignments.getSecondRoutingObject()); adjustTree(newRootPath); } // node is not root @@ -431,13 +339,13 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract if (getLogger().isDebugging()) { getLogger().debugFine("parent " + parent); } - D parentDistance2 = distance(parentEntry.getRoutingObjectID(), assignments.getSecondRoutingObject()); + double parentDistance2 = distance(parentEntry.getRoutingObjectID(), assignments.getSecondRoutingObject()).doubleValue(); // logger.warning("parent: "+parent.toString()+" split: " + // splitNode.toString()+ " dist:"+parentDistance2); - parent.addDirectoryEntry(createNewDirectoryEntry(splitNode, assignments.getSecondRoutingObject(), parentDistance2)); + parent.addDirectoryEntry(createNewDirectoryEntry(newNode, assignments.getSecondRoutingObject(), parentDistance2)); // adjust the entry representing the (old) node, that has been split - D parentDistance1 = distance(parentEntry.getRoutingObjectID(), assignments.getFirstRoutingObject()); + double parentDistance1 = distance(parentEntry.getRoutingObjectID(), assignments.getFirstRoutingObject()).doubleValue(); // logger.warning("parent: "+parent.toString()+" node: " + // node.toString()+ " dist:"+parentDistance1); node.adjustEntry(parent.getEntry(nodeIndex), assignments.getFirstRoutingObject(), parentDistance1, this); @@ -517,8 +425,8 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract // firstRoutingObjectID); // D parentDistance2 = distance(getRootEntry().getRoutingObjectID(), // secondRoutingObjectID); - E oldRootEntry = createNewDirectoryEntry(oldRoot, firstRoutingObjectID, null); - E newRootEntry = createNewDirectoryEntry(newNode, secondRoutingObjectID, null); + E oldRootEntry = createNewDirectoryEntry(oldRoot, firstRoutingObjectID, 0.); + E newRootEntry = createNewDirectoryEntry(newNode, secondRoutingObjectID, 0.); root.addDirectoryEntry(oldRootEntry); root.addDirectoryEntry(newRootEntry); @@ -536,41 +444,13 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract getLogger().debugFine(msg); } - return new IndexTreePath<E>(new TreeIndexPathComponent<E>(getRootEntry(), null)); - } - - /** - * Encapsulates a split object and the newly created node. - * - * @apiviz.composedOf MTreeSplit - */ - private class SplitResult { - /** - * Split used - */ - protected MTreeSplit<O, D, N, E> split; - - /** - * New sibling - */ - protected N newNode; - - /** - * Constructor. - * - * @param split Split that was used - * @param newNode New sibling - */ - public SplitResult(MTreeSplit<O, D, N, E> split, N newNode) { - this.split = split; - this.newNode = newNode; - } + return new IndexTreePath<>(new TreeIndexPathComponent<>(getRootEntry(), null)); } @Override public List<E> getLeaves() { - List<E> result = new ArrayList<E>(); - BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<N, E>(this, getRootPath()); + List<E> result = new ArrayList<>(); + BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<>(this, getRootPath()); while (enumeration.hasMoreElements()) { IndexTreePath<E> path = enumeration.nextElement(); E entry = path.getLastPathComponent().getEntry(); @@ -603,4 +483,93 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } return levels; } + + @Override + public void logStatistics() { + super.logStatistics(); + Logging log = getLogger(); + if (log.isStatistics()) { + log.statistics(new LongStatistic(this.getClass().getName() + ".height", getHeight())); + statistics.logStatistics(); + } + } + + /** + * Class for tracking some statistics. + * + * @author Erich Schubert + * + * @apiviz.composedOf Counter + */ + public class Statistics { + /** + * For counting the number of distance computations. + */ + protected final Counter distanceCalcs; + + /** + * For counting the number of knn queries answered. + */ + protected final Counter knnQueries; + + /** + * For counting the number of range queries answered. + */ + protected final Counter rangeQueries; + + /** + * Constructor. + */ + public Statistics() { + super(); + Logging log = getLogger(); + distanceCalcs = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".distancecalcs") : null; + knnQueries = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".knnqueries") : null; + rangeQueries = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".rangequeries") : null; + } + + /** + * Count a distance computation. + */ + public void countDistanceCalculation() { + if (distanceCalcs != null) { + distanceCalcs.increment(); + } + } + + /** + * Count a knn query invocation. + */ + public void countKNNQuery() { + if (knnQueries != null) { + knnQueries.increment(); + } + } + + /** + * Count a range query invocation. + */ + public void countRangeQuery() { + if (rangeQueries != null) { + rangeQueries.increment(); + } + } + + /** + * Log the statistics. + */ + public void logStatistics() { + Logging log = getLogger(); + if (statistics.distanceCalcs != null) { + log.statistics(statistics.distanceCalcs); + } + if (statistics.knnQueries != null) { + log.statistics(statistics.knnQueries); + } + if (statistics.rangeQueries != null) { + log.statistics(statistics.rangeQueries); + } + } + } + } |