summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java
diff options
context:
space:
mode:
authorAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:36 +0000
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:36 +0000
commit8300861dc4c62c5567a4e654976072f854217544 (patch)
tree0a148df3698efedd37839f6aca0e21b2f12f1b52 /src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java
parentb7b404fd7a726774d442562d11659d7b5368cdb9 (diff)
Import Upstream version 0.6.0~beta2
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java373
1 files changed, 171 insertions, 202 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java
index a7a063bd..61ca9116 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,73 +28,67 @@ import java.util.Collections;
import java.util.List;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.distance.DistanceUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.tree.BreadthFirstEnumeration;
-import de.lmu.ifi.dbs.elki.index.tree.DistanceEntry;
import de.lmu.ifi.dbs.elki.index.tree.IndexTreePath;
import de.lmu.ifi.dbs.elki.index.tree.TreeIndexPathComponent;
import de.lmu.ifi.dbs.elki.index.tree.metrical.MetricalIndexTree;
-import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.Assignments;
-import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.MLBDistSplit;
-import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.MTreeSplit;
+import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.strategies.split.Assignments;
+import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.strategies.split.DistanceEntry;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.statistics.Counter;
+import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
import de.lmu.ifi.dbs.elki.persistent.PageFile;
-import de.lmu.ifi.dbs.elki.persistent.PageFileUtil;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
/**
* Abstract super class for all M-Tree variants.
*
* @author Elke Achtert
*
- * @apiviz.has SplitResult oneway - - computes
+ * @apiviz.composedOf MTreeSettings
+ * @apiviz.composedOf Statistics
* @apiviz.has AbstractMTreeNode oneway - - contains
+ * @apiviz.excludeSubtypes
*
* @param <O> the type of DatabaseObject to be stored in the metrical index
* @param <D> the type of Distance used in the metrical index
* @param <N> the type of MetricalNode used in the metrical index
* @param <E> the type of MetricalEntry used in the metrical index
+ * @param <S> the type to store settings in.
*/
-public abstract class AbstractMTree<O, D extends Distance<D>, N extends AbstractMTreeNode<O, D, N, E>, E extends MTreeEntry<D>> extends MetricalIndexTree<O, D, N, E> {
+public abstract class AbstractMTree<O, D extends NumberDistance<D, ?>, N extends AbstractMTreeNode<O, D, N, E>, E extends MTreeEntry, S extends MTreeSettings<O, D, N, E>> extends MetricalIndexTree<O, D, N, E> {
/**
* Debugging flag: do extra integrity checks.
*/
protected static final boolean EXTRA_INTEGRITY_CHECKS = false;
/**
- * Holds the instance of the trees distance function.
+ * Tree settings.
*/
- protected DistanceFunction<O, D> distanceFunction;
+ protected S settings;
/**
- * The distance query.
+ * For counting the number of distance computations.
*/
- protected DistanceQuery<O, D> distanceQuery;
+ public Statistics statistics = new Statistics();
/**
* Constructor.
*
* @param pagefile Page file
- * @param distanceQuery Distance query
- * @param distanceFunction Distance function
+ * @param settings Tree settings
*/
- public AbstractMTree(PageFile<N> pagefile, DistanceQuery<O, D> distanceQuery, DistanceFunction<O, D> distanceFunction) {
+ public AbstractMTree(PageFile<N> pagefile, S settings) {
super(pagefile);
- this.distanceQuery = distanceQuery;
- this.distanceFunction = distanceFunction;
+ this.settings = settings;
}
@Override
- public final DistanceFunction<O, D> getDistanceFunction() {
- return distanceFunction;
- }
-
- @Override
- public final DistanceQuery<O, D> getDistanceQuery() {
- return distanceQuery;
+ public final DistanceFunction<? super O, D> getDistanceFunction() {
+ return settings.distanceFunction;
}
/**
@@ -103,7 +97,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
* @return the distance factory used
*/
public final D getDistanceFactory() {
- return distanceFunction.getDistanceFactory();
+ return settings.distanceFunction.getDistanceFactory();
}
/**
@@ -131,7 +125,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
}
}
- BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<N, E>(this, getRootPath());
+ BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<>(this, getRootPath());
while (enumeration.hasMoreElements()) {
IndexTreePath<E> path = enumeration.nextElement();
E entry = path.getLastPathComponent().getEntry();
@@ -158,7 +152,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
result.append(leafNodes).append(" Leaf Nodes \n");
result.append(objects).append(" Objects \n");
- PageFileUtil.appendPageFileStatistics(result, getPageFileStatistics());
+ // PageFileUtil.appendPageFileStatistics(result, getPageFileStatistics());
return result.toString();
}
@@ -180,14 +174,14 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
}
// choose subtree for insertion
- IndexTreePath<E> subtree = choosePath(entry, getRootPath());
+ IndexTreePath<E> subtree = settings.insertStrategy.choosePath(this, entry);
if (getLogger().isDebugging()) {
getLogger().debugFine("insertion-subtree " + subtree + "\n");
}
// determine parent distance
E parentEntry = subtree.getLastPathComponent().getEntry();
- D parentDistance = distance(parentEntry.getRoutingObjectID(), entry.getRoutingObjectID());
+ double parentDistance = distance(parentEntry.getRoutingObjectID(), entry.getRoutingObjectID()).doubleValue();
entry.setParentDistance(parentDistance);
// create leaf entry and do pre insert
@@ -232,63 +226,6 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
}
/**
- * Chooses the best path of the specified subtree for insertion of the given
- * object.
- *
- * @param object the entry to search
- * @param subtree the subtree to be tested for insertion
- * @return the path of the appropriate subtree to insert the given object
- */
- private IndexTreePath<E> choosePath(E object, IndexTreePath<E> subtree) {
- N node = getNode(subtree.getLastPathComponent().getEntry());
-
- // leaf
- if (node.isLeaf()) {
- return subtree;
- }
-
- DistanceEntry<D, E> bestCandidate;
- D enlarge; // Track best enlargement - null for no enlargement needed.
- // Initialize from first:
- {
- E entry = node.getEntry(0);
- D distance = distance(object.getRoutingObjectID(), entry.getRoutingObjectID());
- bestCandidate = new DistanceEntry<D, E>(entry, distance, 0);
- if (distance.compareTo(entry.getCoveringRadius()) <= 0) {
- enlarge = null;
- } else {
- enlarge = distance.minus(entry.getCoveringRadius());
- }
- }
-
- // Iterate over remaining
- for (int i = 1; i < node.getNumEntries(); i++) {
- E entry = node.getEntry(i);
- D distance = distance(object.getRoutingObjectID(), entry.getRoutingObjectID());
-
- if (distance.compareTo(entry.getCoveringRadius()) <= 0) {
- if (enlarge != null || distance.compareTo(bestCandidate.getDistance()) < 0) {
- bestCandidate = new DistanceEntry<D, E>(entry, distance, i);
- enlarge = null;
- }
- } else if (enlarge != null) {
- D enlrg = distance.minus(entry.getCoveringRadius());
- if (enlrg.compareTo(enlarge) < 0) {
- bestCandidate = new DistanceEntry<D, E>(entry, distance, i);
- enlarge = enlrg;
- }
- }
- }
-
- // Apply enlargement
- if (enlarge != null) {
- bestCandidate.getEntry().setCoveringRadius(enlarge);
- }
-
- return choosePath(object, subtree.pathByAddingChild(new TreeIndexPathComponent<E>(bestCandidate.getEntry(), bestCandidate.getIndex())));
- }
-
- /**
* Sorts the entries of the specified node according to their minimum distance
* to the specified object.
*
@@ -296,44 +233,16 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
* @param q the id of the object
* @return a list of the sorted entries
*/
- protected final List<DistanceEntry<D, E>> getSortedEntries(N node, DBID q) {
- List<DistanceEntry<D, E>> result = new ArrayList<DistanceEntry<D, E>>();
-
- for (int i = 0; i < node.getNumEntries(); i++) {
- E entry = node.getEntry(i);
- D distance = distance(entry.getRoutingObjectID(), q);
- D radius = entry.getCoveringRadius();
- D minDist = radius.compareTo(distance) > 0 ? getDistanceFactory().nullDistance() : distance.minus(radius);
-
- result.add(new DistanceEntry<D, E>(entry, minDist, i));
- }
-
- Collections.sort(result);
- return result;
- }
-
- /**
- * Sorts the entries of the specified node according to their minimum distance
- * to the specified objects.
- *
- * @param node the node
- * @param ids the ids of the objects
- * @return a list of the sorted entries
- */
- protected final List<DistanceEntry<D, E>> getSortedEntries(N node, DBIDs ids) {
- List<DistanceEntry<D, E>> result = new ArrayList<DistanceEntry<D, E>>();
+ protected final List<DoubleIntPair> getSortedEntries(N node, DBID q) {
+ List<DoubleIntPair> result = new ArrayList<>();
for (int i = 0; i < node.getNumEntries(); i++) {
E entry = node.getEntry(i);
- D radius = entry.getCoveringRadius();
+ double distance = distance(entry.getRoutingObjectID(), q).doubleValue();
+ double radius = entry.getCoveringRadius();
+ double minDist = (radius > distance) ? 0.0 : distance - radius;
- D minMinDist = getDistanceFactory().infiniteDistance();
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- D distance = distanceQuery.distance(entry.getRoutingObjectID(), iter);
- D minDist = radius.compareTo(distance) > 0 ? getDistanceFactory().nullDistance() : distance.minus(radius);
- minMinDist = DistanceUtil.min(minMinDist, minDist);
- }
- result.add(new DistanceEntry<D, E>(entry, minMinDist, i));
+ result.add(new DoubleIntPair(minDist, i));
}
Collections.sort(result);
@@ -347,11 +256,17 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
* @param id2 the second id
* @return the distance between the two specified ids
*/
- protected final D distance(DBID id1, DBID id2) {
- if (id1 == null || id2 == null) {
- return getDistanceFactory().undefinedDistance();
- }
- return distanceQuery.distance(id1, id2);
+ public abstract D distance(DBIDRef id1, DBIDRef id2);
+
+ /**
+ * Returns the distance between the routing object of two entries.
+ *
+ * @param e1 First entry
+ * @param e2 Second entry
+ * @return the distance between the two routing objects
+ */
+ public final D distance(E e1, E e2) {
+ return distance(e1.getRoutingObjectID(), e2.getRoutingObjectID());
}
/**
@@ -363,38 +278,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
* the routing object of the parent node
* @return the newly created directory entry
*/
- protected abstract E createNewDirectoryEntry(N node, DBID routingObjectID, D parentDistance);
-
- /**
- * Splits the specified node and returns the split result.
- *
- * @param node the node to be split
- * @return the split result
- */
- private SplitResult split(N node) {
- // do the split
- // todo split stratgey
- MTreeSplit<O, D, N, E> split = new MLBDistSplit<O, D, N, E>(node, distanceQuery);
- Assignments<D, E> assignments = split.getAssignments();
- final N newNode;
- if (node.isLeaf()) {
- newNode = createNewLeafNode();
- } else {
- newNode = createNewDirectoryNode();
- }
- node.splitTo(newNode, assignments.getFirstAssignments(), assignments.getSecondAssignments());
-
- // write changes to file
- writeNode(node);
- writeNode(newNode);
-
- if (getLogger().isDebugging()) {
- String msg = "Split Node " + node.getPageID() + " (" + this.getClass() + ")\n" + " newNode " + newNode.getPageID() + "\n" + " firstPromoted " + assignments.getFirstRoutingObject() + "\n" + " firstAssignments(" + node.getPageID() + ") " + assignments.getFirstAssignments() + "\n" + " firstCR " + assignments.getFirstCoveringRadius() + "\n" + " secondPromoted " + assignments.getSecondRoutingObject() + "\n" + " secondAssignments(" + newNode.getPageID() + ") " + assignments.getSecondAssignments() + "\n" + " secondCR " + assignments.getSecondCoveringRadius() + "\n";
- getLogger().debugFine(msg);
- }
-
- return new SplitResult(split, newNode);
- }
+ protected abstract E createNewDirectoryEntry(N node, DBID routingObjectID, double parentDistance);
/**
* Adjusts the tree after insertion of some nodes.
@@ -412,15 +296,39 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
// overflow in node; split the node
if (hasOverflow(node)) {
- SplitResult splitResult = split(node);
- N splitNode = splitResult.newNode;
- Assignments<D, E> assignments = splitResult.split.getAssignments();
+ // do the split
+ Assignments<E> assignments = settings.splitStrategy.split(this, node);
+ final N newNode = node.isLeaf() ? createNewLeafNode() : createNewDirectoryNode();
+
+ List<E> entries1 = new ArrayList<>(assignments.getFirstAssignments().size());
+ List<E> entries2 = new ArrayList<>(assignments.getSecondAssignments().size());
+ // Store final parent distances:
+ for (DistanceEntry<E> ent : assignments.getFirstAssignments()) {
+ final E e = ent.getEntry();
+ e.setParentDistance(ent.getDistance());
+ entries1.add(e);
+ }
+ for (DistanceEntry<E> ent : assignments.getSecondAssignments()) {
+ final E e = ent.getEntry();
+ e.setParentDistance(ent.getDistance());
+ entries2.add(e);
+ }
+ node.splitTo(newNode, entries1, entries2);
+
+ // write changes to file
+ writeNode(node);
+ writeNode(newNode);
+
+ if (getLogger().isDebugging()) {
+ String msg = "Split Node " + node.getPageID() + " (" + this.getClass() + ")\n" + " newNode " + newNode.getPageID() + "\n" + " firstPromoted " + assignments.getFirstRoutingObject() + "\n" + " firstAssignments(" + node.getPageID() + ") " + assignments.getFirstAssignments() + "\n" + " firstCR " + assignments.getFirstCoveringRadius() + "\n" + " secondPromoted " + assignments.getSecondRoutingObject() + "\n" + " secondAssignments(" + newNode.getPageID() + ") " + assignments.getSecondAssignments() + "\n" + " secondCR " + assignments.getSecondCoveringRadius() + "\n";
+ getLogger().debugFine(msg);
+ }
// if root was split: create a new root that points the two split
// nodes
if (isRoot(node)) {
// FIXME: stimmen die parentDistance der Kinder in node & splitNode?
- IndexTreePath<E> newRootPath = createNewRoot(node, splitNode, assignments.getFirstRoutingObject(), assignments.getSecondRoutingObject());
+ IndexTreePath<E> newRootPath = createNewRoot(node, newNode, assignments.getFirstRoutingObject(), assignments.getSecondRoutingObject());
adjustTree(newRootPath);
}
// node is not root
@@ -431,13 +339,13 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
if (getLogger().isDebugging()) {
getLogger().debugFine("parent " + parent);
}
- D parentDistance2 = distance(parentEntry.getRoutingObjectID(), assignments.getSecondRoutingObject());
+ double parentDistance2 = distance(parentEntry.getRoutingObjectID(), assignments.getSecondRoutingObject()).doubleValue();
// logger.warning("parent: "+parent.toString()+" split: " +
// splitNode.toString()+ " dist:"+parentDistance2);
- parent.addDirectoryEntry(createNewDirectoryEntry(splitNode, assignments.getSecondRoutingObject(), parentDistance2));
+ parent.addDirectoryEntry(createNewDirectoryEntry(newNode, assignments.getSecondRoutingObject(), parentDistance2));
// adjust the entry representing the (old) node, that has been split
- D parentDistance1 = distance(parentEntry.getRoutingObjectID(), assignments.getFirstRoutingObject());
+ double parentDistance1 = distance(parentEntry.getRoutingObjectID(), assignments.getFirstRoutingObject()).doubleValue();
// logger.warning("parent: "+parent.toString()+" node: " +
// node.toString()+ " dist:"+parentDistance1);
node.adjustEntry(parent.getEntry(nodeIndex), assignments.getFirstRoutingObject(), parentDistance1, this);
@@ -517,8 +425,8 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
// firstRoutingObjectID);
// D parentDistance2 = distance(getRootEntry().getRoutingObjectID(),
// secondRoutingObjectID);
- E oldRootEntry = createNewDirectoryEntry(oldRoot, firstRoutingObjectID, null);
- E newRootEntry = createNewDirectoryEntry(newNode, secondRoutingObjectID, null);
+ E oldRootEntry = createNewDirectoryEntry(oldRoot, firstRoutingObjectID, 0.);
+ E newRootEntry = createNewDirectoryEntry(newNode, secondRoutingObjectID, 0.);
root.addDirectoryEntry(oldRootEntry);
root.addDirectoryEntry(newRootEntry);
@@ -536,41 +444,13 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
getLogger().debugFine(msg);
}
- return new IndexTreePath<E>(new TreeIndexPathComponent<E>(getRootEntry(), null));
- }
-
- /**
- * Encapsulates a split object and the newly created node.
- *
- * @apiviz.composedOf MTreeSplit
- */
- private class SplitResult {
- /**
- * Split used
- */
- protected MTreeSplit<O, D, N, E> split;
-
- /**
- * New sibling
- */
- protected N newNode;
-
- /**
- * Constructor.
- *
- * @param split Split that was used
- * @param newNode New sibling
- */
- public SplitResult(MTreeSplit<O, D, N, E> split, N newNode) {
- this.split = split;
- this.newNode = newNode;
- }
+ return new IndexTreePath<>(new TreeIndexPathComponent<>(getRootEntry(), null));
}
@Override
public List<E> getLeaves() {
- List<E> result = new ArrayList<E>();
- BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<N, E>(this, getRootPath());
+ List<E> result = new ArrayList<>();
+ BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<>(this, getRootPath());
while (enumeration.hasMoreElements()) {
IndexTreePath<E> path = enumeration.nextElement();
E entry = path.getLastPathComponent().getEntry();
@@ -603,4 +483,93 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract
}
return levels;
}
+
+ @Override
+ public void logStatistics() {
+ super.logStatistics();
+ Logging log = getLogger();
+ if (log.isStatistics()) {
+ log.statistics(new LongStatistic(this.getClass().getName() + ".height", getHeight()));
+ statistics.logStatistics();
+ }
+ }
+
+ /**
+ * Class for tracking some statistics.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf Counter
+ */
+ public class Statistics {
+ /**
+ * For counting the number of distance computations.
+ */
+ protected final Counter distanceCalcs;
+
+ /**
+ * For counting the number of knn queries answered.
+ */
+ protected final Counter knnQueries;
+
+ /**
+ * For counting the number of range queries answered.
+ */
+ protected final Counter rangeQueries;
+
+ /**
+ * Constructor.
+ */
+ public Statistics() {
+ super();
+ Logging log = getLogger();
+ distanceCalcs = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".distancecalcs") : null;
+ knnQueries = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".knnqueries") : null;
+ rangeQueries = log.isStatistics() ? log.newCounter(this.getClass().getName() + ".rangequeries") : null;
+ }
+
+ /**
+ * Count a distance computation.
+ */
+ public void countDistanceCalculation() {
+ if (distanceCalcs != null) {
+ distanceCalcs.increment();
+ }
+ }
+
+ /**
+ * Count a knn query invocation.
+ */
+ public void countKNNQuery() {
+ if (knnQueries != null) {
+ knnQueries.increment();
+ }
+ }
+
+ /**
+ * Count a range query invocation.
+ */
+ public void countRangeQuery() {
+ if (rangeQueries != null) {
+ rangeQueries.increment();
+ }
+ }
+
+ /**
+ * Log the statistics.
+ */
+ public void logStatistics() {
+ Logging log = getLogger();
+ if (statistics.distanceCalcs != null) {
+ log.statistics(statistics.distanceCalcs);
+ }
+ if (statistics.knnQueries != null) {
+ log.statistics(statistics.knnQueries);
+ }
+ if (statistics.rangeQueries != null) {
+ log.statistics(statistics.rangeQueries);
+ }
+ }
+ }
+
}