summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java144
1 files changed, 71 insertions, 73 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java
index 1ff0a030..41a83fcf 100644
--- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java
+++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mkapp;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,7 +23,6 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mkapp;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -33,22 +32,21 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.tree.LeafEntry;
import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTree;
import de.lmu.ifi.dbs.elki.index.tree.query.GenericMTreeDistanceSearchCandidate;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.statistics.PolynomialRegression;
-import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.persistent.PageFile;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap;
+import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
/**
* MkAppTree is a metrical index structure based on the concepts of the M-Tree
@@ -61,9 +59,8 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap;
* @apiviz.has MkAppTreeNode oneway - - contains
*
* @param <O> the type of DatabaseObject to be stored in the metrical index
- * @param <D> the type of NumberDistance used in the metrical index
*/
-public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree<O, D, MkAppTreeNode<O, D>, MkAppEntry, MkAppTreeSettings<O, D>> {
+public class MkAppTree<O> extends AbstractMkTree<O, MkAppTreeNode<O>, MkAppEntry, MkAppTreeSettings<O>> {
/**
* The logger for this class.
*/
@@ -76,7 +73,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* @param pageFile Page file
* @param settings Tree settings
*/
- public MkAppTree(Relation<O> relation, PageFile<MkAppTreeNode<O, D>> pageFile, MkAppTreeSettings<O, D> settings) {
+ public MkAppTree(Relation<O> relation, PageFile<MkAppTreeNode<O>> pageFile, MkAppTreeSettings<O> settings) {
super(relation, pageFile, settings);
}
@@ -103,34 +100,34 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
*/
@Override
public void insertAll(List<MkAppEntry> entries) {
- if (entries.isEmpty()) {
+ if(entries.isEmpty()) {
return;
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.debugFine("insert " + entries + "\n");
}
- if (!initialized) {
+ if(!initialized) {
initialize(entries.get(0));
}
ModifiableDBIDs ids = DBIDUtil.newArray(entries.size());
// insert
- for (MkAppEntry entry : entries) {
+ for(MkAppEntry entry : entries) {
ids.add(entry.getRoutingObjectID());
// insert the object
super.insert(entry, false);
}
// do batch nn
- Map<DBID, KNNList<D>> knnLists = batchNN(getRoot(), ids, settings.k_max + 1);
+ Map<DBID, KNNList> knnLists = batchNN(getRoot(), ids, settings.k_max + 1);
// adjust the knn distances
adjustApproximatedKNNDistances(getRootEntry(), knnLists);
- if (EXTRA_INTEGRITY_CHECKS) {
+ if(EXTRA_INTEGRITY_CHECKS) {
getRoot().integrityCheck(this, getRootEntry());
}
}
@@ -144,49 +141,48 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* @return a List of the query results
*/
@Override
- public DistanceDBIDList<D> reverseKNNQuery(DBIDRef id, int k) {
- GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<>();
+ public DoubleDBIDList reverseKNNQuery(DBIDRef id, int k) {
+ ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
final Heap<GenericMTreeDistanceSearchCandidate> pq = new UpdatableHeap<>();
// push root
pq.add(new GenericMTreeDistanceSearchCandidate(0., getRootID(), null));
// search in tree
- while (!pq.isEmpty()) {
+ while(!pq.isEmpty()) {
GenericMTreeDistanceSearchCandidate pqNode = pq.poll();
// FIXME: cache the distance to the routing object in the queue node!
- MkAppTreeNode<O, D> node = getNode(pqNode.nodeID);
+ MkAppTreeNode<O> node = getNode(pqNode.nodeID);
// directory node
- if (!node.isLeaf()) {
- for (int i = 0; i < node.getNumEntries(); i++) {
+ if(!node.isLeaf()) {
+ for(int i = 0; i < node.getNumEntries(); i++) {
MkAppEntry entry = node.getEntry(i);
- double distance = distance(entry.getRoutingObjectID(), id).doubleValue();
+ double distance = distance(entry.getRoutingObjectID(), id);
double minDist = (entry.getCoveringRadius() > distance) ? 0. : distance - entry.getCoveringRadius();
double approxValue = settings.log ? Math.exp(entry.approximatedValueAt(k)) : entry.approximatedValueAt(k);
- if (approxValue < 0) {
+ if(approxValue < 0) {
approxValue = 0;
}
- if (minDist <= approxValue) {
+ if(minDist <= approxValue) {
pq.add(new GenericMTreeDistanceSearchCandidate(minDist, getPageID(entry), entry.getRoutingObjectID()));
}
}
}
// data node
else {
- for (int i = 0; i < node.getNumEntries(); i++) {
+ for(int i = 0; i < node.getNumEntries(); i++) {
MkAppLeafEntry entry = (MkAppLeafEntry) node.getEntry(i);
- D distance = distance(entry.getRoutingObjectID(), id);
+ double distance = distance(entry.getRoutingObjectID(), id);
double approxValue = settings.log ? StrictMath.exp(entry.approximatedValueAt(k)) : entry.approximatedValueAt(k);
- if (approxValue < 0) {
+ if(approxValue < 0) {
approxValue = 0;
}
- D approximatedKnnDist = getDistanceFactory().fromDouble(approxValue);
- if (distance.compareTo(approximatedKnnDist) <= 0) {
+ if(distance <= approxValue) {
result.add(distance, entry.getRoutingObjectID());
}
}
@@ -213,7 +209,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
// overhead = index(4), numEntries(4), id(4), isLeaf(0.125)
double overhead = 12.125;
- if (getPageSize() - overhead < 0) {
+ if(getPageSize() - overhead < 0) {
throw new RuntimeException("Node size of " + getPageSize() + " Bytes is chosen too small!");
}
@@ -221,11 +217,11 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
// coveringRadius + parentDistance + approx) + 1
dirCapacity = (int) (getPageSize() - overhead) / (4 + 4 + distanceSize + distanceSize + (settings.p + 1) * 4 + 2) + 1;
- if (dirCapacity <= 1) {
+ if(dirCapacity <= 1) {
throw new RuntimeException("Node size of " + getPageSize() + " Bytes is chosen too small!");
}
- if (dirCapacity < 10) {
+ if(dirCapacity < 10) {
LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1));
}
@@ -234,39 +230,37 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
// approx) + 1
leafCapacity = (int) (getPageSize() - overhead) / (4 + distanceSize + (settings.p + 1) * 4 + 2) + 1;
- if (leafCapacity <= 1) {
+ if(leafCapacity <= 1) {
throw new RuntimeException("Node size of " + getPageSize() + " Bytes is chosen too small!");
}
- if (leafCapacity < 10) {
+ if(leafCapacity < 10) {
LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1));
}
initialized = true;
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Directory Capacity: " + (dirCapacity - 1) + "\nLeaf Capacity: " + (leafCapacity - 1));
}
}
- private List<D> getMeanKNNList(DBIDs ids, Map<DBID, KNNList<D>> knnLists) {
+ private double[] getMeanKNNList(DBIDs ids, Map<DBID, KNNList> knnLists) {
double[] means = new double[settings.k_max];
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
DBID id = DBIDUtil.deref(iter);
- KNNList<D> knns = knnLists.get(id);
+ KNNList knns = knnLists.get(id);
int k = 0;
- for (DistanceDBIDListIter<D> it = knns.iter(); k < settings.k_max && it.valid(); it.advance(), k++) {
- means[k] += it.getDistance().doubleValue();
+ for(DoubleDBIDListIter it = knns.iter(); k < settings.k_max && it.valid(); it.advance(), k++) {
+ means[k] += it.doubleValue();
}
}
- List<D> result = new ArrayList<>();
- for (int k = 0; k < settings.k_max; k++) {
+ for(int k = 0; k < settings.k_max; k++) {
means[k] /= ids.size();
- result.add(getDistanceFactory().fromDouble(means[k]));
}
- return result;
+ return means;
}
/**
@@ -275,19 +269,20 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* @param entry the root entry of the current subtree
* @param knnLists a map of knn lists for each leaf entry
*/
- private void adjustApproximatedKNNDistances(MkAppEntry entry, Map<DBID, KNNList<D>> knnLists) {
- MkAppTreeNode<O, D> node = getNode(entry);
+ private void adjustApproximatedKNNDistances(MkAppEntry entry, Map<DBID, KNNList> knnLists) {
+ MkAppTreeNode<O> node = getNode(entry);
- if (node.isLeaf()) {
- for (int i = 0; i < node.getNumEntries(); i++) {
+ if(node.isLeaf()) {
+ for(int i = 0; i < node.getNumEntries(); i++) {
MkAppLeafEntry leafEntry = (MkAppLeafEntry) node.getEntry(i);
// approximateKnnDistances(leafEntry,
// getKNNList(leafEntry.getRoutingObjectID(), knnLists));
PolynomialApproximation approx = approximateKnnDistances(getMeanKNNList(leafEntry.getDBID(), knnLists));
leafEntry.setKnnDistanceApproximation(approx);
}
- } else {
- for (int i = 0; i < node.getNumEntries(); i++) {
+ }
+ else {
+ for(int i = 0; i < node.getNumEntries(); i++) {
MkAppEntry dirEntry = node.getEntry(i);
adjustApproximatedKNNDistances(dirEntry, knnLists);
}
@@ -307,15 +302,16 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* @param result the result list containing the ids of the leaf entries stored
* in the specified subtree
*/
- private void leafEntryIDs(MkAppTreeNode<O, D> node, ModifiableDBIDs result) {
- if (node.isLeaf()) {
- for (int i = 0; i < node.getNumEntries(); i++) {
+ private void leafEntryIDs(MkAppTreeNode<O> node, ModifiableDBIDs result) {
+ if(node.isLeaf()) {
+ for(int i = 0; i < node.getNumEntries(); i++) {
MkAppEntry entry = node.getEntry(i);
result.add(((LeafEntry) entry).getDBID());
}
- } else {
- for (int i = 0; i < node.getNumEntries(); i++) {
- MkAppTreeNode<O, D> childNode = getNode(node.getEntry(i));
+ }
+ else {
+ for(int i = 0; i < node.getNumEntries(); i++) {
+ MkAppTreeNode<O> childNode = getNode(node.getEntry(i));
leafEntryIDs(childNode, result);
}
}
@@ -327,17 +323,18 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* @param knnDistances the knn-distances of the leaf entry
* @return the polynomial approximation of the specified knn-distances.
*/
- private PolynomialApproximation approximateKnnDistances(List<D> knnDistances) {
+ private PolynomialApproximation approximateKnnDistances(double[] knnDistances) {
StringBuilder msg = new StringBuilder();
// count the zero distances (necessary of log-log space is used)
int k_0 = 0;
- if (settings.log) {
- for (int i = 0; i < settings.k_max; i++) {
- double dist = knnDistances.get(i).doubleValue();
- if (dist == 0) {
+ if(settings.log) {
+ for(int i = 0; i < settings.k_max; i++) {
+ double dist = knnDistances[i];
+ if(dist == 0) {
k_0++;
- } else {
+ }
+ else {
break;
}
}
@@ -346,20 +343,21 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
de.lmu.ifi.dbs.elki.math.linearalgebra.Vector x = new de.lmu.ifi.dbs.elki.math.linearalgebra.Vector(settings.k_max - k_0);
de.lmu.ifi.dbs.elki.math.linearalgebra.Vector y = new de.lmu.ifi.dbs.elki.math.linearalgebra.Vector(settings.k_max - k_0);
- for (int k = 0; k < settings.k_max - k_0; k++) {
- if (settings.log) {
+ for(int k = 0; k < settings.k_max - k_0; k++) {
+ if(settings.log) {
x.set(k, Math.log(k + k_0));
- y.set(k, Math.log(knnDistances.get(k + k_0).doubleValue()));
- } else {
+ y.set(k, Math.log(knnDistances[k + k_0]));
+ }
+ else {
x.set(k, k + k_0);
- y.set(k, knnDistances.get(k + k_0).doubleValue());
+ y.set(k, knnDistances[k + k_0]);
}
}
PolynomialRegression regression = new PolynomialRegression(y, x, settings.p);
PolynomialApproximation approximation = new PolynomialApproximation(regression.getEstimatedCoefficients().getArrayCopy());
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
msg.append("approximation ").append(approximation);
LOG.debugFine(msg.toString());
}
@@ -373,7 +371,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* @return a new leaf node
*/
@Override
- protected MkAppTreeNode<O, D> createNewLeafNode() {
+ protected MkAppTreeNode<O> createNewLeafNode() {
return new MkAppTreeNode<>(leafCapacity, true);
}
@@ -383,7 +381,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* @return a new directory node
*/
@Override
- protected MkAppTreeNode<O, D> createNewDirectoryNode() {
+ protected MkAppTreeNode<O> createNewDirectoryNode() {
return new MkAppTreeNode<>(dirCapacity, false);
}
@@ -396,7 +394,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree
* the routing object of the parent node
*/
@Override
- protected MkAppEntry createNewDirectoryEntry(MkAppTreeNode<O, D> node, DBID routingObjectID, double parentDistance) {
+ protected MkAppEntry createNewDirectoryEntry(MkAppTreeNode<O> node, DBID routingObjectID, double parentDistance) {
return new MkAppDirectoryEntry(routingObjectID, parentDistance, node.getPageID(), node.coveringRadius(routingObjectID, this), null);
}