package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mkmax;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import java.util.List;
import java.util.Map;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTreeUnified;
import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.MkTreeSettings;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.persistent.PageFile;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
/**
* MkMaxTree is a metrical index structure based on the concepts of the M-Tree
* supporting efficient processing of reverse k nearest neighbor queries for
* parameter k <= k_max. The k-nearest neighbor distance for k = k_max is stored
* in each entry of a node.
*
* @author Elke Achtert
*
* @apiviz.has MkMaxTreeNode oneway - - contains
*
* @param the type of DatabaseObject to be stored in the MkMaxTree
* @param the type of Distance used in the MkMaxTree
*/
public class MkMaxTree> extends AbstractMkTreeUnified, MkMaxEntry, MkTreeSettings, MkMaxEntry>> {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(MkMaxTree.class);
/**
* Constructor.
*
* @param relation Relation to index
* @param pagefile Page file
* @param settings Tree settings
*/
public MkMaxTree(Relation relation, PageFile> pagefile, MkTreeSettings, MkMaxEntry> settings) {
super(relation, pagefile, settings);
}
/**
* Performs a reverse k-nearest neighbor query for the given object ID. In the
* first step the candidates are chosen by performing a reverse k-nearest
* neighbor query with k = {@link #getKmax()}. Then these candidates are refined
* in a second step.
*/
@Override
public DistanceDBIDList reverseKNNQuery(DBIDRef id, int k) {
if (k > this.getKmax()) {
throw new IllegalArgumentException("Parameter k has to be equal or less than " + "parameter k of the MkMax-Tree!");
}
// get the candidates
GenericDistanceDBIDList candidates = new GenericDistanceDBIDList<>();
doReverseKNNQuery(id, getRoot(), null, candidates);
if (k == this.getKmax()) {
candidates.sort();
// FIXME: re-add statistics.
// rkNNStatistics.addTrueHits(candidates.size());
// rkNNStatistics.addResults(candidates.size());
return candidates;
}
// refinement of candidates
ModifiableDBIDs candidateIDs = DBIDUtil.newArray(candidates.size());
for (DBIDIter candidate = candidates.iter(); candidate.valid(); candidate.advance()) {
candidateIDs.add(candidate);
}
Map> knnLists = batchNN(getRoot(), candidateIDs, k);
GenericDistanceDBIDList result = new GenericDistanceDBIDList<>();
for (DBIDIter iter = candidateIDs.iter(); iter.valid(); iter.advance()) {
DBID cid = DBIDUtil.deref(iter);
KNNList cands = knnLists.get(cid);
for (DistanceDBIDListIter iter2 = cands.iter(); iter2.valid(); iter2.advance()) {
if (DBIDUtil.equal(id, iter2)) {
result.add(iter2.getDistance(), cid);
break;
}
}
}
// FIXME: re-add statistics.
// rkNNStatistics.addResults(result.size());
// rkNNStatistics.addCandidates(candidates.size());
result.sort();
return result;
}
/**
* Adapts the knn distances before insertion of the specified entry.
*
*/
@Override
protected void preInsert(MkMaxEntry entry) {
KNNHeap knns_o = DBIDUtil.newHeap(getDistanceFactory(), getKmax());
preInsert(entry, getRootEntry(), knns_o);
}
/**
* Adjusts the knn distance in the subtree of the specified root entry.
*/
@Override
protected void kNNdistanceAdjustment(MkMaxEntry entry, Map> knnLists) {
MkMaxTreeNode node = getNode(entry);
double knnDist_node = 0.;
if (node.isLeaf()) {
for (int i = 0; i < node.getNumEntries(); i++) {
MkMaxEntry leafEntry = node.getEntry(i);
leafEntry.setKnnDistance(knnLists.get(leafEntry.getRoutingObjectID()).getKNNDistance().doubleValue());
knnDist_node = Math.max(knnDist_node, leafEntry.getKnnDistance());
}
} else {
for (int i = 0; i < node.getNumEntries(); i++) {
MkMaxEntry dirEntry = node.getEntry(i);
kNNdistanceAdjustment(dirEntry, knnLists);
knnDist_node = Math.max(knnDist_node, dirEntry.getKnnDistance());
}
}
entry.setKnnDistance(knnDist_node);
}
/**
* Performs a reverse k-nearest neighbor query in the specified subtree for
* the given query object with k = {@link #getKmax()}. It recursively traverses
* all paths from the specified node, which cannot be excluded from leading to
* qualifying objects.
*
* @param q the id of the query object
* @param node the node of the subtree on which the query is performed
* @param node_entry the entry representing the node
* @param result the list for the query result
*/
private void doReverseKNNQuery(DBIDRef q, MkMaxTreeNode node, MkMaxEntry node_entry, ModifiableDistanceDBIDList result) {
// data node
if (node.isLeaf()) {
for (int i = 0; i < node.getNumEntries(); i++) {
MkMaxEntry entry = node.getEntry(i);
D distance = distance(entry.getRoutingObjectID(), q);
if (distance.doubleValue() <= entry.getKnnDistance()) {
result.add(distance, entry.getRoutingObjectID());
}
}
}
// directory node
else {
for (int i = 0; i < node.getNumEntries(); i++) {
MkMaxEntry entry = node.getEntry(i);
double node_knnDist = node_entry != null ? node_entry.getKnnDistance() : Double.POSITIVE_INFINITY;
double distance = distance(entry.getRoutingObjectID(), q).doubleValue();
double minDist = (entry.getCoveringRadius() > distance) ? 0.0 : distance - entry.getCoveringRadius();
if (minDist <= node_knnDist) {
MkMaxTreeNode childNode = getNode(entry);
doReverseKNNQuery(q, childNode, entry, result);
}
}
}
}
/**
* Adapts the knn distances before insertion of entry q.
*
* @param q the entry to be inserted
* @param nodeEntry the entry representing the root of the current subtree
* @param knns_q the knns of q
*/
private void preInsert(MkMaxEntry q, MkMaxEntry nodeEntry, KNNHeap knns_q) {
if (LOG.isDebugging()) {
LOG.debugFine("preInsert " + q + " - " + nodeEntry + "\n");
}
double knnDist_q = knns_q.getKNNDistance().doubleValue();
MkMaxTreeNode node = getNode(nodeEntry);
double knnDist_node = 0.;
// leaf node
if (node.isLeaf()) {
for (int i = 0; i < node.getNumEntries(); i++) {
MkMaxEntry p = node.getEntry(i);
D dist_pq = distance(p.getRoutingObjectID(), q.getRoutingObjectID());
// p is nearer to q than the farthest kNN-candidate of q
// ==> p becomes a knn-candidate
if (dist_pq.doubleValue() <= knnDist_q) {
knns_q.insert(dist_pq, p.getRoutingObjectID());
if (knns_q.size() >= getKmax()) {
knnDist_q = knns_q.getKNNDistance().doubleValue();
q.setKnnDistance(knnDist_q);
}
}
// p is nearer to q than to its farthest knn-candidate
// q becomes knn of p
if (dist_pq.doubleValue() <= p.getKnnDistance()) {
KNNList knns_p = knnq.getKNNForDBID(p.getRoutingObjectID(), getKmax() - 1);
if (knns_p.size() + 1 < getKmax()) {
p.setKnnDistance(Double.NaN);
} else {
double knnDist_p = Math.max(dist_pq.doubleValue(), knns_p.getKNNDistance().doubleValue());
p.setKnnDistance(knnDist_p);
}
}
knnDist_node = Math.max(knnDist_node, p.getKnnDistance());
}
}
// directory node
else {
List entries = getSortedEntries(node, q.getRoutingObjectID());
for (DoubleIntPair distEntry : entries) {
MkMaxEntry dirEntry = node.getEntry(distEntry.second);
double entry_knnDist = dirEntry.getKnnDistance();
if (distEntry.second < entry_knnDist || distEntry.second < knnDist_q) {
preInsert(q, dirEntry, knns_q);
knnDist_q = knns_q.getKNNDistance().doubleValue();
}
knnDist_node = Math.max(knnDist_node, dirEntry.getKnnDistance());
}
}
if (LOG.isDebugging()) {
LOG.debugFine(nodeEntry + "set knn dist " + knnDist_node);
}
nodeEntry.setKnnDistance(knnDist_node);
}
@Override
protected void initializeCapacities(MkMaxEntry exampleLeaf) {
int distanceSize = ByteArrayUtil.SIZE_DOUBLE; // exampleLeaf.getParentDistance().externalizableSize();
// overhead = index(4), numEntries(4), id(4), isLeaf(0.125)
double overhead = 12.125;
if (getPageSize() - overhead < 0) {
throw new RuntimeException("Node size of " + getPageSize() + " Bytes is chosen too small!");
}
// dirCapacity = (file.getPageSize() - overhead) / (nodeID + objectID +
// coveringRadius + parentDistance + knnDistance) + 1
dirCapacity = (int) (getPageSize() - overhead) / (4 + 4 + 3 * distanceSize) + 1;
if (dirCapacity <= 1) {
throw new RuntimeException("Node size of " + getPageSize() + " Bytes is chosen too small!");
}
if (dirCapacity < 10) {
LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1));
}
// leafCapacity = (file.getPageSize() - overhead) / (objectID +
// parentDistance +
// knnDistance) + 1
leafCapacity = (int) (getPageSize() - overhead) / (4 + 2 * distanceSize) + 1;
if (leafCapacity <= 1) {
throw new RuntimeException("Node size of " + getPageSize() + " Bytes is chosen too small!");
}
if (leafCapacity < 10) {
LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1));
}
}
/**
* @return a new MkMaxTreeNode which is a leaf node
*/
@Override
protected MkMaxTreeNode createNewLeafNode() {
return new MkMaxTreeNode<>(leafCapacity, true);
}
/**
* @return a new MkMaxTreeNode which is a directory node
*/
@Override
protected MkMaxTreeNode createNewDirectoryNode() {
return new MkMaxTreeNode<>(dirCapacity, false);
}
/**
* @return a new MkMaxDirectoryEntry representing the specified node
*/
@Override
protected MkMaxEntry createNewDirectoryEntry(MkMaxTreeNode node, DBID routingObjectID, double parentDistance) {
return new MkMaxDirectoryEntry(routingObjectID, parentDistance, node.getPageID(), node.coveringRadius(routingObjectID, this), node.kNNDistance());
}
/**
* @return a new MkMaxDirectoryEntry by calling
* new MkMaxDirectoryEntry(null, null, 0, null)
*/
@Override
protected MkMaxEntry createRootEntry() {
return new MkMaxDirectoryEntry(null, 0., 0, 0., 0.);
}
@Override
protected Logging getLogger() {
return LOG;
}
}