package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2011
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.TreeSetDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.PreprocessorRKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.index.RKNNIndex;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
/**
* A preprocessor for annotation of the k nearest neighbors and the reverse k
* nearest neighbors (and their distances) to each database object.
*
* @author Elke Achtert
* @param the type of database objects the preprocessor can be applied to
* @param the type of distance the used distance function will return
*/
@Title("Materialize kNN and RkNN Neighborhood preprocessor")
@Description("Materializes the k nearest neighbors and the reverse k nearest neighbors of objects of a database.")
public class MaterializeKNNAndRKNNPreprocessor> extends MaterializeKNNPreprocessor implements RKNNIndex {
/**
* Logger to use.
*/
private static final Logging logger = Logging.getLogger(MaterializeKNNAndRKNNPreprocessor.class);
/**
* Additional data storage for RkNN.
*/
private WritableDataStore>> materialized_RkNN;
/**
* Constructor.
*
* @param relation Relation to process
* @param distanceFunction the distance function to use
* @param k query k
*/
public MaterializeKNNAndRKNNPreprocessor(Relation relation, DistanceFunction super O, D> distanceFunction, int k) {
super(relation, distanceFunction, k);
}
@Override
protected void preprocess() {
storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, List.class);
materialized_RkNN = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Set.class);
FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors and reverse k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null;
materializeKNNAndRKNNs(DBIDUtil.ensureArray(relation.getDBIDs()), progress);
}
/**
* Materializes the kNNs and RkNNs of the specified object IDs.
*
* @param ids the IDs of the objects
*/
private void materializeKNNAndRKNNs(ArrayDBIDs ids, FiniteProgress progress) {
// add an empty list to each rknn
for(DBID id : ids) {
if(materialized_RkNN.get(id) == null) {
materialized_RkNN.put(id, new TreeSet>());
}
}
// knn query
List>> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k);
for(int i = 0; i < ids.size(); i++) {
DBID id = ids.get(i);
List> kNNs = kNNList.get(i);
storage.put(id, kNNs);
for(DistanceResultPair kNN : kNNs) {
Set> rknns = materialized_RkNN.get(kNN.getDBID());
rknns.add(new GenericDistanceResultPair(kNN.getDistance(), id));
}
if(progress != null) {
progress.incrementProcessed(getLogger());
}
}
if(progress != null) {
progress.ensureCompleted(getLogger());
}
}
@Override
protected void objectsInserted(DBIDs ids) {
StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;
ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
// materialize the new kNNs and RkNNs
if(stepprog != null) {
stepprog.beginStep(1, "New insertions ocurred, materialize their new kNNs and RkNNs.", getLogger());
}
materializeKNNAndRKNNs(aids, null);
// update the old kNNs and RkNNs
if(stepprog != null) {
stepprog.beginStep(2, "New insertions ocurred, update the affected kNNs and RkNNs.", getLogger());
}
ArrayDBIDs rkNN_ids = updateKNNsAndRkNNs(ids);
// inform listener
if(stepprog != null) {
stepprog.beginStep(3, "New insertions ocurred, inform listeners.", getLogger());
}
fireKNNsInserted(ids, rkNN_ids);
if(stepprog != null) {
stepprog.ensureCompleted(getLogger());
}
}
/**
* Updates the kNNs and RkNNs after insertion of the specified ids.
*
* @param ids the ids of newly inserted objects causing a change of
* materialized kNNs and RkNNs
* @return the RkNNs of the specified ids, i.e. the kNNs which have been
* updated
*/
private ArrayDBIDs updateKNNsAndRkNNs(DBIDs ids) {
ArrayDBIDs rkNN_ids = DBIDUtil.newArray();
DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids);
for(DBID id1 : oldids) {
List> kNNs = storage.get(id1);
D knnDist = kNNs.get(kNNs.size() - 1).getDistance();
// look for new kNNs
List> newKNNs = new ArrayList>();
KNNHeap heap = null;
for(DBID id2 : ids) {
D dist = distanceQuery.distance(id1, id2);
if(dist.compareTo(knnDist) <= 0) {
if(heap == null) {
heap = new KNNHeap(k);
heap.addAll(kNNs);
}
heap.add(dist, id2);
}
}
if(heap != null) {
newKNNs = heap.toSortedArrayList();
storage.put(id1, newKNNs);
// get the difference
int i = 0;
int j = 0;
List> added = new ArrayList>();
List> removed = new ArrayList>();
while(i < kNNs.size() && j < newKNNs.size()) {
DistanceResultPair drp1 = kNNs.get(i);
DistanceResultPair drp2 = newKNNs.get(j);
if(!drp1.equals(drp2)) {
added.add(drp2);
j++;
}
else {
i++;
j++;
}
}
if(i != j) {
for(; i < kNNs.size(); i++)
removed.add(kNNs.get(i));
}
// add new RkNN
for(DistanceResultPair drp : added) {
Set> rknns = materialized_RkNN.get(drp.getDBID());
rknns.add(new GenericDistanceResultPair(drp.getDistance(), id1));
}
// remove old RkNN
for(DistanceResultPair drp : removed) {
Set> rknns = materialized_RkNN.get(drp.getDBID());
rknns.remove(new GenericDistanceResultPair(drp.getDistance(), id1));
}
rkNN_ids.add(id1);
}
}
return rkNN_ids;
}
@Override
protected void objectsRemoved(DBIDs ids) {
StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null;
ArrayDBIDs aids = DBIDUtil.ensureArray(ids);
// delete the materialized (old) kNNs and RkNNs
if(stepprog != null) {
stepprog.beginStep(1, "New deletions ocurred, remove their materialized kNNs and RkNNs.", getLogger());
}
List>> kNNs = new ArrayList>>(ids.size());
List>> rkNNs = new ArrayList>>(ids.size());
for(DBID id : aids) {
kNNs.add(storage.get(id));
storage.delete(id);
rkNNs.add(new ArrayList>(materialized_RkNN.get(id)));
materialized_RkNN.delete(id);
}
ArrayDBIDs kNN_ids = extractAndRemoveIDs(kNNs, aids);
ArrayDBIDs rkNN_ids = extractAndRemoveIDs(rkNNs, aids);
// update the affected kNNs and RkNNs
if(stepprog != null) {
stepprog.beginStep(2, "New deletions ocurred, update the affected kNNs and RkNNs.", getLogger());
}
// update the kNNs of the RkNNs
List>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
for(int i = 0; i < rkNN_ids.size(); i++) {
DBID id = rkNN_ids.get(i);
storage.put(id, kNNList.get(i));
for(DistanceResultPair kNN : kNNList.get(i)) {
Set> rknns = materialized_RkNN.get(kNN.getDBID());
rknns.add(new GenericDistanceResultPair(kNN.getDistance(), id));
}
}
// update the RkNNs of the kNNs
TreeSetDBIDs idsSet = DBIDUtil.newTreeSet(ids);
for(int i = 0; i < kNN_ids.size(); i++) {
DBID id = kNN_ids.get(i);
SortedSet> rkNN = materialized_RkNN.get(id);
for(Iterator> it = rkNN.iterator(); it.hasNext();) {
DistanceResultPair drp = it.next();
if(idsSet.contains(drp.getDBID())) {
it.remove();
}
}
}
// inform listener
if(stepprog != null) {
stepprog.beginStep(3, "New deletions ocurred, inform listeners.", getLogger());
}
fireKNNsRemoved(ids, rkNN_ids);
if(stepprog != null) {
stepprog.ensureCompleted(getLogger());
}
}
/**
* Returns the materialized kNNs of the specified id.
*
* @param id the query id
* @return the kNNs
*/
public List> getKNN(DBID id) {
return storage.get(id);
}
/**
* Returns the materialized RkNNs of the specified id.
*
* @param id the query id
* @return the RkNNs
*/
public List> getRKNN(DBID id) {
SortedSet> rKNN = materialized_RkNN.get(id);
if(rKNN == null)
return null;
return new ArrayList>(rKNN);
}
@SuppressWarnings("unchecked")
@Override
public > RKNNQuery getRKNNQuery(DistanceQuery distanceQuery, Object... hints) {
if(!this.distanceFunction.equals(distanceQuery.getDistanceFunction())) {
return null;
}
// k max supported?
for(Object hint : hints) {
if(hint instanceof Integer) {
if(((Integer) hint) > k) {
return null;
}
break;
}
}
return new PreprocessorRKNNQuery(relation, (MaterializeKNNAndRKNNPreprocessor) this);
}
@Override
public String getLongName() {
return "kNN and RkNN Preprocessor";
}
@Override
public String getShortName() {
return "knn and rknn preprocessor";
}
@Override
protected Logging getLogger() {
return logger;
}
/**
* The parameterizable factory.
*
* @author Elke Achtert
*
* @param The object type
* @param The distance type
*/
public static class Factory> extends MaterializeKNNPreprocessor.Factory {
/**
* Constructor.
*
* @param k k
* @param distanceFunction distance function
*/
public Factory(int k, DistanceFunction super O, D> distanceFunction) {
super(k, distanceFunction);
}
@Override
public MaterializeKNNAndRKNNPreprocessor instantiate(Relation relation) {
MaterializeKNNAndRKNNPreprocessor instance = new MaterializeKNNAndRKNNPreprocessor(relation, distanceFunction, k);
return instance;
}
/**
* Parameterization class.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
public static class Parameterizer> extends MaterializeKNNPreprocessor.Factory.Parameterizer {
@Override
protected Factory makeInstance() {
return new Factory(k, distanceFunction);
}
}
}
}