package de.lmu.ifi.dbs.elki.index.preprocessed.knn; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2011 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.TreeSetDBIDs; import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.PreprocessorRKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.RKNNIndex; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; /** * A preprocessor for annotation of the k nearest neighbors and the reverse k * nearest neighbors (and their distances) to each database object. * * @author Elke Achtert * @param the type of database objects the preprocessor can be applied to * @param the type of distance the used distance function will return */ @Title("Materialize kNN and RkNN Neighborhood preprocessor") @Description("Materializes the k nearest neighbors and the reverse k nearest neighbors of objects of a database.") public class MaterializeKNNAndRKNNPreprocessor> extends MaterializeKNNPreprocessor implements RKNNIndex { /** * Logger to use. */ private static final Logging logger = Logging.getLogger(MaterializeKNNAndRKNNPreprocessor.class); /** * Additional data storage for RkNN. */ private WritableDataStore>> materialized_RkNN; /** * Constructor. * * @param relation Relation to process * @param distanceFunction the distance function to use * @param k query k */ public MaterializeKNNAndRKNNPreprocessor(Relation relation, DistanceFunction distanceFunction, int k) { super(relation, distanceFunction, k); } @Override protected void preprocess() { storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, List.class); materialized_RkNN = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Set.class); FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors and reverse k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null; materializeKNNAndRKNNs(DBIDUtil.ensureArray(relation.getDBIDs()), progress); } /** * Materializes the kNNs and RkNNs of the specified object IDs. * * @param ids the IDs of the objects */ private void materializeKNNAndRKNNs(ArrayDBIDs ids, FiniteProgress progress) { // add an empty list to each rknn for(DBID id : ids) { if(materialized_RkNN.get(id) == null) { materialized_RkNN.put(id, new TreeSet>()); } } // knn query List>> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k); for(int i = 0; i < ids.size(); i++) { DBID id = ids.get(i); List> kNNs = kNNList.get(i); storage.put(id, kNNs); for(DistanceResultPair kNN : kNNs) { Set> rknns = materialized_RkNN.get(kNN.getDBID()); rknns.add(new GenericDistanceResultPair(kNN.getDistance(), id)); } if(progress != null) { progress.incrementProcessed(getLogger()); } } if(progress != null) { progress.ensureCompleted(getLogger()); } } @Override protected void objectsInserted(DBIDs ids) { StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null; ArrayDBIDs aids = DBIDUtil.ensureArray(ids); // materialize the new kNNs and RkNNs if(stepprog != null) { stepprog.beginStep(1, "New insertions ocurred, materialize their new kNNs and RkNNs.", getLogger()); } materializeKNNAndRKNNs(aids, null); // update the old kNNs and RkNNs if(stepprog != null) { stepprog.beginStep(2, "New insertions ocurred, update the affected kNNs and RkNNs.", getLogger()); } ArrayDBIDs rkNN_ids = updateKNNsAndRkNNs(ids); // inform listener if(stepprog != null) { stepprog.beginStep(3, "New insertions ocurred, inform listeners.", getLogger()); } fireKNNsInserted(ids, rkNN_ids); if(stepprog != null) { stepprog.ensureCompleted(getLogger()); } } /** * Updates the kNNs and RkNNs after insertion of the specified ids. * * @param ids the ids of newly inserted objects causing a change of * materialized kNNs and RkNNs * @return the RkNNs of the specified ids, i.e. the kNNs which have been * updated */ private ArrayDBIDs updateKNNsAndRkNNs(DBIDs ids) { ArrayDBIDs rkNN_ids = DBIDUtil.newArray(); DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids); for(DBID id1 : oldids) { List> kNNs = storage.get(id1); D knnDist = kNNs.get(kNNs.size() - 1).getDistance(); // look for new kNNs List> newKNNs = new ArrayList>(); KNNHeap heap = null; for(DBID id2 : ids) { D dist = distanceQuery.distance(id1, id2); if(dist.compareTo(knnDist) <= 0) { if(heap == null) { heap = new KNNHeap(k); heap.addAll(kNNs); } heap.add(dist, id2); } } if(heap != null) { newKNNs = heap.toSortedArrayList(); storage.put(id1, newKNNs); // get the difference int i = 0; int j = 0; List> added = new ArrayList>(); List> removed = new ArrayList>(); while(i < kNNs.size() && j < newKNNs.size()) { DistanceResultPair drp1 = kNNs.get(i); DistanceResultPair drp2 = newKNNs.get(j); if(!drp1.equals(drp2)) { added.add(drp2); j++; } else { i++; j++; } } if(i != j) { for(; i < kNNs.size(); i++) removed.add(kNNs.get(i)); } // add new RkNN for(DistanceResultPair drp : added) { Set> rknns = materialized_RkNN.get(drp.getDBID()); rknns.add(new GenericDistanceResultPair(drp.getDistance(), id1)); } // remove old RkNN for(DistanceResultPair drp : removed) { Set> rknns = materialized_RkNN.get(drp.getDBID()); rknns.remove(new GenericDistanceResultPair(drp.getDistance(), id1)); } rkNN_ids.add(id1); } } return rkNN_ids; } @Override protected void objectsRemoved(DBIDs ids) { StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null; ArrayDBIDs aids = DBIDUtil.ensureArray(ids); // delete the materialized (old) kNNs and RkNNs if(stepprog != null) { stepprog.beginStep(1, "New deletions ocurred, remove their materialized kNNs and RkNNs.", getLogger()); } List>> kNNs = new ArrayList>>(ids.size()); List>> rkNNs = new ArrayList>>(ids.size()); for(DBID id : aids) { kNNs.add(storage.get(id)); storage.delete(id); rkNNs.add(new ArrayList>(materialized_RkNN.get(id))); materialized_RkNN.delete(id); } ArrayDBIDs kNN_ids = extractAndRemoveIDs(kNNs, aids); ArrayDBIDs rkNN_ids = extractAndRemoveIDs(rkNNs, aids); // update the affected kNNs and RkNNs if(stepprog != null) { stepprog.beginStep(2, "New deletions ocurred, update the affected kNNs and RkNNs.", getLogger()); } // update the kNNs of the RkNNs List>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); for(int i = 0; i < rkNN_ids.size(); i++) { DBID id = rkNN_ids.get(i); storage.put(id, kNNList.get(i)); for(DistanceResultPair kNN : kNNList.get(i)) { Set> rknns = materialized_RkNN.get(kNN.getDBID()); rknns.add(new GenericDistanceResultPair(kNN.getDistance(), id)); } } // update the RkNNs of the kNNs TreeSetDBIDs idsSet = DBIDUtil.newTreeSet(ids); for(int i = 0; i < kNN_ids.size(); i++) { DBID id = kNN_ids.get(i); SortedSet> rkNN = materialized_RkNN.get(id); for(Iterator> it = rkNN.iterator(); it.hasNext();) { DistanceResultPair drp = it.next(); if(idsSet.contains(drp.getDBID())) { it.remove(); } } } // inform listener if(stepprog != null) { stepprog.beginStep(3, "New deletions ocurred, inform listeners.", getLogger()); } fireKNNsRemoved(ids, rkNN_ids); if(stepprog != null) { stepprog.ensureCompleted(getLogger()); } } /** * Returns the materialized kNNs of the specified id. * * @param id the query id * @return the kNNs */ public List> getKNN(DBID id) { return storage.get(id); } /** * Returns the materialized RkNNs of the specified id. * * @param id the query id * @return the RkNNs */ public List> getRKNN(DBID id) { SortedSet> rKNN = materialized_RkNN.get(id); if(rKNN == null) return null; return new ArrayList>(rKNN); } @SuppressWarnings("unchecked") @Override public > RKNNQuery getRKNNQuery(DistanceQuery distanceQuery, Object... hints) { if(!this.distanceFunction.equals(distanceQuery.getDistanceFunction())) { return null; } // k max supported? for(Object hint : hints) { if(hint instanceof Integer) { if(((Integer) hint) > k) { return null; } break; } } return new PreprocessorRKNNQuery(relation, (MaterializeKNNAndRKNNPreprocessor) this); } @Override public String getLongName() { return "kNN and RkNN Preprocessor"; } @Override public String getShortName() { return "knn and rknn preprocessor"; } @Override protected Logging getLogger() { return logger; } /** * The parameterizable factory. * * @author Elke Achtert * * @param The object type * @param The distance type */ public static class Factory> extends MaterializeKNNPreprocessor.Factory { /** * Constructor. * * @param k k * @param distanceFunction distance function */ public Factory(int k, DistanceFunction distanceFunction) { super(k, distanceFunction); } @Override public MaterializeKNNAndRKNNPreprocessor instantiate(Relation relation) { MaterializeKNNAndRKNNPreprocessor instance = new MaterializeKNNAndRKNNPreprocessor(relation, distanceFunction, k); return instance; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer> extends MaterializeKNNPreprocessor.Factory.Parameterizer { @Override protected Factory makeInstance() { return new Factory(k, distanceFunction); } } } }