package de.lmu.ifi.dbs.elki.algorithm; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2011 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.ArrayList; import java.util.Collection; import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStore; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.DistanceUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialNode; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress; import de.lmu.ifi.dbs.elki.result.ResultUtil; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; /** * Joins in a given spatial database to each object its k-nearest neighbors. * This algorithm only supports spatial databases based on a spatial index * structure. * * @author Elke Achtert * @param the type of FeatureVector handled by this Algorithm * @param the type of Distance used by this Algorithm * @param the type of node used in the spatial index structure * @param the type of entry used in the spatial node */ @Title("K-Nearest Neighbor Join") @Description("Algorithm to find the k-nearest neighbors of each object in a spatial database") public class KNNJoin, D extends Distance, N extends SpatialNode, E extends SpatialEntry> extends AbstractDistanceBasedAlgorithm>> { /** * The logger for this class. */ private static final Logging logger = Logging.getLogger(KNNJoin.class); /** * Parameter that specifies the k-nearest neighbors to be assigned, must be an * integer greater than 0. Default value: 1. */ public static final OptionID K_ID = OptionID.getOrCreateOptionID("knnjoin.k", "Specifies the k-nearest neighbors to be assigned."); /** * The k parameter */ int k; /** * Constructor. * * @param distanceFunction Distance function * @param k k parameter */ public KNNJoin(DistanceFunction distanceFunction, int k) { super(distanceFunction); this.k = k; } /** * Joins in the given spatial database to each object its k-nearest neighbors. * * @throws IllegalStateException if not suitable {@link SpatialIndexTree} was * found or the specified distance function is not an instance of * {@link SpatialPrimitiveDistanceFunction}. */ @SuppressWarnings("unchecked") public DataStore> run(Database database, Relation relation) throws IllegalStateException { if(!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction)) { throw new IllegalStateException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName()); } Collection> indexes = ResultUtil.filterResults(database, SpatialIndexTree.class); if(indexes.size() != 1) { throw new AbortException("KNNJoin found " + indexes.size() + " spatial indexes, expected exactly one."); } // FIXME: Ensure were looking at the right relation! SpatialIndexTree index = indexes.iterator().next(); SpatialPrimitiveDistanceFunction distFunction = (SpatialPrimitiveDistanceFunction) getDistanceFunction(); DistanceQuery distq = database.getDistanceQuery(relation, distFunction); DBIDs ids = relation.getDBIDs(); WritableDataStore> knnHeaps = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, KNNHeap.class); try { // data pages of s List ps_candidates = index.getLeaves(); FiniteProgress progress = logger.isVerbose() ? new FiniteProgress(this.getClass().getName(), relation.size(), logger) : null; IndefiniteProgress pageprog = logger.isVerbose() ? new IndefiniteProgress("Number of processed data pages", logger) : null; if(logger.isDebugging()) { logger.debugFine("# ps = " + ps_candidates.size()); } // data pages of r List pr_candidates = new ArrayList(ps_candidates); if(logger.isDebugging()) { logger.debugFine("# pr = " + pr_candidates.size()); } int processed = 0; int processedPages = 0; boolean up = true; for(E pr_entry : pr_candidates) { N pr = index.getNode(pr_entry); D pr_knn_distance = distq.infiniteDistance(); if(logger.isDebugging()) { logger.debugFine(" ------ PR = " + pr); } // create for each data object a knn list for(int j = 0; j < pr.getNumEntries(); j++) { knnHeaps.put(((LeafEntry) pr.getEntry(j)).getDBID(), new KNNHeap(k, distq.infiniteDistance())); } if(up) { for(E ps_entry : ps_candidates) { D distance = distFunction.minDist(pr_entry, ps_entry); if(distance.compareTo(pr_knn_distance) <= 0) { N ps = index.getNode(ps_entry); pr_knn_distance = processDataPages(distq, pr, ps, knnHeaps, pr_knn_distance); } } up = false; } else { for(int s = ps_candidates.size() - 1; s >= 0; s--) { E ps_entry = ps_candidates.get(s); D distance = distFunction.minDist(pr_entry, ps_entry); if(distance.compareTo(pr_knn_distance) <= 0) { N ps = index.getNode(ps_entry); pr_knn_distance = processDataPages(distq, pr, ps, knnHeaps, pr_knn_distance); } } up = true; } processed += pr.getNumEntries(); if(progress != null && pageprog != null) { progress.setProcessed(processed, logger); pageprog.setProcessed(processedPages++, logger); } } if(pageprog != null) { pageprog.setCompleted(logger); } WritableDataStore> knnLists = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_STATIC, KNNList.class); for(DBID id : ids) { knnLists.put(id, knnHeaps.get(id).toKNNList()); } return knnLists; } catch(Exception e) { throw new IllegalStateException(e); } } /** * Processes the two data pages pr and ps and determines the k-nearest * neighbors of pr in ps. * * @param distQ the distance to use * @param pr the first data page * @param ps the second data page * @param knnLists the knn lists for each data object * @param pr_knn_distance the current knn distance of data page pr * @return the k-nearest neighbor distance of pr in ps */ private D processDataPages(DistanceQuery distQ, N pr, N ps, WritableDataStore> knnLists, D pr_knn_distance) { // TODO: optimize for double? boolean infinite = pr_knn_distance.isInfiniteDistance(); for(int i = 0; i < pr.getNumEntries(); i++) { DBID r_id = ((LeafEntry) pr.getEntry(i)).getDBID(); KNNHeap knnList = knnLists.get(r_id); for(int j = 0; j < ps.getNumEntries(); j++) { DBID s_id = ((LeafEntry) ps.getEntry(j)).getDBID(); D distance = distQ.distance(r_id, s_id); if(knnList.add(distance, s_id)) { // set kNN distance of r if(infinite) { pr_knn_distance = knnList.getMaximumDistance(); } pr_knn_distance = DistanceUtil.max(knnList.getMaximumDistance(), pr_knn_distance); } } } return pr_knn_distance; } @Override public TypeInformation[] getInputTypeRestriction() { return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD); } @Override protected Logging getLogger() { return logger; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer, D extends Distance, N extends SpatialNode, E extends SpatialEntry> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer { protected int k; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); IntParameter kP = new IntParameter(K_ID, 1); kP.addConstraint(new GreaterConstraint(0)); if(config.grab(kP)) { k = kP.getValue(); } } @Override protected KNNJoin makeInstance() { return new KNNJoin(distanceFunction, k); } } }