package de.lmu.ifi.dbs.elki.index.preprocessed.snn; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import 
de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; /** * A preprocessor for annotation of the ids of nearest neighbors to each * database object. *

* The k nearest neighbors are assigned based on an arbitrary distance function.
 * 
 * This functionality is similar but not identical to
 * {@link MaterializeKNNPreprocessor}: While it also computes the k nearest
 * neighbors, it does not keep the actual distances, but stores the neighbor
 * ids of each object as a sorted {@link ArrayDBIDs} for fast set operations.
 * 
 * @author Arthur Zimek
 * @author Erich Schubert
 * 
 * @apiviz.has DistanceFunction
 * 
 * @param <O> the type of database objects the preprocessor can be applied to
 * @param <D> the type of distance the used distance function will return
 */
@Title("Shared nearest neighbor Preprocessor")
@Description("Computes the k nearest neighbors of objects of a certain database.")
public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends AbstractPreprocessorIndex<O, ArrayDBIDs> implements SharedNearestNeighborIndex<O> {
  // NOTE(review): the generic type parameters in this file (<O, D extends
  // Distance<D>>) and the type arguments passed to the super types were
  // restored from the surviving javadoc and from how the values are used
  // below; confirm them against the declarations of AbstractPreprocessorIndex
  // and SharedNearestNeighborIndex.

  /**
   * Get a logger for this class.
   */
  private static final Logging logger = Logging.getLogger(SharedNearestNeighborPreprocessor.class);

  /**
   * Holds the number of nearest neighbors to be used.
   */
  protected int numberOfNeighbors;

  /**
   * Hold the distance function to be used.
   */
  protected DistanceFunction<O, D> distanceFunction;

  /**
   * Constructor.
   * 
   * @param relation Database to use
   * @param numberOfNeighbors Number of neighbors
   * @param distanceFunction Distance function
   */
  public SharedNearestNeighborPreprocessor(Relation<O> relation, int numberOfNeighbors, DistanceFunction<O, D> distanceFunction) {
    super(relation);
    this.numberOfNeighbors = numberOfNeighbors;
    this.distanceFunction = distanceFunction;
  }

  /**
   * Preprocessing step.
   * 
   * Allocates the storage and, for every object id of the relation, runs a
   * kNN query, collects the ids of the result in query order, sorts the
   * collected ids and stores them for that object.
   */
  protected void preprocess() {
    if(getLogger().isVerbose()) {
      getLogger().verbose("Assigning nearest neighbor lists to database objects");
    }
    storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, ArrayDBIDs.class);
    KNNQuery<O, D> knnquery = QueryUtil.getKNNQuery(relation, distanceFunction, numberOfNeighbors);

    FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("assigning nearest neighbor lists", relation.size(), getLogger()) : null;
    for(DBID id : relation.iterDBIDs()) {
      ArrayModifiableDBIDs neighbors = DBIDUtil.newArray(numberOfNeighbors);
      KNNResult<D> kNN = knnquery.getKNNForDBID(id, numberOfNeighbors);
      for(int i = 0; i < kNN.size(); i++) {
        final DBID nid = kNN.get(i).getDBID();
        // Every id of the query result is kept; an id equal to the query id is
        // not filtered out.
        neighbors.add(nid);
        // Size limitation to exactly numberOfNeighbors: the query result may
        // hold more entries than were requested.
        if(neighbors.size() >= numberOfNeighbors) {
          break;
        }
      }
      // Stored in sorted id order, not in distance order.
      neighbors.sort();
      storage.put(id, neighbors);
      if(progress != null) {
        progress.incrementProcessed(getLogger());
      }
    }
    if(progress != null) {
      progress.ensureCompleted(getLogger());
    }
  }

  /**
   * Get the stored neighbor ids of an object.
   * 
   * The preprocessing step is run first if the storage has not been created
   * yet.
   * 
   * @param objid id of the object to look up
   * @return the neighbor ids stored for {@code objid}
   */
  @Override
  public ArrayDBIDs getNearestNeighborSet(DBID objid) {
    if(storage == null) {
      preprocess();
    }
    return storage.get(objid);
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  @Override
  public String getLongName() {
    return "SNN id index";
  }

  @Override
  public String getShortName() {
    return "SNN-index";
  }

  /**
   * Get the number of neighbors
   * 
   * @return NN size
   */
  @Override
  public int getNumberOfNeighbors() {
    return numberOfNeighbors;
  }

  /**
   * Factory class
   * 
   * @author Erich Schubert
   * 
   * @apiviz.stereotype factory
   * @apiviz.uses SharedNearestNeighborPreprocessor oneway - - «create»
   * 
   * @param <O> the type of database objects the preprocessor can be applied to
   * @param <D> the type of distance the used distance function will return
   */
  public static class Factory<O, D extends Distance<D>> implements SharedNearestNeighborIndex.Factory<O, SharedNearestNeighborPreprocessor<O, D>>, Parameterizable {
    /**
     * Parameter to indicate the number of neighbors to be taken into account
     * for the shared-nearest-neighbor similarity.
     * <p>
     * Constraint: at least 1. No default value is given where the parameter
     * is constructed in {@link Parameterizer}.
     * </p>
     * <p>
     * Key: {@code sharedNearestNeighbors}
     * </p>
     */
    public static final OptionID NUMBER_OF_NEIGHBORS_ID = OptionID.getOrCreateOptionID("sharedNearestNeighbors", "number of nearest neighbors to consider (at least 1)");

    /**
     * Parameter to indicate the distance function to be used to ascertain the
     * nearest neighbors.
     * <p>
     * Default value: {@link EuclideanDistanceFunction}
     * </p>
     * <p>
     * Key: {@code SNNDistanceFunction}
     * </p>
     */
    public static final OptionID DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("SNNDistanceFunction", "the distance function to asses the nearest neighbors");

    /**
     * Holds the number of nearest neighbors to be used.
     */
    protected int numberOfNeighbors;

    /**
     * Hold the distance function to be used.
     */
    protected DistanceFunction<O, D> distanceFunction;

    /**
     * Constructor.
     * 
     * @param numberOfNeighbors Number of neighbors
     * @param distanceFunction Distance function
     */
    public Factory(int numberOfNeighbors, DistanceFunction<O, D> distanceFunction) {
      super();
      this.numberOfNeighbors = numberOfNeighbors;
      this.distanceFunction = distanceFunction;
    }

    /**
     * Create a preprocessor for the given relation, using the number of
     * neighbors and the distance function of this factory.
     * 
     * @param relation Relation to build the preprocessor for
     * @return a new preprocessor instance
     */
    @Override
    public SharedNearestNeighborPreprocessor<O, D> instantiate(Relation<O> relation) {
      return new SharedNearestNeighborPreprocessor<O, D>(relation, numberOfNeighbors, distanceFunction);
    }

    /**
     * Get the number of neighbors
     * 
     * @return NN size
     */
    @Override
    public int getNumberOfNeighbors() {
      return numberOfNeighbors;
    }

    /**
     * Get the input type restriction, which is the one reported by the
     * distance function.
     * 
     * @return input type restriction of the distance function
     */
    @Override
    public TypeInformation getInputTypeRestriction() {
      return distanceFunction.getInputTypeRestriction();
    }

    /**
     * Parameterization class.
     * 
     * @author Erich Schubert
     * 
     * @apiviz.exclude
     * 
     * @param <O> the type of database objects the preprocessor can be applied
     *        to
     * @param <D> the type of distance the used distance function will return
     */
    public static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer {
      /**
       * Holds the number of nearest neighbors to be used.
       */
      protected int numberOfNeighbors;

      /**
       * Hold the distance function to be used.
       */
      protected DistanceFunction<O, D> distanceFunction;

      /**
       * Read the number of neighbors (constrained to be at least 1) and the
       * distance function (defaulting to {@link EuclideanDistanceFunction})
       * from the parameterization.
       * 
       * @param config Parameterization to read the options from
       */
      @Override
      protected void makeOptions(Parameterization config) {
        super.makeOptions(config);
        final IntParameter numberOfNeighborsP = new IntParameter(NUMBER_OF_NEIGHBORS_ID, new GreaterEqualConstraint(1));
        if(config.grab(numberOfNeighborsP)) {
          numberOfNeighbors = numberOfNeighborsP.getValue();
        }

        final ObjectParameter<DistanceFunction<O, D>> distanceFunctionP = new ObjectParameter<DistanceFunction<O, D>>(DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
        if(config.grab(distanceFunctionP)) {
          distanceFunction = distanceFunctionP.instantiateClass(config);
        }
      }

      @Override
      protected Factory<O, D> makeInstance() {
        return new Factory<O, D>(numberOfNeighbors, distanceFunction);
      }
    }
  }
}