diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/preprocessed/knn')
12 files changed, 272 insertions, 388 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java index 927678c2..9c950d95 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,14 +28,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.IndexFactory; import de.lmu.ifi.dbs.elki.index.KNNIndex; import de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex; @@ -52,10 +51,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @author Erich Schubert * * @param <O> Object type - * @param <D> Distance type - * @param <T> Result type */ -public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D>, T extends KNNList<D>> extends AbstractPreprocessorIndex<O, T> implements KNNIndex<O> { +public abstract class AbstractMaterializeKNNPreprocessor<O> extends AbstractPreprocessorIndex<O, KNNList> implements KNNIndex<O> { /** * The query k value. */ @@ -64,12 +61,12 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D /** * The distance function to be used. */ - protected final DistanceFunction<? super O, D> distanceFunction; + protected final DistanceFunction<? super O> distanceFunction; /** * The distance query we used. */ - protected final DistanceQuery<O, D> distanceQuery; + protected final DistanceQuery<O> distanceQuery; /** * Constructor. @@ -78,7 +75,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * @param distanceFunction Distance function * @param k k */ - public AbstractMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) { + public AbstractMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k) { super(relation); this.k = k; this.distanceFunction = distanceFunction; @@ -86,20 +83,11 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D } /** - * Get the distance factory. - * - * @return distance factory - */ - public D getDistanceFactory() { - return distanceFunction.getDistanceFactory(); - } - - /** * The distance query we used. * * @return Distance query */ - public DistanceQuery<O, D> getDistanceQuery() { + public DistanceQuery<O> getDistanceQuery() { return distanceQuery; } @@ -123,7 +111,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * @param id Object ID * @return Neighbors */ - public KNNList<D> get(DBIDRef id) { + public KNNList get(DBIDRef id) { if(storage == null) { if(getLogger().isDebugging()) { getLogger().debug("Running kNN preprocessor: " + this.getClass()); @@ -137,7 +125,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * Create the default storage. */ void createStorage() { - WritableDataStore<T> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, KNNList.class); + WritableDataStore<KNNList> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, KNNList.class); storage = s; } @@ -155,7 +143,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D @SuppressWarnings("unchecked") @Override - public <S extends Distance<S>> KNNQuery<O, S> getKNNQuery(DistanceQuery<O, S> distQ, Object... hints) { + public KNNQuery<O> getKNNQuery(DistanceQuery<O> distQ, Object... hints) { if(!this.distanceFunction.equals(distQ.getDistanceFunction())) { return null; } @@ -169,8 +157,8 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D } } // To make compilers happy: - AbstractMaterializeKNNPreprocessor<?, ?, ?> tmp = this; - return new PreprocessorKNNQuery<>(relation, (AbstractMaterializeKNNPreprocessor<O, S, KNNList<S>>) tmp); + AbstractMaterializeKNNPreprocessor<?> tmp = this; + return new PreprocessorKNNQuery<>(relation, (AbstractMaterializeKNNPreprocessor<O>) tmp); } /** @@ -183,9 +171,8 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * @apiviz.uses AbstractMaterializeKNNPreprocessor oneway - - «create» * * @param <O> The object type - * @param <D> The distance type */ - public abstract static class Factory<O, D extends Distance<D>, T extends KNNList<D>> implements IndexFactory<O, KNNIndex<O>> { + public abstract static class Factory<O> implements IndexFactory<O, KNNIndex<O>> { /** * Parameter to specify the number of nearest neighbors of an object to be * materialized. must be an integer greater than 1. @@ -216,7 +203,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D /** * Hold the distance function to be used. */ - protected DistanceFunction<? super O, D> distanceFunction; + protected DistanceFunction<? super O> distanceFunction; /** * Index factory. @@ -224,14 +211,14 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * @param k k parameter * @param distanceFunction distance function */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction) { + public Factory(int k, DistanceFunction<? super O> distanceFunction) { super(); this.k = k; this.distanceFunction = distanceFunction; } @Override - public abstract AbstractMaterializeKNNPreprocessor<O, D, T> instantiate(Relation<O> relation); + public abstract AbstractMaterializeKNNPreprocessor<O> instantiate(Relation<O> relation); /** * Get the distance function. @@ -239,19 +226,10 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * @return Distance function */ // TODO: hide this? - public DistanceFunction<? super O, D> getDistanceFunction() { + public DistanceFunction<? super O> getDistanceFunction() { return distanceFunction; } - /** - * Get the distance factory. - * - * @return Distance factory - */ - public D getDistanceFactory() { - return distanceFunction.getDistanceFactory(); - } - @Override public TypeInformation getInputTypeRestriction() { return distanceFunction.getInputTypeRestriction(); @@ -263,8 +241,10 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * @author Erich Schubert * * @apiviz.exclude + * + * @param <O> Object type */ - public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer { + public abstract static class Parameterizer<O> extends AbstractParameterizer { /** * Holds the value of {@link #K_ID}. */ @@ -273,7 +253,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D /** * Hold the distance function to be used. */ - protected DistanceFunction<? super O, D> distanceFunction; + protected DistanceFunction<? super O> distanceFunction; @Override protected void makeOptions(Parameterization config) { @@ -286,14 +266,14 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D } // distance function - final ObjectParameter<DistanceFunction<? super O, D>> distanceFunctionP = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); + final ObjectParameter<DistanceFunction<? super O>> distanceFunctionP = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); if(config.grab(distanceFunctionP)) { distanceFunction = distanceFunctionP.instantiateClass(config); } } @Override - protected abstract Factory<O, D, ?> makeInstance(); + protected abstract Factory<O> makeInstance(); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/CachedDoubleDistanceKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/CachedDoubleDistanceKNNPreprocessor.java index c36948be..eec9e812 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/CachedDoubleDistanceKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/CachedDoubleDistanceKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,20 +29,16 @@ import java.io.RandomAccessFile; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; -import java.util.ArrayList; import de.lmu.ifi.dbs.elki.application.cache.CacheDoubleDistanceKNNLists; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.generic.DoubleDistanceDBIDPairKNNList; +import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; -import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; @@ -54,7 +50,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; * * @param <O> Object type */ -public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeKNNPreprocessor<O, DoubleDistance, DoubleDistanceKNNList> { +public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeKNNPreprocessor<O> { /** * File to load. */ @@ -68,7 +64,7 @@ public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeK * @param k K * @param file File to load */ - public CachedDoubleDistanceKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, DoubleDistance> distanceFunction, int k, File file) { + public CachedDoubleDistanceKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k, File file) { super(relation, distanceFunction, k); this.filename = file; } @@ -86,28 +82,31 @@ public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeK FileChannel channel = file.getChannel()) { // check magic header int header = file.readInt(); - if (header != CacheDoubleDistanceKNNLists.KNN_CACHE_MAGIC) { + if(header != CacheDoubleDistanceKNNLists.KNN_CACHE_MAGIC) { throw new AbortException("Cache magic number does not match."); } MappedByteBuffer buffer = channel.map(MapMode.READ_ONLY, 4, file.length() - 4); - for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { int dbid = ByteArrayUtil.readUnsignedVarint(buffer); int nnsize = ByteArrayUtil.readUnsignedVarint(buffer); - if (nnsize < k) { + if(nnsize < k) { throw new AbortException("kNN cache contains fewer than k objects!"); } - ArrayList<DoubleDistanceDBIDPair> col = new ArrayList<>(nnsize); - for (int i = 0; i < nnsize; i++) { + // FIXME: avoid the KNNHeap to KNNList roundtrip. + // FIXME: use a DBIDVar instead of importInteger. + KNNHeap knn = DBIDUtil.newHeap(k); + for(int i = 0; i < nnsize; i++) { int nid = ByteArrayUtil.readUnsignedVarint(buffer); double dist = buffer.getDouble(); - col.add(DBIDUtil.newDistancePair(dist, DBIDUtil.importInteger(nid))); + knn.insert(dist, DBIDUtil.importInteger(nid)); } - storage.put(DBIDUtil.importInteger(dbid), new DoubleDistanceDBIDPairKNNList(col, nnsize)); + storage.put(DBIDUtil.importInteger(dbid), knn.toKNNList()); } - if (buffer.hasRemaining()) { + if(buffer.hasRemaining()) { LOG.warning("kNN cache has " + buffer.remaining() + " bytes remaining!"); } - } catch (IOException e) { + } + catch(IOException e) { throw new AbortException("I/O error in loading kNN cache: " + e.getMessage(), e); } } @@ -143,7 +142,7 @@ public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeK * * @param <O> The object type */ - public static class Factory<O> extends AbstractMaterializeKNNPreprocessor.Factory<O, DoubleDistance, DoubleDistanceKNNList> { + public static class Factory<O> extends AbstractMaterializeKNNPreprocessor.Factory<O> { /** * Filename to load. */ @@ -156,7 +155,7 @@ public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeK * @param distanceFunction distance function * @param filename Cache file */ - public Factory(int k, DistanceFunction<? super O, DoubleDistance> distanceFunction, File filename) { + public Factory(int k, DistanceFunction<? super O> distanceFunction, File filename) { super(k, distanceFunction); this.filename = filename; } @@ -174,7 +173,7 @@ public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeK * * @apiviz.exclude */ - public static class Parameterizer<O> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, DoubleDistance> { + public static class Parameterizer<O> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O> { /** * Option ID for the kNN file. */ @@ -191,7 +190,7 @@ public class CachedDoubleDistanceKNNPreprocessor<O> extends AbstractMaterializeK // Input file parameter final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.INPUT_FILE); - if (config.grab(cpar)) { + if(config.grab(cpar)) { filename = cpar.getValue(); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java index 275c1c97..888c1c22 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java index 44100604..8f55b519 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java @@ -1,20 +1,10 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; -import de.lmu.ifi.dbs.elki.algorithm.KNNJoin; -import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; -import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; -import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeNode; -import de.lmu.ifi.dbs.elki.logging.Logging; - /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -32,6 +22,13 @@ import de.lmu.ifi.dbs.elki.logging.Logging; You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.algorithm.KNNJoin; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; +import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeNode; +import de.lmu.ifi.dbs.elki.logging.Logging; /** * Class to materialize the kNN using a spatial join on an R-tree. @@ -39,9 +36,8 @@ import de.lmu.ifi.dbs.elki.logging.Logging; * @author Erich Schubert * * @param <V> vector type - * @param <D> distance type */ -public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<V, D, KNNList<D>> { +public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector> extends AbstractMaterializeKNNPreprocessor<V> { /** * Logging class. */ @@ -54,14 +50,14 @@ public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<?>, D exte * @param distanceFunction Distance function * @param k k */ - public KNNJoinMaterializeKNNPreprocessor(Relation<V> relation, DistanceFunction<? super V, D> distanceFunction, int k) { + public KNNJoinMaterializeKNNPreprocessor(Relation<V> relation, DistanceFunction<? super V> distanceFunction, int k) { super(relation, distanceFunction, k); } @Override protected void preprocess() { // Run KNNJoin - KNNJoin<V, D, ?, ?> knnjoin = new KNNJoin<V, D, RStarTreeNode, SpatialEntry>(distanceFunction, k); + KNNJoin<V, ?, ?> knnjoin = new KNNJoin<V, RStarTreeNode, SpatialEntry>(distanceFunction, k); storage = knnjoin.run(relation.getDatabase(), relation); } @@ -95,21 +91,20 @@ public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<?>, D exte * @apiviz.uses AbstractMaterializeKNNPreprocessor oneway - - «create» * * @param <O> The object type - * @param <D> The distance type */ - public static class Factory<O extends NumberVector<?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNList<D>> { + public static class Factory<O extends NumberVector> extends AbstractMaterializeKNNPreprocessor.Factory<O> { /** * Constructor. * * @param k K * @param distanceFunction distance function */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction) { + public Factory(int k, DistanceFunction<? super O> distanceFunction) { super(k, distanceFunction); } @Override - public KNNJoinMaterializeKNNPreprocessor<O, D> instantiate(Relation<O> relation) { + public KNNJoinMaterializeKNNPreprocessor<O> instantiate(Relation<O> relation) { return new KNNJoinMaterializeKNNPreprocessor<>(relation, distanceFunction, k); } @@ -121,11 +116,10 @@ public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<?>, D exte * @apiviz.exclude * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O extends NumberVector<?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O extends NumberVector> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O> { @Override - protected KNNJoinMaterializeKNNPreprocessor.Factory<O, D> makeInstance() { + protected KNNJoinMaterializeKNNPreprocessor.Factory<O> makeInstance() { return new KNNJoinMaterializeKNNPreprocessor.Factory<>(k, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java index 8c78cdc0..e26106de 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java index fba2b168..49e23dde 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; import java.util.ArrayList; import java.util.Collection; -import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.TreeSet; @@ -40,22 +39,19 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList; import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.PreprocessorRKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.DistanceUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.RKNNIndex; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -70,12 +66,12 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * @author Elke Achtert * * @param <O> the type of database objects the preprocessor can be applied to - * @param <D> the type of distance the used distance function will return + * @param the type of distance the used distance function will return */ // TODO: rewrite the double optimization. Maybe use a specialized subclass? @Title("Materialize kNN and RkNN Neighborhood preprocessor") @Description("Materializes the k nearest neighbors and the reverse k nearest neighbors of objects of a database.") -public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends MaterializeKNNPreprocessor<O, D> implements RKNNIndex<O> { +public class MaterializeKNNAndRKNNPreprocessor<O> extends MaterializeKNNPreprocessor<O> implements RKNNIndex<O> { /** * Logger to use. */ @@ -84,12 +80,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends /** * Additional data storage for RkNN. */ - private WritableDataStore<TreeSet<DistanceDBIDPair<D>>> materialized_RkNN; - - /** - * Use optimizations for double values - */ - protected boolean doubleOptimize; + private WritableDataStore<TreeSet<DoubleDBIDPair>> materialized_RkNN; /** * Constructor. @@ -98,9 +89,8 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * @param distanceFunction the distance function to use * @param k query k */ - public MaterializeKNNAndRKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) { + public MaterializeKNNAndRKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k) { super(relation, distanceFunction, k); - this.doubleOptimize = DistanceUtil.isDoubleDistanceFunction(distanceFunction); } @Override @@ -118,39 +108,30 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends */ private void materializeKNNAndRKNNs(ArrayDBIDs ids, FiniteProgress progress) { // add an empty list to each rknn - Comparator<? super DistanceDBIDPair<D>> comp = DistanceDBIDResultUtil.distanceComparator(); for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { if(materialized_RkNN.get(iter) == null) { - materialized_RkNN.put(iter, new TreeSet<>(comp)); + materialized_RkNN.put(iter, new TreeSet<DoubleDBIDPair>()); } } // knn query - List<? extends KNNList<D>> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k); + List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k); int i = 0; for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { - KNNList<D> kNNs = kNNList.get(i); + KNNList kNNs = kNNList.get(i); storage.put(id, kNNs); - for(DistanceDBIDListIter<D> iter = kNNs.iter(); iter.valid(); iter.advance()) { - TreeSet<DistanceDBIDPair<D>> rknns = materialized_RkNN.get(iter); + for(DoubleDBIDListIter iter = kNNs.iter(); iter.valid(); iter.advance()) { + TreeSet<DoubleDBIDPair> rknns = materialized_RkNN.get(iter); rknns.add(makePair(iter, id)); } - if(progress != null) { - progress.incrementProcessed(getLogger()); - } + getLogger().incrementProcessed(progress); } - if(progress != null) { - progress.ensureCompleted(getLogger()); - } + getLogger().ensureCompleted(progress); } - @SuppressWarnings("unchecked") - private DistanceDBIDPair<D> makePair(DistanceDBIDListIter<D> iter, DBIDIter id) { - if(doubleOptimize) { - return (DistanceDBIDPair<D>) DBIDUtil.newDistancePair(((DoubleDistanceDBIDPair) iter.getDistancePair()).doubleDistance(), id); - } - return DBIDUtil.newDistancePair(iter.getDistance(), id); + private DoubleDBIDPair makePair(DoubleDBIDListIter iter, DBIDIter id) { + return DBIDUtil.newPair(iter.getPair().doubleValue(), id); } @Override @@ -159,26 +140,18 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends ArrayDBIDs aids = DBIDUtil.ensureArray(ids); // materialize the new kNNs and RkNNs - if(stepprog != null) { - stepprog.beginStep(1, "New insertions ocurred, materialize their new kNNs and RkNNs.", getLogger()); - } + getLogger().beginStep(stepprog, 1, "New insertions ocurred, materialize their new kNNs and RkNNs."); materializeKNNAndRKNNs(aids, null); // update the old kNNs and RkNNs - if(stepprog != null) { - stepprog.beginStep(2, "New insertions ocurred, update the affected kNNs and RkNNs.", getLogger()); - } + getLogger().beginStep(stepprog, 2, "New insertions ocurred, update the affected kNNs and RkNNs."); ArrayDBIDs rkNN_ids = updateKNNsAndRkNNs(ids); // inform listener - if(stepprog != null) { - stepprog.beginStep(3, "New insertions ocurred, inform listeners.", getLogger()); - } + getLogger().beginStep(stepprog, 3, "New insertions ocurred, inform listeners."); fireKNNsInserted(ids, rkNN_ids); - if(stepprog != null) { - stepprog.ensureCompleted(getLogger()); - } + getLogger().ensureCompleted(stepprog); } /** @@ -193,13 +166,13 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray(); DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids); for(DBIDIter id = oldids.iter(); id.valid(); id.advance()) { - KNNList<D> oldkNNs = storage.get(id); - D knnDist = oldkNNs.getKNNDistance(); + KNNList oldkNNs = storage.get(id); + double knnDist = oldkNNs.getKNNDistance(); // look for new kNNs - KNNHeap<D> heap = null; + KNNHeap heap = null; for(DBIDIter newid = ids.iter(); newid.valid(); newid.advance()) { - D dist = distanceQuery.distance(id, newid); - if(dist.compareTo(knnDist) <= 0) { + double dist = distanceQuery.distance(id, newid); + if(dist <= knnDist) { // New id changes the kNNs of oldid. if(heap == null) { heap = DBIDUtil.newHeap(oldkNNs); @@ -209,18 +182,18 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends } // kNNs for oldid have changed: if(heap != null) { - KNNList<D> newkNNs = heap.toKNNList(); + KNNList newkNNs = heap.toKNNList(); storage.put(id, newkNNs); // get the difference int i = 0; int j = 0; - GenericDistanceDBIDList<D> added = new GenericDistanceDBIDList<>(); - GenericDistanceDBIDList<D> removed = new GenericDistanceDBIDList<>(); + ModifiableDoubleDBIDList added = DBIDUtil.newDistanceDBIDList(); + ModifiableDoubleDBIDList removed = DBIDUtil.newDistanceDBIDList(); // TODO: use iterators. while(i < oldkNNs.size() && j < newkNNs.size()) { - DistanceDBIDPair<D> drp1 = oldkNNs.get(i); - DistanceDBIDPair<D> drp2 = newkNNs.get(j); + DoubleDBIDPair drp1 = oldkNNs.get(i); + DoubleDBIDPair drp2 = newkNNs.get(j); // NOTE: we assume that on ties they are ordered the same way! if(!DBIDUtil.equal(drp1, drp2)) { added.add(drp2); @@ -240,13 +213,13 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends } } // add new RkNN - for(DistanceDBIDListIter<D> newnn = added.iter(); newnn.valid(); newnn.advance()) { - TreeSet<DistanceDBIDPair<D>> rknns = materialized_RkNN.get(newnn); + for(DoubleDBIDListIter newnn = added.iter(); newnn.valid(); newnn.advance()) { + TreeSet<DoubleDBIDPair> rknns = materialized_RkNN.get(newnn); rknns.add(makePair(newnn, id)); } // remove old RkNN - for(DistanceDBIDListIter<D> oldnn = removed.iter(); oldnn.valid(); oldnn.advance()) { - TreeSet<DistanceDBIDPair<D>> rknns = materialized_RkNN.get(oldnn); + for(DoubleDBIDListIter oldnn = removed.iter(); oldnn.valid(); oldnn.advance()) { + TreeSet<DoubleDBIDPair> rknns = materialized_RkNN.get(oldnn); rknns.remove(makePair(oldnn, id)); } @@ -262,12 +235,10 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends ArrayDBIDs aids = DBIDUtil.ensureArray(ids); // delete the materialized (old) kNNs and RkNNs - if(stepprog != null) { - stepprog.beginStep(1, "New deletions ocurred, remove their materialized kNNs and RkNNs.", getLogger()); - } + getLogger().beginStep(stepprog, 1, "New deletions ocurred, remove their materialized kNNs and RkNNs."); // Temporary storage of removed lists - List<KNNList<D>> kNNs = new ArrayList<>(ids.size()); - List<TreeSet<DistanceDBIDPair<D>>> rkNNs = new ArrayList<>(ids.size()); + List<KNNList> kNNs = new ArrayList<>(ids.size()); + List<TreeSet<DoubleDBIDPair>> rkNNs = new ArrayList<>(ids.size()); for(DBIDIter iter = aids.iter(); iter.valid(); iter.advance()) { kNNs.add(storage.get(iter)); storage.delete(iter); @@ -279,16 +250,14 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends ArrayDBIDs rkNN_ids = affectedRkNN(rkNNs, aids); // update the affected kNNs and RkNNs - if(stepprog != null) { - stepprog.beginStep(2, "New deletions ocurred, update the affected kNNs and RkNNs.", getLogger()); - } + getLogger().beginStep(stepprog, 2, "New deletions ocurred, update the affected kNNs and RkNNs."); // Recompute the kNN for affected objects (in rkNN lists) { - List<? extends KNNList<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); + List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); int i = 0; for(DBIDIter reknn = rkNN_ids.iter(); reknn.valid(); reknn.advance(), i++) { storage.put(reknn, kNNList.get(i)); - for(DistanceDBIDListIter<D> it = kNNList.get(i).iter(); it.valid(); it.advance()) { + for(DoubleDBIDListIter it = kNNList.get(i).iter(); it.valid(); it.advance()) { materialized_RkNN.get(it).add(makePair(it, reknn)); } } @@ -297,8 +266,8 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends { SetDBIDs idsSet = DBIDUtil.ensureSet(ids); for(DBIDIter nn = kNN_ids.iter(); nn.valid(); nn.advance()) { - TreeSet<DistanceDBIDPair<D>> rkNN = materialized_RkNN.get(nn); - for(Iterator<DistanceDBIDPair<D>> it = rkNN.iterator(); it.hasNext();) { + TreeSet<DoubleDBIDPair> rkNN = materialized_RkNN.get(nn); + for(Iterator<DoubleDBIDPair> it = rkNN.iterator(); it.hasNext();) { if(idsSet.contains(it.next())) { it.remove(); } @@ -307,14 +276,10 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends } // inform listener - if(stepprog != null) { - stepprog.beginStep(3, "New deletions ocurred, inform listeners.", getLogger()); - } + getLogger().beginStep(stepprog, 3, "New deletions ocurred, inform listeners."); fireKNNsRemoved(ids, rkNN_ids); - if(stepprog != null) { - stepprog.ensureCompleted(getLogger()); - } + getLogger().ensureCompleted(stepprog); } /** @@ -324,9 +289,9 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * @param remove the ids to remove * @return the DBIDs in the given collection */ - protected ArrayDBIDs affectedkNN(List<? extends KNNList<D>> extraxt, DBIDs remove) { + protected ArrayDBIDs affectedkNN(List<? extends KNNList> extraxt, DBIDs remove) { HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(); - for(KNNList<D> drps : extraxt) { + for(KNNList drps : extraxt) { for(DBIDIter iter = drps.iter(); iter.valid(); iter.advance()) { ids.add(iter); } @@ -343,10 +308,10 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * @param remove the ids to remove * @return the DBIDs in the given collection */ - protected ArrayDBIDs affectedRkNN(List<? extends Collection<DistanceDBIDPair<D>>> extraxt, DBIDs remove) { + protected ArrayDBIDs affectedRkNN(List<? extends Collection<DoubleDBIDPair>> extraxt, DBIDs remove) { HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(); - for(Collection<DistanceDBIDPair<D>> drps : extraxt) { - for(DistanceDBIDPair<D> drp : drps) { + for(Collection<DoubleDBIDPair> drps : extraxt) { + for(DoubleDBIDPair drp : drps) { ids.add(drp); } } @@ -361,7 +326,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * @param id the query id * @return the kNNs */ - public KNNList<D> getKNN(DBID id) { + public KNNList getKNN(DBID id) { return storage.get(id); } @@ -371,22 +336,21 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * @param id the query id * @return the RkNNs */ - public GenericDistanceDBIDList<D> getRKNN(DBIDRef id) { - TreeSet<DistanceDBIDPair<D>> rKNN = materialized_RkNN.get(id); + public DoubleDBIDList getRKNN(DBIDRef id) { + TreeSet<DoubleDBIDPair> rKNN = materialized_RkNN.get(id); if(rKNN == null) { return null; } - GenericDistanceDBIDList<D> ret = new GenericDistanceDBIDList<>(rKNN.size()); - for(DistanceDBIDPair<D> pair : rKNN) { + ModifiableDoubleDBIDList ret = DBIDUtil.newDistanceDBIDList(rKNN.size()); + for(DoubleDBIDPair pair : rKNN) { ret.add(pair); } ret.sort(); return ret; } - @SuppressWarnings("unchecked") @Override - public <S extends Distance<S>> RKNNQuery<O, S> getRKNNQuery(DistanceQuery<O, S> distanceQuery, Object... hints) { + public RKNNQuery<O> getRKNNQuery(DistanceQuery<O> distanceQuery, Object... hints) { if(!this.distanceFunction.equals(distanceQuery.getDistanceFunction())) { return null; } @@ -399,7 +363,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends break; } } - return new PreprocessorRKNNQuery<>(relation, (MaterializeKNNAndRKNNPreprocessor<O, S>) this); + return new PreprocessorRKNNQuery<>(relation, this); } @Override @@ -423,22 +387,22 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * @author Elke Achtert * * @param <O> The object type - * @param <D> The distance type + * @param The distance type */ - public static class Factory<O, D extends Distance<D>> extends MaterializeKNNPreprocessor.Factory<O, D> { + public static class Factory<O> extends MaterializeKNNPreprocessor.Factory<O> { /** * Constructor. * * @param k k * @param distanceFunction distance function */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction) { + public Factory(int k, DistanceFunction<? super O> distanceFunction) { super(k, distanceFunction); } @Override - public MaterializeKNNAndRKNNPreprocessor<O, D> instantiate(Relation<O> relation) { - MaterializeKNNAndRKNNPreprocessor<O, D> instance = new MaterializeKNNAndRKNNPreprocessor<>(relation, distanceFunction, k); + public MaterializeKNNAndRKNNPreprocessor<O> instantiate(Relation<O> relation) { + MaterializeKNNAndRKNNPreprocessor<O> instance = new MaterializeKNNAndRKNNPreprocessor<>(relation, distanceFunction, k); return instance; } @@ -449,9 +413,9 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * * @apiviz.exclude */ - public static class Parameterizer<O, D extends Distance<D>> extends MaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O> extends MaterializeKNNPreprocessor.Factory.Parameterizer<O> { @Override - protected Factory<O, D> makeInstance() { + protected Factory<O> makeInstance() { return new Factory<>(k, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java index 50ef21ed..ef48e4cb 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -33,14 +33,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.DynamicIndex; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -63,11 +62,10 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * @apiviz.has KNNListener * * @param <O> the type of database objects the preprocessor can be applied to - * @param <D> the type of distance the used distance function will return */ @Title("Materialize kNN Neighborhood preprocessor") @Description("Materializes the k nearest neighbors of objects of a database.") -public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNList<D>> implements DynamicIndex { +public class MaterializeKNNPreprocessor<O> extends AbstractMaterializeKNNPreprocessor<O> implements DynamicIndex { /** * Logger to use. */ @@ -83,7 +81,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra /** * KNNQuery instance to use. */ - protected final KNNQuery<O, D> knnQuery; + protected final KNNQuery<O> knnQuery; /** * Holds the listener. @@ -97,7 +95,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra * @param distanceFunction the distance function to use * @param k query k */ - public MaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) { + public MaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k) { super(relation, distanceFunction, k); this.knnQuery = relation.getDatabase().getKNNQuery(distanceQuery, k, DatabaseQuery.HINT_BULK, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_NO_CACHE); } @@ -111,42 +109,33 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); - if (LOG.isStatistics()) { + if(LOG.isStatistics()) { LOG.statistics(new LongStatistic(this.getClass().getName() + ".k", k)); } - Duration duration = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".precomputation-time") : null; - if (duration != null) { - duration.begin(); - } + Duration duration = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".precomputation-time").begin() : null; FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors (k=" + k + ")", ids.size(), getLogger()) : null; // Try bulk - List<? extends KNNList<D>> kNNList = null; - if (usebulk) { + List<? extends KNNList> kNNList = null; + if(usebulk) { kNNList = knnQuery.getKNNForBulkDBIDs(ids, k); - if (kNNList != null) { + if(kNNList != null) { int i = 0; - for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { + for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { storage.put(id, kNNList.get(i)); - if (progress != null) { - progress.incrementProcessed(getLogger()); - } + getLogger().incrementProcessed(progress); } } - } else { - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - KNNList<D> knn = knnQuery.getKNNForDBID(iter, k); + } + else { + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + KNNList knn = knnQuery.getKNNForDBID(iter, k); storage.put(iter, knn); - if (progress != null) { - progress.incrementProcessed(getLogger()); - } + getLogger().incrementProcessed(progress); } } - if (progress != null) { - progress.ensureCompleted(getLogger()); - } - if (duration != null) { - duration.end(); - LOG.statistics(duration); + getLogger().ensureCompleted(progress); + if(duration != null) { + LOG.statistics(duration.end()); } } @@ -157,9 +146,10 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra @Override public void insertAll(DBIDs ids) { - if (storage == null && ids.size() > 0) { + if(storage == null && ids.size() > 0) { preprocess(); - } else { + } + else { objectsInserted(ids); } } @@ -186,32 +176,24 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra ArrayDBIDs aids = DBIDUtil.ensureArray(ids); // materialize the new kNNs - if (stepprog != null) { - stepprog.beginStep(1, "New insertions ocurred, materialize their new kNNs.", getLogger()); - } + getLogger().beginStep(stepprog, 1, "New insertions ocurred, materialize their new kNNs."); // Bulk-query kNNs - List<? extends KNNList<D>> kNNList = knnQuery.getKNNForBulkDBIDs(aids, k); + List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(aids, k); // Store in storage DBIDIter iter = aids.iter(); - for (int i = 0; i < aids.size(); i++, iter.advance()) { + for(int i = 0; i < aids.size(); i++, iter.advance()) { storage.put(iter, kNNList.get(i)); } // update the affected kNNs - if (stepprog != null) { - stepprog.beginStep(2, "New insertions ocurred, update the affected kNNs.", getLogger()); - } + getLogger().beginStep(stepprog, 2, "New insertions ocurred, update the affected kNNs."); ArrayDBIDs rkNN_ids = updateKNNsAfterInsertion(ids); // inform listener - if (stepprog != null) { - stepprog.beginStep(3, "New insertions ocurred, inform listeners.", getLogger()); - } + getLogger().beginStep(stepprog, 3, "New insertions ocurred, inform listeners."); fireKNNsInserted(ids, rkNN_ids); - if (stepprog != null) { - stepprog.setCompleted(getLogger()); - } + getLogger().setCompleted(stepprog); } /** @@ -225,21 +207,21 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra private ArrayDBIDs updateKNNsAfterInsertion(DBIDs ids) { ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray(); DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids); - for (DBIDIter iter = oldids.iter(); iter.valid(); iter.advance()) { - KNNList<D> kNNs = storage.get(iter); - D knnDist = kNNs.get(kNNs.size() - 1).getDistance(); + for(DBIDIter iter = oldids.iter(); iter.valid(); iter.advance()) { + KNNList kNNs = storage.get(iter); + double knnDist = kNNs.getKNNDistance(); // look for new kNNs - KNNHeap<D> heap = null; - for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { - D dist = distanceQuery.distance(iter, iter2); - if (dist.compareTo(knnDist) <= 0) { - if (heap == null) { + KNNHeap heap = null; + for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { + double dist = distanceQuery.distance(iter, iter2); + if(dist <= knnDist) { + if(heap == null) { heap = DBIDUtil.newHeap(kNNs); } heap.insert(dist, iter2); } } - if (heap != null) { + if(heap != null) { kNNs = heap.toKNNList(); storage.put(iter, kNNs); rkNN_ids.add(iter); @@ -258,10 +240,10 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra private ArrayDBIDs updateKNNsAfterDeletion(DBIDs ids) { SetDBIDs idsSet = DBIDUtil.ensureSet(ids); ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray(); - for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - KNNList<D> kNNs = storage.get(iditer); - for (DBIDIter it = kNNs.iter(); it.valid(); it.advance()) { - if (idsSet.contains(it)) { + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + KNNList kNNs = storage.get(iditer); + for(DBIDIter it = kNNs.iter(); it.valid(); it.advance()) { + if(idsSet.contains(it)) { rkNN_ids.add(iditer); break; } @@ -269,9 +251,9 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra } // update the kNNs of the RkNNs - List<? extends KNNList<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); + List<? extends KNNList> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); DBIDIter iter = rkNN_ids.iter(); - for (int i = 0; i < rkNN_ids.size(); i++, iter.advance()) { + for(int i = 0; i < rkNN_ids.size(); i++, iter.advance()) { storage.put(iter, kNNList.get(i)); } @@ -288,28 +270,20 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra StepProgress stepprog = getLogger().isVerbose() ? new StepProgress(3) : null; // delete the materialized (old) kNNs - if (stepprog != null) { - stepprog.beginStep(1, "New deletions ocurred, remove their materialized kNNs.", getLogger()); - } - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + getLogger().beginStep(stepprog, 1, "New deletions ocurred, remove their materialized kNNs."); + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { storage.delete(iter); } // update the affected kNNs - if (stepprog != null) { - stepprog.beginStep(2, "New deletions ocurred, update the affected kNNs.", getLogger()); - } + getLogger().beginStep(stepprog, 2, "New deletions ocurred, update the affected kNNs."); ArrayDBIDs rkNN_ids = updateKNNsAfterDeletion(ids); // inform listener - if (stepprog != null) { - stepprog.beginStep(3, "New deletions ocurred, inform listeners.", getLogger()); - } + getLogger().beginStep(stepprog, 3, "New deletions ocurred, inform listeners."); fireKNNsRemoved(ids, rkNN_ids); - if (stepprog != null) { - stepprog.ensureCompleted(getLogger()); - } + getLogger().ensureCompleted(stepprog); } /** @@ -324,8 +298,8 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra protected void fireKNNsInserted(DBIDs insertions, DBIDs updates) { KNNChangeEvent e = new KNNChangeEvent(this, KNNChangeEvent.Type.INSERT, insertions, updates); Object[] listeners = listenerList.getListenerList(); - for (int i = listeners.length - 2; i >= 0; i -= 2) { - if (listeners[i] == KNNListener.class) { + for(int i = listeners.length - 2; i >= 0; i -= 2) { + if(listeners[i] == KNNListener.class) { ((KNNListener) listeners[i + 1]).kNNsChanged(e); } } @@ -342,8 +316,8 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra protected void fireKNNsRemoved(DBIDs removals, DBIDs updates) { KNNChangeEvent e = new KNNChangeEvent(this, KNNChangeEvent.Type.DELETE, removals, updates); Object[] listeners = listenerList.getListenerList(); - for (int i = listeners.length - 2; i >= 0; i -= 2) { - if (listeners[i] == KNNListener.class) { + for(int i = listeners.length - 2; i >= 0; i -= 2) { + if(listeners[i] == KNNListener.class) { ((KNNListener) listeners[i + 1]).kNNsChanged(e); } } @@ -403,22 +377,21 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra * @apiviz.uses MaterializeKNNPreprocessor oneway - - «create» * * @param <O> The object type - * @param <D> The distance type */ - public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNList<D>> { + public static class Factory<O> extends AbstractMaterializeKNNPreprocessor.Factory<O> { /** * Index factory. * * @param k k parameter * @param distanceFunction distance function */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction) { + public Factory(int k, DistanceFunction<? super O> distanceFunction) { super(k, distanceFunction); } @Override - public MaterializeKNNPreprocessor<O, D> instantiate(Relation<O> relation) { - MaterializeKNNPreprocessor<O, D> instance = new MaterializeKNNPreprocessor<>(relation, distanceFunction, k); + public MaterializeKNNPreprocessor<O> instantiate(Relation<O> relation) { + MaterializeKNNPreprocessor<O> instance = new MaterializeKNNPreprocessor<>(relation, distanceFunction, k); return instance; } @@ -429,9 +402,9 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra * * @apiviz.exclude */ - public static class Parameterizer<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O> { @Override - protected Factory<O, D> makeInstance() { + protected Factory<O> makeInstance() { return new Factory<>(k, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java index 43798fbc..b3e73abb 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,10 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import gnu.trove.impl.Constants; +import gnu.trove.map.hash.TObjectDoubleHashMap; + import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; @@ -32,12 +34,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDPair; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; import de.lmu.ifi.dbs.elki.index.tree.Node; import de.lmu.ifi.dbs.elki.index.tree.metrical.MetricalIndexTree; @@ -63,13 +63,12 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * @apiviz.uses MetricalIndexTree * * @param <O> the type of database objects the preprocessor can be applied to - * @param <D> the type of distance the used distance function will return * @param <N> the type of spatial nodes in the spatial index * @param <E> the type of spatial entries in the spatial index */ @Title("Spatial Approximation Materialize kNN Preprocessor") @Description("Caterializes the (approximate) k nearest neighbors of objects of a database using a spatial approximation.") -public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends NumberVector<?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry> extends AbstractMaterializeKNNPreprocessor<O, D, KNNList<D>> { +public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends NumberVector, N extends Node<E>, E extends MTreeEntry> extends AbstractMaterializeKNNPreprocessor<O> { /** * Logger to use */ @@ -82,15 +81,15 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb * @param distanceFunction the distance function to use * @param k query k */ - public MetricalIndexApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) { + public MetricalIndexApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k) { super(relation, distanceFunction, k); } @Override protected void preprocess() { - DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); + DistanceQuery<O> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); - MetricalIndexTree<O, D, N, E> index = getMetricalIndex(relation); + MetricalIndexTree<O, N, E> index = getMetricalIndex(relation); createStorage(); MeanVariance pagesize = new MeanVariance(); @@ -113,13 +112,13 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb for(int i = 0; i < size; i++) { ids.add(((LeafEntry) node.getEntry(i)).getDBID()); } - HashMap<DBIDPair, D> cache = new HashMap<>((size * size * 3) >> 2); + TObjectDoubleHashMap<DBIDPair> cache = new TObjectDoubleHashMap<>((size * size * 3) >> 2, Constants.DEFAULT_LOAD_FACTOR, Double.NaN); for(DBIDIter id = ids.iter(); id.valid(); id.advance()) { - KNNHeap<D> kNN = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k); + KNNHeap kNN = DBIDUtil.newHeap(k); for(DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) { DBIDPair key = DBIDUtil.newPair(id, id2); - D d = cache.remove(key); - if(d != null) { + double d = cache.remove(key); + if(d == d) { // Not NaN // consume the previous result. kNN.insert(d, id2); } @@ -140,13 +139,9 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb getLogger().warning("Cache should be empty after each run, but still has " + cache.size() + " elements."); } } - if(progress != null) { - progress.incrementProcessed(getLogger()); - } - } - if(progress != null) { - progress.ensureCompleted(getLogger()); + getLogger().incrementProcessed(progress); } + getLogger().ensureCompleted(progress); if(getLogger().isVerbose()) { getLogger().verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev()); getLogger().verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned."); @@ -161,9 +156,9 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb * @return Metrical index * @throws IllegalStateException when the cast fails. */ - private MetricalIndexTree<O, D, N, E> getMetricalIndex(Relation<O> relation) throws IllegalStateException { - Class<MetricalIndexTree<O, D, N, E>> mcls = ClassGenericsUtil.uglyCastIntoSubclass(MetricalIndexTree.class); - ArrayList<MetricalIndexTree<O, D, N, E>> indexes = ResultUtil.filterResults(relation.getDatabase(), mcls); + private MetricalIndexTree<O, N, E> getMetricalIndex(Relation<O> relation) throws IllegalStateException { + Class<MetricalIndexTree<O, N, E>> mcls = ClassGenericsUtil.uglyCastIntoSubclass(MetricalIndexTree.class); + ArrayList<MetricalIndexTree<O, N, E>> indexes = ResultUtil.filterResults(relation.getDatabase(), mcls); // FIXME: check we got the right the representation if(indexes.size() == 1) { return indexes.get(0); @@ -204,24 +199,23 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb * - «create» * * @param <O> the type of database objects the preprocessor can be applied to - * @param <D> the type of distance the used distance function will return * @param <N> the type of spatial nodes in the spatial index * @param <E> the type of spatial entries in the spatial index */ - public static class Factory<O extends NumberVector<?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNList<D>> { + public static class Factory<O extends NumberVector, N extends Node<E>, E extends MTreeEntry> extends AbstractMaterializeKNNPreprocessor.Factory<O> { /** * Constructor. * * @param k k * @param distanceFunction distance function */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction) { + public Factory(int k, DistanceFunction<? super O> distanceFunction) { super(k, distanceFunction); } @Override - public MetricalIndexApproximationMaterializeKNNPreprocessor<O, D, N, E> instantiate(Relation<O> relation) { - MetricalIndexApproximationMaterializeKNNPreprocessor<O, D, N, E> instance = new MetricalIndexApproximationMaterializeKNNPreprocessor<>(relation, distanceFunction, k); + public MetricalIndexApproximationMaterializeKNNPreprocessor<O, N, E> instantiate(Relation<O> relation) { + MetricalIndexApproximationMaterializeKNNPreprocessor<O, N, E> instance = new MetricalIndexApproximationMaterializeKNNPreprocessor<>(relation, distanceFunction, k); return instance; } @@ -232,9 +226,9 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O extends NumberVector, N extends Node<E>, E extends MTreeEntry> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O> { @Override - protected Factory<O, D, N, E> makeInstance() { + protected Factory<O, N, E> makeInstance() { return new Factory<>(k, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java index b6cc5103..ef276e78 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,24 +23,23 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.HashMap; - +import gnu.trove.impl.Constants; +import gnu.trove.map.hash.TObjectDoubleHashMap; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDPair; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.MeanVariance; -import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -58,11 +57,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * @author Erich Schubert * * @param <O> the type of database objects the preprocessor can be applied to - * @param <D> the type of distance the used distance function will return */ @Title("Partitioning Approximate kNN Preprocessor") @Description("Caterializes the (approximate) k nearest neighbors of objects of a database by partitioning and only computing kNN within each partition.") -public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNList<D>> { +public class PartitionApproximationMaterializeKNNPreprocessor<O> extends AbstractMaterializeKNNPreprocessor<O> { /** * Logger to use */ @@ -87,7 +85,7 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista * @param partitions Number of partitions * @param rnd Random number generator */ - public PartitionApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, int partitions, RandomFactory rnd) { + public PartitionApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k, int partitions, RandomFactory rnd) { super(relation, distanceFunction, k); this.partitions = partitions; this.rnd = rnd; @@ -95,7 +93,7 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista @Override protected void preprocess() { - DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); + DistanceQuery<O> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class); MeanVariance ksize = new MeanVariance(); if(LOG.isVerbose()) { @@ -109,13 +107,13 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista for(int part = 0; part < partitions; part++) { final ArrayDBIDs ids = parts[part]; final int size = ids.size(); - HashMap<DBIDPair, D> cache = new HashMap<>((size * size * 3) >> 3); + TObjectDoubleHashMap<DBIDPair> cache = new TObjectDoubleHashMap<>((size * size * 3) >> 3, Constants.DEFAULT_LOAD_FACTOR, Double.NaN); for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - KNNHeap<D> kNN = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k); + KNNHeap kNN = DBIDUtil.newHeap(k); for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { DBIDPair key = DBIDUtil.newPair(iter, iter2); - D d = cache.remove(key); - if(d != null) { + double d = cache.remove(key); + if(d == d) { // Not NaN // consume the previous result. kNN.insert(d, iter2); } @@ -136,13 +134,9 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements."); } } - if(progress != null) { - progress.incrementProcessed(LOG); - } - } - if(progress != null) { - progress.ensureCompleted(LOG); + LOG.incrementProcessed(progress); } + LOG.ensureCompleted(progress); if(LOG.isVerbose()) { LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned."); } @@ -178,9 +172,8 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista * «create» * * @param <O> The object type - * @param <D> The distance type */ - public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNList<D>> { + public static class Factory<O> extends AbstractMaterializeKNNPreprocessor.Factory<O> { /** * The number of partitions to use */ @@ -199,15 +192,15 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista * @param partitions number of partitions * @param rnd */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction, int partitions, RandomFactory rnd) { + public Factory(int k, DistanceFunction<? super O> distanceFunction, int partitions, RandomFactory rnd) { super(k, distanceFunction); this.partitions = partitions; this.rnd = rnd; } @Override - public PartitionApproximationMaterializeKNNPreprocessor<O, D> instantiate(Relation<O> relation) { - PartitionApproximationMaterializeKNNPreprocessor<O, D> instance = new PartitionApproximationMaterializeKNNPreprocessor<>(relation, distanceFunction, k, partitions, rnd); + public PartitionApproximationMaterializeKNNPreprocessor<O> instantiate(Relation<O> relation) { + PartitionApproximationMaterializeKNNPreprocessor<O> instance = new PartitionApproximationMaterializeKNNPreprocessor<>(relation, distanceFunction, k, partitions, rnd); return instance; } @@ -218,7 +211,7 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista * * @apiviz.exclude */ - public static class Parameterizer<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O> { /** * Parameter to specify the number of partitions to use for materializing * the kNN. Must be an integer greater than 1. @@ -261,7 +254,7 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista } @Override - protected Factory<O, D> makeInstance() { + protected Factory<O> makeInstance() { return new Factory<>(k, distanceFunction, partitions, rnd); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java index 87b5a3c0..0f07713f 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,15 +29,14 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; -import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; @@ -60,10 +59,9 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * @author Erich Schubert * * @param <O> Object type - * @param <D> Distance type */ @Reference(title = "Subsampling for Efficient and Effective Unsupervised Outlier Detection Ensembles", authors = "A. Zimek and M. Gaudet and R. J. G. B. Campello and J. Sander", booktitle = "Proc. 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '13") -public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNList<D>> { +public class RandomSampleKNNPreprocessor<O> extends AbstractMaterializeKNNPreprocessor<O> { /** * Logger */ @@ -88,7 +86,7 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * @param share Relative share * @param rnd Random generator */ - public RandomSampleKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, double share, RandomFactory rnd) { + public RandomSampleKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k, double share, RandomFactory rnd) { super(relation, distanceFunction, k); this.share = share; this.rnd = rnd; @@ -96,7 +94,7 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr @Override protected void preprocess() { - DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); + DistanceQuery<O> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class); FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null; @@ -104,23 +102,19 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr final int samplesize = (int) (ids.size() * share); for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - KNNHeap<D> kNN = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k); + KNNHeap kNN = DBIDUtil.newHeap(k); DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, rnd); for(DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) { - D dist = distanceQuery.distance(iter, iter2); + double dist = distanceQuery.distance(iter, iter2); kNN.insert(dist, iter2); } storage.put(iter, kNN.toKNNList()); - if(progress != null) { - progress.incrementProcessed(getLogger()); - } + getLogger().incrementProcessed(progress); } - if(progress != null) { - progress.ensureCompleted(getLogger()); - } + getLogger().ensureCompleted(progress); } @Override @@ -153,9 +147,8 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * @apiviz.uses AbstractMaterializeKNNPreprocessor oneway - - «create» * * @param <O> The object type - * @param <D> The distance type */ - public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNList<D>> { + public static class Factory<O> extends AbstractMaterializeKNNPreprocessor.Factory<O> { /** * Relative share of objects to get */ @@ -174,14 +167,14 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * @param share Sample size (relative) * @param rnd Random generator */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction, double share, RandomFactory rnd) { + public Factory(int k, DistanceFunction<? super O> distanceFunction, double share, RandomFactory rnd) { super(k, distanceFunction); this.share = share; this.rnd = rnd; } @Override - public RandomSampleKNNPreprocessor<O, D> instantiate(Relation<O> relation) { + public RandomSampleKNNPreprocessor<O> instantiate(Relation<O> relation) { return new RandomSampleKNNPreprocessor<>(relation, distanceFunction, k, share, rnd); } @@ -193,9 +186,8 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * @apiviz.exclude * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O> { /** * Parameter to specify how many objects to consider for computing the * kNN. @@ -241,7 +233,7 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr } @Override - protected RandomSampleKNNPreprocessor.Factory<O, D> makeInstance() { + protected RandomSampleKNNPreprocessor.Factory<O> makeInstance() { return new RandomSampleKNNPreprocessor.Factory<>(k, distanceFunction, share, rnd); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java index 83f8f6d8..cd363091 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,10 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import gnu.trove.impl.Constants; +import gnu.trove.map.hash.TObjectDoubleHashMap; + import java.util.Collection; -import java.util.HashMap; import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; @@ -34,12 +36,11 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDPair; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.KNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialIndexTree; @@ -64,13 +65,12 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * * @apiviz.uses SpatialIndexTree * - * @param <D> the type of distance the used distance function will return * @param <N> the type of spatial nodes in the spatial index * @param <E> the type of spatial entries in the spatial index */ @Title("Spatial Approximation Materialize kNN Preprocessor") @Description("Caterializes the (approximate) k nearest neighbors of objects of a database using a spatial approximation.") -public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVector<?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor<O, D, KNNList<D>> { +public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVector, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor<O> { /** * Logger to use */ @@ -83,13 +83,13 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect * @param distanceFunction the distance function to use * @param k query k */ - public SpatialApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) { + public SpatialApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O> distanceFunction, int k) { super(relation, distanceFunction, k); } @Override protected void preprocess() { - DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); + DistanceQuery<O> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); Collection<SpatialIndexTree<N, E>> indexes = ResultUtil.filterResults(relation, SpatialIndexTree.class); if(indexes.size() != 1) { @@ -118,13 +118,13 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect for(int i = 0; i < size; i++) { ids.add(((LeafEntry) node.getEntry(i)).getDBID()); } - HashMap<DBIDPair, D> cache = new HashMap<>((size * size * 3) >> 3); + TObjectDoubleHashMap<DBIDPair> cache = new TObjectDoubleHashMap<>((size * size * 3) >> 3, Constants.DEFAULT_LOAD_FACTOR, Double.NaN); for(DBIDIter id = ids.iter(); id.valid(); id.advance()) { - KNNHeap<D> kNN = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k); + KNNHeap kNN = DBIDUtil.newHeap(k); for(DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) { DBIDPair key = DBIDUtil.newPair(id, id2); - D d = cache.remove(key); - if(d != null) { + double d = cache.remove(key); + if(d == d) { // Not NaN // consume the previous result. kNN.insert(d, id2); } @@ -145,13 +145,9 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect getLogger().warning("Cache should be empty after each run, but still has " + cache.size() + " elements."); } } - if(progress != null) { - progress.incrementProcessed(getLogger()); - } - } - if(progress != null) { - progress.ensureCompleted(getLogger()); + getLogger().incrementProcessed(progress); } + getLogger().ensureCompleted(progress); if(getLogger().isVerbose()) { getLogger().verbose("Average page size = " + pagesize.getMean() + " +- " + pagesize.getSampleStddev()); getLogger().verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned."); @@ -187,24 +183,23 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect * @apiviz.uses SpatialApproximationMaterializeKNNPreprocessor oneway - - * «create» * - * @param <D> the type of distance the used distance function will return * @param <N> the type of spatial nodes in the spatial index * @param <E> the type of spatial entries in the spatial index */ - public static class Factory<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory<NumberVector<?>, D, KNNList<D>> { + public static class Factory<N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory<NumberVector> { /** * Constructor. * * @param k k * @param distanceFunction distance function */ - public Factory(int k, DistanceFunction<? super NumberVector<?>, D> distanceFunction) { + public Factory(int k, DistanceFunction<? super NumberVector> distanceFunction) { super(k, distanceFunction); } @Override - public SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?>, D, N, E> instantiate(Relation<NumberVector<?>> relation) { - SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?>, D, N, E> instance = new SpatialApproximationMaterializeKNNPreprocessor<>(relation, distanceFunction, k); + public SpatialApproximationMaterializeKNNPreprocessor<NumberVector, N, E> instantiate(Relation<NumberVector> relation) { + SpatialApproximationMaterializeKNNPreprocessor<NumberVector, N, E> instance = new SpatialApproximationMaterializeKNNPreprocessor<>(relation, distanceFunction, k); return instance; } @@ -215,9 +210,9 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect * * @apiviz.exclude */ - public static class Parameterizer<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<NumberVector<?>, D> { + public static class Parameterizer<N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<NumberVector> { @Override - protected Factory<D, N, E> makeInstance() { + protected Factory<N, E> makeInstance() { return new Factory<>(k, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java index 47339bb4..f5e3d978 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |