diff options
author | Erich Schubert <erich@debian.org> | 2013-10-29 20:02:37 +0100 |
---|---|---|
committer | Andrej Shadura <andrewsh@debian.org> | 2019-03-09 22:30:37 +0000 |
commit | ec7f409f6e795bbcc6f3c005687954e9475c600c (patch) | |
tree | fbf36c0ab791c556198b487ca40ae56ae5ab1ee5 /src/de/lmu/ifi/dbs/elki/database/query/knn | |
parent | 974d4cf6d54cadc06258039f2cd0515cc34aeac6 (diff) | |
parent | 8300861dc4c62c5567a4e654976072f854217544 (diff) |
Import Debian changes 0.6.0~beta2-1
elki (0.6.0~beta2-1) unstable; urgency=low
* New upstream beta release.
* 3DPC extension is not yet included.
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/database/query/knn')
8 files changed, 388 insertions, 204 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/AbstractDistanceKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/AbstractDistanceKNNQuery.java index 0ac2e8ee..f9f7d018 100644 --- a/src/de/lmu/ifi/dbs/elki/database/query/knn/AbstractDistanceKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/AbstractDistanceKNNQuery.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.database.query.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,10 +23,15 @@ package de.lmu.ifi.dbs.elki.database.query.knn; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.ArrayList; +import java.util.List; + +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.AbstractDataBasedQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; /** @@ -51,8 +56,22 @@ public abstract class AbstractDistanceKNNQuery<O, D extends Distance<D>> extends } @Override - abstract public KNNResult<D> getKNNForDBID(DBIDRef id, int k); + public List<? extends KNNList<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { + // throw new + // UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); + // TODO: optimize + List<KNNList<D>> ret = new ArrayList<>(ids.size()); + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + ret.add(getKNNForDBID(iter, k)); + } + return ret; + } + + @Override + public KNNList<D> getKNNForDBID(DBIDRef id, int k) { + return getKNNForObject(relation.get(id), k); + } @Override - abstract public KNNResult<D> getKNNForObject(O obj, int k); -}
\ No newline at end of file + abstract public KNNList<D> getKNNForObject(O obj, int k); +} diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedKNNQuery.java new file mode 100644 index 00000000..b18db8f0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/DoubleOptimizedKNNQuery.java @@ -0,0 +1,309 @@ +package de.lmu.ifi.dbs.elki.database.query.knn; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.ArrayList; + +import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.generic.DistanceDBIDPairKNNList; +import de.lmu.ifi.dbs.elki.database.ids.generic.DoubleDistanceDBIDPairKNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.generic.DoubleDistanceDBIDPairKNNList; +import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDKNNList; +import de.lmu.ifi.dbs.elki.database.ids.integer.DoubleDistanceIntegerDBIDKNNListHeap; +import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; +import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap; + +/** + * Optimized linear scan query for {@link PrimitiveDoubleDistanceFunction}s. + * + * @author Erich Schubert + * + * @apiviz.uses PrimitiveDoubleDistanceFunction + * + * @param <O> Object type + */ +public class DoubleOptimizedKNNQuery<O> extends LinearScanKNNQuery<O, DoubleDistance> { + /** + * Raw distance function. + */ + PrimitiveDoubleDistanceFunction<O> rawdist; + + /** + * Constructor. + * + * @param distanceQuery Distance function to use + */ + @SuppressWarnings("unchecked") + public DoubleOptimizedKNNQuery(PrimitiveDistanceQuery<O, DoubleDistance> distanceQuery) { + super(distanceQuery); + if(!(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction)) { + throw new UnsupportedOperationException("DoubleOptimizedKNNQuery instantiated for non-PrimitiveDoubleDistanceFunction!"); + } + rawdist = (PrimitiveDoubleDistanceFunction<O>) distanceQuery.getDistanceFunction(); + } + + @Override + public KNNList<DoubleDistance> getKNNForDBID(DBIDRef id, int k) { + return getKNNForObject(relation.get(id), k); + } + + @Override + public KNNList<DoubleDistance> getKNNForObject(O obj, int k) { + return getKNNForObjectBenchmarked(obj, k); + } + + /** + * This is the straightforward implementation using the optimized heap. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectKNNHeap(O obj, int k) { + // Optimization for double distances. + final DoubleDistanceKNNHeap heap = (DoubleDistanceKNNHeap) DBIDUtil.newHeap(DoubleDistance.FACTORY, k); + for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + heap.add(rawdist.doubleDistance(obj, relation.get(iter)), iter); + } + return heap.toKNNList(); + } + + /** + * This is the cleaner, supposedly faster implementation. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectClean(O obj, int k) { + // Optimization for double distances. + final TiedTopBoundedHeap<DoubleDistanceDBIDPair> heap = new TiedTopBoundedHeap<>(k, DoubleDistanceDBIDPairKNNHeap.COMPARATOR); + final DBIDIter iter = relation.iterDBIDs(); + + // First k elements don't need checking. + double max = 0.; + for(int i = 0; i < k && iter.valid(); i++, iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter)); + max = Math.max(max, doubleDistance); + } + // Remaining elements + for(; iter.valid(); iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + if(doubleDistance <= max) { + heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter)); + } + if(doubleDistance < max) { + max = heap.peek().doubleDistance(); + } + } + return new DoubleDistanceDBIDPairKNNList(heap, k); + } + + /** + * It does not make sense, but this version is faster in our larger + * benchmarks. Apparently, some JIT optimization kicks in better. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectBenchmarked(O obj, int k) { + // THIS SHOULD BE SLOWER THAN THE VERSION ABOVE, BUT ISN'T! + final TiedTopBoundedHeap<DistanceDBIDPair<DoubleDistance>> heap = new TiedTopBoundedHeap<>(k, DistanceDBIDResultUtil.BY_REVERSE_DISTANCE); + final DBIDIter iter = relation.iterDBIDs(); + // First k elements don't need checking. + double max = 0.; + for(int i = 0; i < k && iter.valid(); i++, iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + heap.add(DBIDFactory.FACTORY.newDistancePair(new DoubleDistance(doubleDistance), iter)); + max = Math.max(max, doubleDistance); + } + // Remaining elements + for(; iter.valid(); iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + if(doubleDistance <= max) { + heap.add(DBIDFactory.FACTORY.newDistancePair(new DoubleDistance(doubleDistance), iter)); + } + if(doubleDistance < max) { + max = heap.peek().getDistance().doubleValue(); + } + } + return new DistanceDBIDPairKNNList<>(heap, k); + } + + /** + * Another attempt at getting a faster knn heap. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectBenchmarked2(O obj, int k) { + final Heap<DoubleDistanceDBIDPair> heap = new Heap<>(k, DistanceDBIDResultUtil.BY_REVERSE_DISTANCE); + final ArrayList<DoubleDistanceDBIDPair> ties = new ArrayList<>(); + final DBIDIter iter = relation.iterDBIDs(); + // First k elements don't need checking. + double max = 0.; + for(int i = 0; i < k && iter.valid(); i++, iter.advance()) { + final double doubleDistance = rawdist.distance(obj, relation.get(iter)).doubleValue(); + heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter)); + max = Math.max(max, doubleDistance); + } + // Remaining elements + for(; iter.valid(); iter.advance()) { + final double doubleDistance = rawdist.distance(obj, relation.get(iter)).doubleValue(); + if(doubleDistance <= max) { + if(doubleDistance < max) { + DoubleDistanceDBIDPair prev = heap.replaceTopElement(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter)); + double newkdist = heap.peek().doubleDistance(); + if(newkdist < max) { + max = newkdist; + ties.clear(); + } + else { + ties.add(prev); + } + } + else { + ties.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter)); + } + } + } + + DoubleDistanceIntegerDBIDKNNList ret = new DoubleDistanceIntegerDBIDKNNList(k, k + ties.size()); + for(DoubleDistanceDBIDPair pair : ties) { + ret.add(pair); + } + while(!heap.isEmpty()) { + ret.add(heap.poll()); + } + ret.sort(); // Actually, reverse. + return ret; + } + + /** + * Next attempt at exploiting the JIT fully. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectBenchmarked3(O obj, int k) { + DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k); + final DBIDIter iter = relation.iterDBIDs(); + // First k elements don't need checking. + double max = 0.; + for(int i = 0; i < k && iter.valid(); i++, iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + heap.add(doubleDistance, iter); + max = Math.max(max, doubleDistance); + } + // Remaining elements + for(; iter.valid(); iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + if(doubleDistance <= max) { + heap.add(doubleDistance, iter); + } + if(doubleDistance < max) { + max = heap.peek().doubleDistance(); + } + } + return heap.toKNNList(); + } + + /** + * Next attempt at exploiting the JIT fully. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectBenchmarked4(O obj, int k) { + DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k); + final DBIDIter iter = relation.iterDBIDs(); + // First k elements don't need checking. + for(int i = 0; i < k && iter.valid(); i++, iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + heap.add(doubleDistance, iter); + } + double max = heap.doubleKNNDistance(); + // Remaining elements + for(; iter.valid(); iter.advance()) { + final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); + if(doubleDistance <= max) { + heap.add(doubleDistance, iter); + max = heap.doubleKNNDistance(); + } + } + return heap.toKNNList(); + } + + /** + * Next attempt at exploiting the JIT fully. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectBenchmarked5(O obj, int k) { + DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k); + double max = Double.POSITIVE_INFINITY; + for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + final double distance = rawdist.doubleDistance(obj, relation.get(iter)); + if(distance <= max) { + heap.add(distance, iter); + max = heap.doubleKNNDistance(); + } + } + return heap.toKNNList(); + } + + /** + * Next attempt at exploiting the JIT fully. + * + * @param obj Query object + * @param k Desired number of neighbors + * @return kNN result + */ + KNNList<DoubleDistance> getKNNForObjectBenchmarked6(O obj, int k) { + DoubleDistanceKNNHeap heap = new DoubleDistanceIntegerDBIDKNNListHeap(k); + for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + final double distance = rawdist.doubleDistance(obj, relation.get(iter)); + heap.add(distance, iter); + } + return heap.toKNNList(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/KNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/KNNQuery.java index 5b517509..c94f3d42 100644 --- a/src/de/lmu/ifi/dbs/elki/database/query/knn/KNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/KNNQuery.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.database.query.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,8 +27,8 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; /** @@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; * @author Erich Schubert * * @apiviz.landmark - * @apiviz.has KNNResult oneway - - «create» + * @apiviz.has KNNList oneway - - «create» * * @param <O> Object type * @param <D> Distance type @@ -50,7 +50,7 @@ public interface KNNQuery<O, D extends Distance<D>> extends DatabaseQuery { * @param k Number of neighbors requested * @return neighbors */ - public KNNResult<D> getKNNForDBID(DBIDRef id, int k); + public KNNList<D> getKNNForDBID(DBIDRef id, int k); /** * Bulk query method @@ -59,7 +59,7 @@ public interface KNNQuery<O, D extends Distance<D>> extends DatabaseQuery { * @param k Number of neighbors requested * @return neighbors */ - public List<? extends KNNResult<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k); + public List<? extends KNNList<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k); /** * Get the k nearest neighbors for a particular id. @@ -68,5 +68,5 @@ public interface KNNQuery<O, D extends Distance<D>> extends DatabaseQuery { * @param k Number of neighbors requested * @return neighbors */ - public KNNResult<D> getKNNForObject(O obj, int k); + public KNNList<D> getKNNForObject(O obj, int k); }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanKNNQuery.java index 29fa9931..485330ea 100644 --- a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanKNNQuery.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.database.query.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,12 +29,12 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.LinearScanQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; /** @@ -63,9 +63,9 @@ public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistan */ private void linearScanBatchKNN(ArrayDBIDs ids, List<KNNHeap<D>> heaps) { // The distance is computed on database IDs - for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { + for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { int index = 0; - for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { + for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { KNNHeap<D> heap = heaps.get(index); heap.add(distanceQuery.distance(iter2, iter), iter); index++; @@ -74,27 +74,26 @@ public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistan } @Override - public KNNResult<D> getKNNForDBID(DBIDRef id, int k) { - KNNHeap<D> heap = KNNUtil.newHeap(distanceQuery.getDistanceFactory(), k); + public KNNList<D> getKNNForDBID(DBIDRef id, int k) { + KNNHeap<D> heap = DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k); D max = distanceQuery.getDistanceFactory().infiniteDistance(); - if(PrimitiveDistanceQuery.class.isInstance(distanceQuery)) { + if (PrimitiveDistanceQuery.class.isInstance(distanceQuery)) { O obj = relation.get(id); - for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { + for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { final D dist = distanceQuery.distance(obj, relation.get(iter)); - if(max.compareTo(dist) > 0) { + if (max.compareTo(dist) > 0) { heap.add(dist, iter); - if(heap.size() >= k) { + if (heap.size() >= k) { max = heap.getKNNDistance(); } } } - } - else { - for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { + } else { + for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { final D dist = distanceQuery.distance(id, iter); - if(max.compareTo(dist) > 0) { + if (max.compareTo(dist) > 0) { heap.add(dist, iter); - if(heap.size() >= k) { + if (heap.size() >= k) { max = heap.getKNNDistance(); } } @@ -104,27 +103,27 @@ public class LinearScanKNNQuery<O, D extends Distance<D>> extends AbstractDistan } @Override - public List<KNNResult<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { + public List<KNNList<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { final int size = ids.size(); - final List<KNNHeap<D>> heaps = new ArrayList<KNNHeap<D>>(size); - for(int i = 0; i < size; i++) { - heaps.add(KNNUtil.newHeap(distanceQuery.getDistanceFactory(), k)); + final List<KNNHeap<D>> heaps = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + heaps.add(DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k)); } linearScanBatchKNN(ids, heaps); // Serialize heaps - List<KNNResult<D>> result = new ArrayList<KNNResult<D>>(size); - for(KNNHeap<D> heap : heaps) { + List<KNNList<D>> result = new ArrayList<>(size); + for (KNNHeap<D> heap : heaps) { result.add(heap.toKNNList()); } return result; } @Override - public KNNResult<D> getKNNForObject(O obj, int k) { - KNNHeap<D> heap = KNNUtil.newHeap(distanceQuery.getDistanceFactory(), k); - for(DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { + public KNNList<D> getKNNForObject(O obj, int k) { + KNNHeap<D> heap = DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k); + for (DBIDIter iter = relation.getDBIDs().iter(); iter.valid(); iter.advance()) { heap.add(distanceQuery.distance(obj, iter), iter); } return heap.toKNNList(); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java index c4f09744..3f3fbc3f 100644 --- a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanPrimitiveDistanceKNNQuery.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.database.query.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,10 +29,10 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; /** @@ -74,22 +74,22 @@ public class LinearScanPrimitiveDistanceKNNQuery<O, D extends Distance<D>> exten } @Override - public KNNResult<D> getKNNForDBID(DBIDRef id, int k) { + public KNNList<D> getKNNForDBID(DBIDRef id, int k) { return getKNNForObject(relation.get(id), k); } @Override - public List<KNNResult<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { + public List<KNNList<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { final int size = ids.size(); - final List<KNNHeap<D>> heaps = new ArrayList<KNNHeap<D>>(size); - List<O> objs = new ArrayList<O>(size); + final List<KNNHeap<D>> heaps = new ArrayList<>(size); + List<O> objs = new ArrayList<>(size); for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - heaps.add(KNNUtil.newHeap(distanceQuery.getDistanceFactory(), k)); + heaps.add(DBIDUtil.newHeap(distanceQuery.getDistanceFactory(), k)); objs.add(relation.get(iter)); } linearScanBatchKNN(objs, heaps); - List<KNNResult<D>> result = new ArrayList<KNNResult<D>>(heaps.size()); + List<KNNList<D>> result = new ArrayList<>(heaps.size()); for(KNNHeap<D> heap : heaps) { result.add(heap.toKNNList()); } diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanRawDoubleDistanceKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanRawDoubleDistanceKNNQuery.java deleted file mode 100644 index 5e5eae2f..00000000 --- a/src/de/lmu/ifi/dbs/elki/database/query/knn/LinearScanRawDoubleDistanceKNNQuery.java +++ /dev/null @@ -1,143 +0,0 @@ -package de.lmu.ifi.dbs.elki.database.query.knn; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory; -import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery; -import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.AbstractKNNHeap; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNHeap; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericKNNList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap; - -/** - * Optimized linear scan query for {@link PrimitiveDoubleDistanceFunction}s. - * - * @author Erich Schubert - * - * @apiviz.uses PrimitiveDoubleDistanceFunction - * - * @param <O> Object type - */ -public class LinearScanRawDoubleDistanceKNNQuery<O> extends LinearScanPrimitiveDistanceKNNQuery<O, DoubleDistance> { - /** - * Raw distance function. - */ - PrimitiveDoubleDistanceFunction<O> rawdist; - - /** - * Constructor. - * - * @param distanceQuery Distance function to use - */ - @SuppressWarnings("unchecked") - public LinearScanRawDoubleDistanceKNNQuery(PrimitiveDistanceQuery<O, DoubleDistance> distanceQuery) { - super(distanceQuery); - if (!(distanceQuery.getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction)) { - throw new UnsupportedOperationException("LinearScanRawDoubleDistance instantiated for non-RawDoubleDistance!"); - } - rawdist = (PrimitiveDoubleDistanceFunction<O>) distanceQuery.getDistanceFunction(); - } - - @Override - public KNNResult<DoubleDistance> getKNNForDBID(DBIDRef id, int k) { - return getKNNForObject(relation.get(id), k); - } - - @Override - public KNNResult<DoubleDistance> getKNNForObject(O obj, int k) { - return getKNNForObjectBenchmarked(obj, k); - } - - /** - * This is the cleaner, supposedly faster implementation. - * - * @param obj Query object - * @param k Desired number of neighbors - * @return kNN result - */ - KNNResult<DoubleDistance> getKNNForObjectClean(O obj, int k) { - // Optimization for double distances. - final TiedTopBoundedHeap<DoubleDistanceDBIDPair> heap = new TiedTopBoundedHeap<DoubleDistanceDBIDPair>(k, DoubleDistanceKNNHeap.COMPARATOR); - final DBIDIter iter = relation.iterDBIDs(); - - // First k elements don't need checking. - double max = 0.; - for (int i = 0; i < k && iter.valid(); i++, iter.advance()) { - final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); - heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter)); - max = Math.max(max, doubleDistance); - } - // Remaining elements - for (; iter.valid(); iter.advance()) { - final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); - if (doubleDistance <= max) { - heap.add(DBIDFactory.FACTORY.newDistancePair(doubleDistance, iter)); - } - if (doubleDistance < max) { - max = heap.peek().doubleDistance(); - } - } - return new DoubleDistanceKNNList(heap, k); - } - - /** - * It does not make sense, but this version is faster in our larger - * benchmarks. Apparently, some JIT optimization kicks in better. - * - * @param obj Query object - * @param k Desired number of neighbors - * @return kNN result - */ - KNNResult<DoubleDistance> getKNNForObjectBenchmarked(O obj, int k) { - // THIS SHOULD BE SLOWER THAN THE VERSION ABOVE, BUT ISN'T! - final TiedTopBoundedHeap<DistanceDBIDPair<DoubleDistance>> heap = new TiedTopBoundedHeap<DistanceDBIDPair<DoubleDistance>>(k, AbstractKNNHeap.COMPARATOR); - final DBIDIter iter = relation.iterDBIDs(); - // First k elements don't need checking. - double max = 0.; - for (int i = 0; i < k && iter.valid(); i++, iter.advance()) { - final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); - heap.add(DBIDFactory.FACTORY.newDistancePair(new DoubleDistance(doubleDistance), iter)); - max = Math.max(max, doubleDistance); - } - // Remaining elements - for (; iter.valid(); iter.advance()) { - final double doubleDistance = rawdist.doubleDistance(obj, relation.get(iter)); - if (doubleDistance <= max) { - heap.add(DBIDFactory.FACTORY.newDistancePair(new DoubleDistance(doubleDistance), iter)); - } - if (doubleDistance < max) { - max = heap.peek().getDistance().doubleValue(); - } - } - return new GenericKNNList<DoubleDistance>(heap, k); - } -} diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/PreprocessorKNNQuery.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/PreprocessorKNNQuery.java index a1e12fc3..0f555c79 100644 --- a/src/de/lmu/ifi/dbs/elki/database/query/knn/PreprocessorKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/PreprocessorKNNQuery.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.database.query.knn; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,10 +29,10 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.AbstractDataBasedQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; @@ -43,7 +43,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * * @author Erich Schubert */ -public class PreprocessorKNNQuery<O, D extends Distance<D>, T extends KNNResult<D>> extends AbstractDataBasedQuery<O> implements KNNQuery<O, D> { +public class PreprocessorKNNQuery<O, D extends Distance<D>, T extends KNNList<D>> extends AbstractDataBasedQuery<O> implements KNNQuery<O, D> { /** * The last preprocessor result */ @@ -76,12 +76,12 @@ public class PreprocessorKNNQuery<O, D extends Distance<D>, T extends KNNResult< } @Override - public KNNResult<D> getKNNForDBID(DBIDRef id, int k) { + public KNNList<D> getKNNForDBID(DBIDRef id, int k) { if(!warned && k > preprocessor.getK()) { LoggingUtil.warning("Requested more neighbors than preprocessed!"); } if(!warned && k < preprocessor.getK()) { - KNNResult<D> dr = preprocessor.get(id); + KNNList<D> dr = preprocessor.get(id); int subk = k; D kdist = dr.get(subk - 1).getDistance(); while(subk < dr.size()) { @@ -95,7 +95,7 @@ public class PreprocessorKNNQuery<O, D extends Distance<D>, T extends KNNResult< } } if(subk < dr.size()) { - return KNNUtil.subList(dr, subk); + return DBIDUtil.subList(dr, subk); } else { return dr; @@ -105,14 +105,14 @@ public class PreprocessorKNNQuery<O, D extends Distance<D>, T extends KNNResult< } @Override - public List<KNNResult<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { + public List<KNNList<D>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { if(!warned && k > preprocessor.getK()) { LoggingUtil.warning("Requested more neighbors than preprocessed!"); } - List<KNNResult<D>> result = new ArrayList<KNNResult<D>>(ids.size()); + List<KNNList<D>> result = new ArrayList<>(ids.size()); if(k < preprocessor.getK()) { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - KNNResult<D> dr = preprocessor.get(iter); + KNNList<D> dr = preprocessor.get(iter); int subk = k; D kdist = dr.get(subk - 1).getDistance(); while(subk < dr.size()) { @@ -126,7 +126,7 @@ public class PreprocessorKNNQuery<O, D extends Distance<D>, T extends KNNResult< } } if(subk < dr.size()) { - result.add(KNNUtil.subList(dr, subk)); + result.add(DBIDUtil.subList(dr, subk)); } else { result.add(dr); @@ -142,7 +142,7 @@ public class PreprocessorKNNQuery<O, D extends Distance<D>, T extends KNNResult< } @Override - public KNNResult<D> getKNNForObject(O obj, int k) { + public KNNList<D> getKNNForObject(O obj, int k) { throw new AbortException("Preprocessor KNN query only supports ID queries."); } diff --git a/src/de/lmu/ifi/dbs/elki/database/query/knn/package-info.java b/src/de/lmu/ifi/dbs/elki/database/query/knn/package-info.java index 63c3aa16..17d8e4b0 100644 --- a/src/de/lmu/ifi/dbs/elki/database/query/knn/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/database/query/knn/package-info.java @@ -8,7 +8,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |