summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/index/vafile
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/vafile')
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/package-info.java2
6 files changed, 93 insertions, 128 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java
index 089397dd..dcc54e2a 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,8 +28,8 @@ import java.util.Arrays;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
/**
* Dimension approximation file, a one-dimensional part of the
@@ -66,7 +66,7 @@ public class DAFile {
* @param dimension Dimension of this file
* @param partitions Number of partitions
*/
- public DAFile(Relation<? extends NumberVector<?>> relation, int dimension, int partitions) {
+ public DAFile(Relation<? extends NumberVector> relation, int dimension, int partitions) {
final int size = relation.size();
this.dimension = dimension;
this.splitPositions = new double[partitions + 1];
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
index e66b4011..a7dd0a29 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.index.vafile;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
@@ -38,9 +37,10 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -50,8 +50,6 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.AbstractRefiningIndex;
import de.lmu.ifi.dbs.elki.index.IndexFactory;
import de.lmu.ifi.dbs.elki.index.KNNIndex;
@@ -61,9 +59,10 @@ import de.lmu.ifi.dbs.elki.logging.statistics.Counter;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.persistent.AbstractPageFileFactory;
-import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
@@ -96,7 +95,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
* @param <V> Vector type
*/
@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Matthias Schubert, Ziyue Zhu", title = "Efficient Query Processing in Arbitrary Subspaces Using Vector Approximations", booktitle = "Proc. 18th Int. Conf. on Scientific and Statistical Database Management (SSDBM 06), Wien, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.23")
-public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
+public class PartialVAFile<V extends NumberVector> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
/**
* Class logger.
*/
@@ -152,7 +151,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
throw new IllegalStateException("Data already inserted.");
}
- if((Math.log(partitions) / MathUtil.LOG2) != (int) (Math.log(partitions) / MathUtil.LOG2)) {
+ if(MathUtil.log2(partitions) != (int) MathUtil.log2(partitions)) {
throw new IllegalArgumentException("Number of partitions must be a power of 2!");
}
@@ -196,21 +195,6 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
}
/**
- * Fake subspace (full-dimensional).
- *
- * @param relation Relation with full dimensionality
- * @return Bit set with all bits set.
- */
- protected static BitSet fakeSubspace(Relation<? extends NumberVector<?>> relation) {
- int dim = RelationUtil.dimensionality(relation);
- BitSet bits = new BitSet();
- for(int i = 0; i < dim; i++) {
- bits.set(i);
- }
- return bits;
- }
-
- /**
* Calculate the VA file position given the existing borders.
*
* @param id Object ID
@@ -247,51 +231,41 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
return new VectorApproximation(id, approximation);
}
- @SuppressWarnings("unchecked")
@Override
- public <D extends Distance<D>> KNNQuery<V, D> getKNNQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
+ public KNNQuery<V> getKNNQuery(DistanceQuery<V> distanceQuery, Object... hints) {
for(Object hint : hints) {
if(hint == DatabaseQuery.HINT_BULK) {
// FIXME: support bulk?
return null;
}
}
- DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
+ DistanceFunction<? super V> df = distanceQuery.getDistanceFunction();
if(df instanceof SubspaceLPNormDistanceFunction) {
double p = ((SubspaceLPNormDistanceFunction) df).getP();
- BitSet bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- KNNQuery<V, ?> dq = new PartialVAFileKNNQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (KNNQuery<V, D>) dq;
+ long[] bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
+ return new PartialVAFileKNNQuery(distanceQuery, p, bits);
}
if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
- BitSet bits = fakeSubspace(distanceQuery.getRelation());
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- KNNQuery<V, ?> dq = new PartialVAFileKNNQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (KNNQuery<V, D>) dq;
+ long[] bits = BitsUtil.ones(RelationUtil.dimensionality(distanceQuery.getRelation()));
+ return new PartialVAFileKNNQuery(distanceQuery, p, bits);
}
// Not supported.
return null;
}
- @SuppressWarnings("unchecked")
@Override
- public <D extends Distance<D>> RangeQuery<V, D> getRangeQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
- DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
+ public RangeQuery<V> getRangeQuery(DistanceQuery<V> distanceQuery, Object... hints) {
+ DistanceFunction<? super V> df = distanceQuery.getDistanceFunction();
if(df instanceof SubspaceLPNormDistanceFunction) {
double p = ((SubspaceLPNormDistanceFunction) df).getP();
- BitSet bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- RangeQuery<V, ?> dq = new PartialVAFileRangeQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (RangeQuery<V, D>) dq;
+ long[] bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
+ return new PartialVAFileRangeQuery(distanceQuery, p, bits);
}
if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
- BitSet bits = fakeSubspace(distanceQuery.getRelation());
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- RangeQuery<V, ?> dq = new PartialVAFileRangeQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (RangeQuery<V, D>) dq;
+ long[] bits = BitsUtil.ones(RelationUtil.dimensionality(distanceQuery.getRelation()));
+ return new PartialVAFileRangeQuery(distanceQuery, p, bits);
}
// Not supported.
return null;
@@ -304,7 +278,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param query Query vector
* @param epsilon Epsilon radius
*/
- protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector<?> query, double epsilon) {
+ protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector query, double epsilon) {
final int dimensions = query.getDimensionality();
double[] lowerVals = new double[dimensions];
double[] upperVals = new double[dimensions];
@@ -337,7 +311,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param daFiles List of approximations to use
* @return Vector approximation
*/
- protected static VectorApproximation calculatePartialApproximation(DBID id, NumberVector<?> dv, List<DoubleObjPair<DAFile>> daFiles) {
+ protected static VectorApproximation calculatePartialApproximation(DBID id, NumberVector dv, List<DoubleObjPair<DAFile>> daFiles) {
int[] approximation = new int[dv.getDimensionality()];
for(int i = 0; i < daFiles.size(); i++) {
double val = dv.doubleValue(i);
@@ -484,7 +458,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @author Erich Schubert
* @author Thomas Bernecker
*/
- public class PartialVAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery<DoubleDistance> {
+ public class PartialVAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery {
/**
* Lp-Norm p.
*/
@@ -493,7 +467,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
/**
* Subspace.
*/
- private BitSet subspace;
+ private long[] subspace;
/**
* Constructor.
@@ -502,18 +476,18 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param p LP Norm p
* @param subspace Subspace
*/
- public PartialVAFileRangeQuery(DistanceQuery<V, DoubleDistance> ddq, double p, BitSet subspace) {
+ public PartialVAFileRangeQuery(DistanceQuery<V> ddq, double p, long[] subspace) {
super(ddq);
this.p = p;
this.subspace = subspace;
}
@Override
- public DoubleDistanceDBIDPairList getRangeForObject(V query, DoubleDistance range) {
+ public DoubleDBIDList getRangeForObject(V query, double range) {
stats.incrementIssuedQueries();
long t = System.nanoTime();
- final double epsilonP = Math.pow(range.doubleValue(), p);
+ final double epsilonP = Math.pow(range, p);
// generate query approximation and lookup table
final VectorApproximation queryApprox = calculateFullApproximation(null, query);
@@ -524,12 +498,12 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// filter step
// calculate selectivity coefficients
- List<DoubleObjPair<DAFile>> subspaceDAFiles = new ArrayList<>(subspace.cardinality());
- for(int d = subspace.nextSetBit(0); d >= 0; d = subspace.nextSetBit(d + 1)) {
+ List<DoubleObjPair<DAFile>> subspaceDAFiles = new ArrayList<>(BitsUtil.cardinality(subspace));
+ for(int d = BitsUtil.nextSetBit(subspace, 0); d >= 0; d = BitsUtil.nextSetBit(subspace, d + 1)) {
DAFile daFile = daFiles.get(d);
subspaceDAFiles.add(new DoubleObjPair<>(-1, daFile));
}
- calculateSelectivityCoeffs(subspaceDAFiles, query, range.doubleValue());
+ calculateSelectivityCoeffs(subspaceDAFiles, query, range);
// sort DA files by selectivity
// TODO: validate that this is the correct order
Collections.sort(subspaceDAFiles, Collections.reverseOrder());
@@ -537,7 +511,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// create candidate list (all objects) and prune candidates w.r.t.
// mindist (i.e. remove them from the list)
// important: this structure contains the maxDist values for refinement!
- DoubleDistanceDBIDPairList result = new DoubleDistanceDBIDPairList();
+ ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
int candidates = 0;
for(VectorApproximation va : vectorApprox) {
DBID id = va.getId();
@@ -560,20 +534,20 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// candidate cannot be dropped
// TODO: actually: no refinement needed - need API that allows
// reporting maxdists only.
- result.add(refine(id, query).doubleValue(), id);
+ result.add(refine(id, query), id);
}
else { // refine candidate - true refinement
- DoubleDistance dis = refine(id, query);
+ double dis = refine(id, query);
stats.incrementRefinements();
- if(dis.doubleValue() <= range.doubleValue()) {
- result.add(dis.doubleValue(), id);
+ if(dis <= range) {
+ result.add(dis, id);
}
}
}
}
result.sort();
- stats.incrementScannedBytes(relation.size() * VectorApproximation.byteOnDisk(subspace.cardinality(), partitions));
+ stats.incrementScannedBytes(relation.size() * VectorApproximation.byteOnDisk(BitsUtil.cardinality(subspace), partitions));
stats.incrementQueryTime(System.nanoTime() - t);
@@ -592,7 +566,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @author Erich Schubert
* @author Thomas Bernecker
*/
- public class PartialVAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery<DoubleDistance> {
+ public class PartialVAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery {
/**
* Lp-Norm p.
*/
@@ -601,7 +575,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
/**
* Subspace.
*/
- private BitSet subspace;
+ private long[] subspace;
/**
* Constructor.
@@ -610,14 +584,14 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param p LP-norm p
* @param subspace Subspace to query
*/
- public PartialVAFileKNNQuery(DistanceQuery<V, DoubleDistance> ddq, double p, BitSet subspace) {
+ public PartialVAFileKNNQuery(DistanceQuery<V> ddq, double p, long[] subspace) {
super(ddq);
this.p = p;
this.subspace = subspace;
}
@Override
- public DoubleDistanceKNNList getKNNForObject(V query, int k) {
+ public KNNList getKNNForObject(V query, int k) {
stats.incrementIssuedQueries();
long t = System.nanoTime();
@@ -628,7 +602,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// sort DA files by worst case distance
List<DAFile> daFiles = getWorstCaseDistOrder(dist, subspace);
- final int currentSubspaceDims = subspace.cardinality();
+ final int currentSubspaceDims = BitsUtil.cardinality(subspace);
int reducedDims = (2 * currentSubspaceDims) / 3;
reducedDims = Math.max(1, reducedDims);
if(LOG.isDebuggingFine()) {
@@ -690,7 +664,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
ArrayList<PartialVACandidate> sortedCandidates = new ArrayList<>(candidates2);
// sort candidates by lower bound (minDist)
Collections.sort(sortedCandidates);
- DoubleDistanceKNNList result = retrieveAccurateDistances(sortedCandidates, k, subspace, query);
+ KNNList result = retrieveAccurateDistances(sortedCandidates, k, subspace, query);
stats.incrementQueryTime(System.nanoTime() - t);
return result;
@@ -758,26 +732,26 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param subspace Subspace
* @return Ordered list of dimension files
*/
- public List<DAFile> getWorstCaseDistOrder(VALPNormDistance dist, BitSet subspace) {
- int subspaceLength = subspace.cardinality();
+ public List<DAFile> getWorstCaseDistOrder(VALPNormDistance dist, long[] subspace) {
+ int subspaceLength = BitsUtil.cardinality(subspace);
List<DAFile> result = new ArrayList<>(subspaceLength);
- for(int i = subspace.nextSetBit(0); i >= 0; i = subspace.nextSetBit(i + 1)) {
+ for(int i = BitsUtil.nextSetBit(subspace, 0); i >= 0; i = BitsUtil.nextSetBit(subspace, i + 1)) {
result.add(daFiles.get(i));
}
Collections.sort(result, new WorstCaseDistComparator(dist));
return result;
}
- protected DoubleDistanceKNNList retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, BitSet subspace, V query) {
- DoubleDistanceKNNHeap result = DBIDUtil.newDoubleDistanceHeap(k);
+ protected KNNList retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, long[] subspace, V query) {
+ KNNHeap result = DBIDUtil.newHeap(k);
for(PartialVACandidate va : sortedCandidates) {
- double stopdist = result.doubleKNNDistance();
+ double stopdist = result.getKNNDistance();
DBID currentID = va.getId();
if(result.size() < k || va.minDistP < stopdist) {
- DoubleDistance dist = refine(currentID, query);
+ double dist = refine(currentID, query);
stats.incrementRefinements();
- if(dist.doubleValue() < stopdist) {
- result.insert(dist.doubleValue(), currentID);
+ if(dist < stopdist) {
+ result.insert(dist, currentID);
}
}
}
@@ -813,7 +787,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?>> implements IndexFactory<V, PartialVAFile<V>> {
+ public static class Factory<V extends NumberVector> implements IndexFactory<V, PartialVAFile<V>> {
/**
* Number of partitions to use in each dimension.
*
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java
index 42651b15..bdf30d15 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.index.vafile;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -34,9 +33,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -45,8 +46,6 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.AbstractRefiningIndex;
import de.lmu.ifi.dbs.elki.index.IndexFactory;
import de.lmu.ifi.dbs.elki.index.KNNIndex;
@@ -61,7 +60,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Vector-approximation file (VAFile)
@@ -87,7 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
*/
@Title("An approximation based data structure for similarity search")
@Reference(authors = "Weber, R. and Blott, S.", title = "An approximation based data structure for similarity search", booktitle = "Report TR1997b, ETH Zentrum, Zurich, Switzerland", url = "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.40.480&rep=rep1&type=pdf")
-public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
+public class VAFile<V extends NumberVector> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
/**
* Logging class.
*/
@@ -244,35 +242,29 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
return "va-file";
}
- @SuppressWarnings("unchecked")
@Override
- public <D extends Distance<D>> KNNQuery<V, D> getKNNQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
+ public KNNQuery<V> getKNNQuery(DistanceQuery<V> distanceQuery, Object... hints) {
for(Object hint : hints) {
if(hint == DatabaseQuery.HINT_BULK) {
// FIXME: support bulk?
return null;
}
}
- DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
+ DistanceFunction<? super V> df = distanceQuery.getDistanceFunction();
if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- KNNQuery<V, ?> dq = new VAFileKNNQuery((DistanceQuery<V, DoubleDistance>) ddq, p);
- return (KNNQuery<V, D>) dq;
+ return new VAFileKNNQuery(distanceQuery, p);
}
// Not supported.
return null;
}
- @SuppressWarnings("unchecked")
@Override
- public <D extends Distance<D>> RangeQuery<V, D> getRangeQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
- DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
+ public RangeQuery<V> getRangeQuery(DistanceQuery<V> distanceQuery, Object... hints) {
+ DistanceFunction<? super V> df = distanceQuery.getDistanceFunction();
if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- RangeQuery<V, ?> dq = new VAFileRangeQuery((DistanceQuery<V, DoubleDistance>) ddq, p);
- return (RangeQuery<V, D>) dq;
+ return new VAFileRangeQuery(distanceQuery, p);
}
// Not supported.
return null;
@@ -283,7 +275,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
*
* @author Erich Schubert
*/
- public class VAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery<DoubleDistance> {
+ public class VAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery {
/**
* LP Norm p parameter.
*/
@@ -296,14 +288,13 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
* @param p LP norm p
*/
- public VAFileRangeQuery(DistanceQuery<V, DoubleDistance> distanceQuery, double p) {
+ public VAFileRangeQuery(DistanceQuery<V> distanceQuery, double p) {
super(distanceQuery);
this.p = p;
}
@Override
- public DoubleDistanceDBIDPairList getRangeForObject(V query, DoubleDistance range) {
- final double eps = range.doubleValue();
+ public DoubleDBIDList getRangeForObject(V query, double eps) {
// generate query approximation and lookup table
VectorApproximation queryApprox = calculateApproximation(null, query);
@@ -313,7 +304,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
// Count a VA file scan
scans += 1;
- DoubleDistanceDBIDPairList result = new DoubleDistanceDBIDPairList();
+ ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
// Approximation step
for(int i = 0; i < vectorApprox.size(); i++) {
VectorApproximation va = vectorApprox.get(i);
@@ -327,7 +318,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
// interested in the DBID only! But this needs an API change.
// refine the next element
- final double dist = refine(va.id, query).doubleValue();
+ final double dist = refine(va.id, query);
if(dist <= eps) {
result.add(dist, va.id);
}
@@ -342,7 +333,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
*
* @author Erich Schubert
*/
- public class VAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery<DoubleDistance> {
+ public class VAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery {
/**
* LP Norm p parameter.
*/
@@ -354,13 +345,13 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
* @param distanceQuery Distance query object
* @param p LP norm p
*/
- public VAFileKNNQuery(DistanceQuery<V, DoubleDistance> distanceQuery, double p) {
+ public VAFileKNNQuery(DistanceQuery<V> distanceQuery, double p) {
super(distanceQuery);
this.p = p;
}
@Override
- public DoubleDistanceKNNList getKNNForObject(V query, int k) {
+ public KNNList getKNNForObject(V query, int k) {
// generate query approximation and lookup table
VectorApproximation queryApprox = calculateApproximation(null, query);
@@ -371,7 +362,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
DoubleMaxHeap minMaxHeap = new DoubleMaxHeap(k + 1);
double minMaxDist = Double.POSITIVE_INFINITY;
// Candidates with minDist <= kth maxDist
- ArrayList<DoubleObjPair<DBID>> candidates = new ArrayList<>(vectorApprox.size());
+ ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(vectorApprox.size());
// Count a VA file scan
scans += 1;
@@ -386,7 +377,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
if(minDist > minMaxDist) {
continue;
}
- candidates.add(new DoubleObjPair<>(minDist, va.id));
+ candidates.add(minDist, va.id);
// Update candidate pruning heap
minMaxHeap.add(maxDist, k);
@@ -395,25 +386,25 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
}
}
// sort candidates by lower bound (minDist)
- Collections.sort(candidates);
+ candidates.sort();
// refinement step
- DoubleDistanceKNNHeap result = DBIDUtil.newDoubleDistanceHeap(k);
+ KNNHeap result = DBIDUtil.newHeap(k);
// log.fine("candidates size " + candidates.size());
// retrieve accurate distances
- for(DoubleObjPair<DBID> va : candidates) {
+ for(DoubleDBIDListIter iter = candidates.iter(); iter.valid(); iter.advance()) {
// Stop when we are sure to have all elements
if(result.size() >= k) {
- double kDist = result.doubleKNNDistance();
- if(va.first > kDist) {
+ double kDist = result.getKNNDistance();
+ if(iter.doubleValue() > kDist) {
break;
}
}
// refine the next element
- final double dist = refine(va.second, query).doubleValue();
- result.insert(dist, va.second);
+ final double dist = refine(iter, query);
+ result.insert(dist, iter);
}
if(LOG.isDebuggingFinest()) {
LOG.finest("query = (" + query + ")");
@@ -434,7 +425,7 @@ public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V>
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?>> implements IndexFactory<V, VAFile<V>> {
+ public static class Factory<V extends NumberVector> implements IndexFactory<V, VAFile<V>> {
/**
* Number of partitions to use in each dimension.
*
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java b/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java
index d0b4a8e5..92ef5097 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -54,7 +54,7 @@ public class VALPNormDistance {
* @param query Query vector
* @param queryApprox Query approximation
*/
- public VALPNormDistance(double p, double[][] splitPositions, NumberVector<?> query, VectorApproximation queryApprox) {
+ public VALPNormDistance(double p, double[][] splitPositions, NumberVector query, VectorApproximation queryApprox) {
super();
this.onebyp = 1.0 / p;
this.queryApprox = queryApprox;
@@ -155,7 +155,7 @@ public class VALPNormDistance {
* @param query Query vector
* @param p p
*/
- private void initializeLookupTable(double[][] splitPositions, NumberVector<?> query, double p) {
+ private void initializeLookupTable(double[][] splitPositions, NumberVector query, double p) {
final int dimensions = splitPositions.length;
final int bordercount = splitPositions[0].length;
lookup = new double[dimensions][bordercount];
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java b/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java
index f679cf16..464eee11 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,7 +26,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
import java.util.Arrays;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
/**
* Object in a VA approximation.
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/package-info.java b/src/de/lmu/ifi/dbs/elki/index/vafile/package-info.java
index 10aae35c..362d0b79 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team