summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java132
1 files changed, 53 insertions, 79 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
index e66b4011..a7dd0a29 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.index.vafile;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
@@ -38,9 +37,10 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNHeap;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNHeap;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -50,8 +50,6 @@ import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.AbstractRefiningIndex;
import de.lmu.ifi.dbs.elki.index.IndexFactory;
import de.lmu.ifi.dbs.elki.index.KNNIndex;
@@ -61,9 +59,10 @@ import de.lmu.ifi.dbs.elki.logging.statistics.Counter;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.persistent.AbstractPageFileFactory;
-import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.io.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
@@ -96,7 +95,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
* @param <V> Vector type
*/
@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Matthias Schubert, Ziyue Zhu", title = "Efficient Query Processing in Arbitrary Subspaces Using Vector Approximations", booktitle = "Proc. 18th Int. Conf. on Scientific and Statistical Database Management (SSDBM 06), Wien, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.23")
-public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
+public class PartialVAFile<V extends NumberVector> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
/**
* Class logger.
*/
@@ -152,7 +151,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
throw new IllegalStateException("Data already inserted.");
}
- if((Math.log(partitions) / MathUtil.LOG2) != (int) (Math.log(partitions) / MathUtil.LOG2)) {
+ if(MathUtil.log2(partitions) != (int) MathUtil.log2(partitions)) {
throw new IllegalArgumentException("Number of partitions must be a power of 2!");
}
@@ -196,21 +195,6 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
}
/**
- * Fake subspace (full-dimensional).
- *
- * @param relation Relation with full dimensionality
- * @return Bit set with all bits set.
- */
- protected static BitSet fakeSubspace(Relation<? extends NumberVector<?>> relation) {
- int dim = RelationUtil.dimensionality(relation);
- BitSet bits = new BitSet();
- for(int i = 0; i < dim; i++) {
- bits.set(i);
- }
- return bits;
- }
-
- /**
* Calculate the VA file position given the existing borders.
*
* @param id Object ID
@@ -247,51 +231,41 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
return new VectorApproximation(id, approximation);
}
- @SuppressWarnings("unchecked")
@Override
- public <D extends Distance<D>> KNNQuery<V, D> getKNNQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
+ public KNNQuery<V> getKNNQuery(DistanceQuery<V> distanceQuery, Object... hints) {
for(Object hint : hints) {
if(hint == DatabaseQuery.HINT_BULK) {
// FIXME: support bulk?
return null;
}
}
- DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
+ DistanceFunction<? super V> df = distanceQuery.getDistanceFunction();
if(df instanceof SubspaceLPNormDistanceFunction) {
double p = ((SubspaceLPNormDistanceFunction) df).getP();
- BitSet bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- KNNQuery<V, ?> dq = new PartialVAFileKNNQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (KNNQuery<V, D>) dq;
+ long[] bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
+ return new PartialVAFileKNNQuery(distanceQuery, p, bits);
}
if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
- BitSet bits = fakeSubspace(distanceQuery.getRelation());
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- KNNQuery<V, ?> dq = new PartialVAFileKNNQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (KNNQuery<V, D>) dq;
+ long[] bits = BitsUtil.ones(RelationUtil.dimensionality(distanceQuery.getRelation()));
+ return new PartialVAFileKNNQuery(distanceQuery, p, bits);
}
// Not supported.
return null;
}
- @SuppressWarnings("unchecked")
@Override
- public <D extends Distance<D>> RangeQuery<V, D> getRangeQuery(DistanceQuery<V, D> distanceQuery, Object... hints) {
- DistanceFunction<? super V, ?> df = distanceQuery.getDistanceFunction();
+ public RangeQuery<V> getRangeQuery(DistanceQuery<V> distanceQuery, Object... hints) {
+ DistanceFunction<? super V> df = distanceQuery.getDistanceFunction();
if(df instanceof SubspaceLPNormDistanceFunction) {
double p = ((SubspaceLPNormDistanceFunction) df).getP();
- BitSet bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- RangeQuery<V, ?> dq = new PartialVAFileRangeQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (RangeQuery<V, D>) dq;
+ long[] bits = ((SubspaceLPNormDistanceFunction) df).getSelectedDimensions();
+ return new PartialVAFileRangeQuery(distanceQuery, p, bits);
}
if(df instanceof LPNormDistanceFunction) {
double p = ((LPNormDistanceFunction) df).getP();
- BitSet bits = fakeSubspace(distanceQuery.getRelation());
- DistanceQuery<V, ?> ddq = (DistanceQuery<V, ?>) distanceQuery;
- RangeQuery<V, ?> dq = new PartialVAFileRangeQuery((DistanceQuery<V, DoubleDistance>) ddq, p, bits);
- return (RangeQuery<V, D>) dq;
+ long[] bits = BitsUtil.ones(RelationUtil.dimensionality(distanceQuery.getRelation()));
+ return new PartialVAFileRangeQuery(distanceQuery, p, bits);
}
// Not supported.
return null;
@@ -304,7 +278,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param query Query vector
* @param epsilon Epsilon radius
*/
- protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector<?> query, double epsilon) {
+ protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector query, double epsilon) {
final int dimensions = query.getDimensionality();
double[] lowerVals = new double[dimensions];
double[] upperVals = new double[dimensions];
@@ -337,7 +311,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param daFiles List of approximations to use
* @return Vector approximation
*/
- protected static VectorApproximation calculatePartialApproximation(DBID id, NumberVector<?> dv, List<DoubleObjPair<DAFile>> daFiles) {
+ protected static VectorApproximation calculatePartialApproximation(DBID id, NumberVector dv, List<DoubleObjPair<DAFile>> daFiles) {
int[] approximation = new int[dv.getDimensionality()];
for(int i = 0; i < daFiles.size(); i++) {
double val = dv.doubleValue(i);
@@ -484,7 +458,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @author Erich Schubert
* @author Thomas Bernecker
*/
- public class PartialVAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery<DoubleDistance> {
+ public class PartialVAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery {
/**
* Lp-Norm p.
*/
@@ -493,7 +467,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
/**
* Subspace.
*/
- private BitSet subspace;
+ private long[] subspace;
/**
* Constructor.
@@ -502,18 +476,18 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param p LP Norm p
* @param subspace Subspace
*/
- public PartialVAFileRangeQuery(DistanceQuery<V, DoubleDistance> ddq, double p, BitSet subspace) {
+ public PartialVAFileRangeQuery(DistanceQuery<V> ddq, double p, long[] subspace) {
super(ddq);
this.p = p;
this.subspace = subspace;
}
@Override
- public DoubleDistanceDBIDPairList getRangeForObject(V query, DoubleDistance range) {
+ public DoubleDBIDList getRangeForObject(V query, double range) {
stats.incrementIssuedQueries();
long t = System.nanoTime();
- final double epsilonP = Math.pow(range.doubleValue(), p);
+ final double epsilonP = Math.pow(range, p);
// generate query approximation and lookup table
final VectorApproximation queryApprox = calculateFullApproximation(null, query);
@@ -524,12 +498,12 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// filter step
// calculate selectivity coefficients
- List<DoubleObjPair<DAFile>> subspaceDAFiles = new ArrayList<>(subspace.cardinality());
- for(int d = subspace.nextSetBit(0); d >= 0; d = subspace.nextSetBit(d + 1)) {
+ List<DoubleObjPair<DAFile>> subspaceDAFiles = new ArrayList<>(BitsUtil.cardinality(subspace));
+ for(int d = BitsUtil.nextSetBit(subspace, 0); d >= 0; d = BitsUtil.nextSetBit(subspace, d + 1)) {
DAFile daFile = daFiles.get(d);
subspaceDAFiles.add(new DoubleObjPair<>(-1, daFile));
}
- calculateSelectivityCoeffs(subspaceDAFiles, query, range.doubleValue());
+ calculateSelectivityCoeffs(subspaceDAFiles, query, range);
// sort DA files by selectivity
// TODO: validate that this is the correct order
Collections.sort(subspaceDAFiles, Collections.reverseOrder());
@@ -537,7 +511,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// create candidate list (all objects) and prune candidates w.r.t.
// mindist (i.e. remove them from the list)
// important: this structure contains the maxDist values for refinement!
- DoubleDistanceDBIDPairList result = new DoubleDistanceDBIDPairList();
+ ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
int candidates = 0;
for(VectorApproximation va : vectorApprox) {
DBID id = va.getId();
@@ -560,20 +534,20 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// candidate cannot be dropped
// TODO: actually: no refinement needed - need API that allows
// reporting maxdists only.
- result.add(refine(id, query).doubleValue(), id);
+ result.add(refine(id, query), id);
}
else { // refine candidate - true refinement
- DoubleDistance dis = refine(id, query);
+ double dis = refine(id, query);
stats.incrementRefinements();
- if(dis.doubleValue() <= range.doubleValue()) {
- result.add(dis.doubleValue(), id);
+ if(dis <= range) {
+ result.add(dis, id);
}
}
}
}
result.sort();
- stats.incrementScannedBytes(relation.size() * VectorApproximation.byteOnDisk(subspace.cardinality(), partitions));
+ stats.incrementScannedBytes(relation.size() * VectorApproximation.byteOnDisk(BitsUtil.cardinality(subspace), partitions));
stats.incrementQueryTime(System.nanoTime() - t);
@@ -592,7 +566,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @author Erich Schubert
* @author Thomas Bernecker
*/
- public class PartialVAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery<DoubleDistance> {
+ public class PartialVAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery {
/**
* Lp-Norm p.
*/
@@ -601,7 +575,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
/**
* Subspace.
*/
- private BitSet subspace;
+ private long[] subspace;
/**
* Constructor.
@@ -610,14 +584,14 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param p LP-norm p
* @param subspace Subspace to query
*/
- public PartialVAFileKNNQuery(DistanceQuery<V, DoubleDistance> ddq, double p, BitSet subspace) {
+ public PartialVAFileKNNQuery(DistanceQuery<V> ddq, double p, long[] subspace) {
super(ddq);
this.p = p;
this.subspace = subspace;
}
@Override
- public DoubleDistanceKNNList getKNNForObject(V query, int k) {
+ public KNNList getKNNForObject(V query, int k) {
stats.incrementIssuedQueries();
long t = System.nanoTime();
@@ -628,7 +602,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
// sort DA files by worst case distance
List<DAFile> daFiles = getWorstCaseDistOrder(dist, subspace);
- final int currentSubspaceDims = subspace.cardinality();
+ final int currentSubspaceDims = BitsUtil.cardinality(subspace);
int reducedDims = (2 * currentSubspaceDims) / 3;
reducedDims = Math.max(1, reducedDims);
if(LOG.isDebuggingFine()) {
@@ -690,7 +664,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
ArrayList<PartialVACandidate> sortedCandidates = new ArrayList<>(candidates2);
// sort candidates by lower bound (minDist)
Collections.sort(sortedCandidates);
- DoubleDistanceKNNList result = retrieveAccurateDistances(sortedCandidates, k, subspace, query);
+ KNNList result = retrieveAccurateDistances(sortedCandidates, k, subspace, query);
stats.incrementQueryTime(System.nanoTime() - t);
return result;
@@ -758,26 +732,26 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
* @param subspace Subspace
* @return Ordered list of dimension files
*/
- public List<DAFile> getWorstCaseDistOrder(VALPNormDistance dist, BitSet subspace) {
- int subspaceLength = subspace.cardinality();
+ public List<DAFile> getWorstCaseDistOrder(VALPNormDistance dist, long[] subspace) {
+ int subspaceLength = BitsUtil.cardinality(subspace);
List<DAFile> result = new ArrayList<>(subspaceLength);
- for(int i = subspace.nextSetBit(0); i >= 0; i = subspace.nextSetBit(i + 1)) {
+ for(int i = BitsUtil.nextSetBit(subspace, 0); i >= 0; i = BitsUtil.nextSetBit(subspace, i + 1)) {
result.add(daFiles.get(i));
}
Collections.sort(result, new WorstCaseDistComparator(dist));
return result;
}
- protected DoubleDistanceKNNList retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, BitSet subspace, V query) {
- DoubleDistanceKNNHeap result = DBIDUtil.newDoubleDistanceHeap(k);
+ protected KNNList retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, long[] subspace, V query) {
+ KNNHeap result = DBIDUtil.newHeap(k);
for(PartialVACandidate va : sortedCandidates) {
- double stopdist = result.doubleKNNDistance();
+ double stopdist = result.getKNNDistance();
DBID currentID = va.getId();
if(result.size() < k || va.minDistP < stopdist) {
- DoubleDistance dist = refine(currentID, query);
+ double dist = refine(currentID, query);
stats.incrementRefinements();
- if(dist.doubleValue() < stopdist) {
- result.insert(dist.doubleValue(), currentID);
+ if(dist < stopdist) {
+ result.insert(dist, currentID);
}
}
}
@@ -813,7 +787,7 @@ public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIn
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?>> implements IndexFactory<V, PartialVAFile<V>> {
+ public static class Factory<V extends NumberVector> implements IndexFactory<V, PartialVAFile<V>> {
/**
* Number of partitions to use in each dimension.
*