diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index')
96 files changed, 2097 insertions, 1779 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/AbstractIndex.java b/src/de/lmu/ifi/dbs/elki/index/AbstractIndex.java index 30966fd1..44418a3e 100644 --- a/src/de/lmu/ifi/dbs/elki/index/AbstractIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/AbstractIndex.java @@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.index; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.persistent.PageFileStatistics; @@ -65,7 +65,7 @@ public abstract class AbstractIndex<O> implements Index { } @Override - public void insert(DBID id) { + public void insert(DBIDRef id) { throw new UnsupportedOperationException("This index does not allow dynamic updates."); } @@ -75,7 +75,7 @@ public abstract class AbstractIndex<O> implements Index { } @Override - public boolean delete(DBID id) { + public boolean delete(DBIDRef id) { throw new UnsupportedOperationException("This index does not allow dynamic updates."); } diff --git a/src/de/lmu/ifi/dbs/elki/index/AbstractRefiningIndex.java b/src/de/lmu/ifi/dbs/elki/index/AbstractRefiningIndex.java index 1d42b7b3..33c9b341 100644 --- a/src/de/lmu/ifi/dbs/elki/index/AbstractRefiningIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/AbstractRefiningIndex.java @@ -23,21 +23,19 @@ package de.lmu.ifi.dbs.elki.index; */ import java.util.List; -import java.util.Map; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.AbstractDistanceKNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import 
de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.persistent.PageFileStatistics; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; /** * Abstract base class for Filter-refinement indexes. @@ -72,10 +70,10 @@ public abstract class AbstractRefiningIndex<O> extends AbstractIndex<O> implemen * @param relation Relation to index * @param ids database ids */ - abstract protected void initialize(Relation<O> relation, DBIDs ids); + protected abstract void initialize(Relation<O> relation, DBIDs ids); /** - * Refine a given object (and count the refinement!) + * Refine a given object (and count the refinement!). * * @param id Object id * @return refined object @@ -148,7 +146,7 @@ public abstract class AbstractRefiningIndex<O> extends AbstractIndex<O> implemen * @param q Query object * @return Distance */ - protected D refine(DBID id, O q) { + protected D refine(DBIDRef id, O q) { AbstractRefiningIndex.this.refinements++; return distanceQuery.distance(q, id); } @@ -168,7 +166,7 @@ public abstract class AbstractRefiningIndex<O> extends AbstractIndex<O> implemen * * @author Erich Schubert */ - abstract public class AbstractKNNQuery<D extends Distance<D>> extends AbstractDistanceKNNQuery<O, D> { + public abstract class AbstractKNNQuery<D extends Distance<D>> extends AbstractDistanceKNNQuery<O, D> { /** * Constructor. 
* @@ -184,11 +182,6 @@ public abstract class AbstractRefiningIndex<O> extends AbstractIndex<O> implemen } @Override - public void getKNNForBulkHeaps(Map<DBID, KNNHeap<D>> heaps) { - throw new UnsupportedOperationException("Not yet implemented."); - } - - @Override public KNNResult<D> getKNNForDBID(DBIDRef id, int k) { return getKNNForObject(relation.get(id), k); } diff --git a/src/de/lmu/ifi/dbs/elki/index/Index.java b/src/de/lmu/ifi/dbs/elki/index/Index.java index 9e77073c..1b866f5e 100644 --- a/src/de/lmu/ifi/dbs/elki/index/Index.java +++ b/src/de/lmu/ifi/dbs/elki/index/Index.java @@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.index; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.persistent.PageFileStatistics; import de.lmu.ifi.dbs.elki.result.Result; @@ -48,7 +48,7 @@ public interface Index extends Result { * * @param id the object to be inserted */ - public void insert(DBID id); + public void insert(DBIDRef id); /** * Inserts the specified objects into this index. If a bulk load mode is @@ -64,7 +64,7 @@ public interface Index extends Result { * @param id Object to remove * @return true if this index did contain the object, false otherwise */ - public boolean delete(DBID id); + public boolean delete(DBIDRef id); /** * Deletes the specified objects from this index. 
diff --git a/src/de/lmu/ifi/dbs/elki/index/package-info.java b/src/de/lmu/ifi/dbs/elki/index/package-info.java index ca7dbe28..003a25bb 100644 --- a/src/de/lmu/ifi/dbs/elki/index/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/index/package-info.java @@ -1,5 +1,8 @@ /** * <p>Index structure implementations</p> + * + * @apiviz.exclude de.lmu.ifi.dbs.elki.index.*\.*\.Factory + * @apiviz.exclude de.lmu.ifi.dbs.elki.index.tree.TreeIndexFactory */ /* This file is part of ELKI: @@ -23,4 +26,4 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -package de.lmu.ifi.dbs.elki.index;
\ No newline at end of file +package de.lmu.ifi.dbs.elki.index; diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/AbstractPreprocessorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/AbstractPreprocessorIndex.java index 0160480f..95d0878d 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/AbstractPreprocessorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/AbstractPreprocessorIndex.java @@ -41,12 +41,14 @@ import de.lmu.ifi.dbs.elki.logging.Logging; */ public abstract class AbstractPreprocessorIndex<O, R> extends AbstractIndex<O> { /** - * The data store + * The data store. */ protected WritableDataStore<R> storage = null; /** * Constructor. + * + * @param relation Relation to index */ public AbstractPreprocessorIndex(Relation<O> relation) { super(relation); @@ -57,5 +59,5 @@ public abstract class AbstractPreprocessorIndex<O, R> extends AbstractIndex<O> { * * @return Logger */ - abstract protected Logging getLogger(); + protected abstract Logging getLogger(); } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/LocalProjectionIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/LocalProjectionIndex.java index 4e187a9c..7373646f 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/LocalProjectionIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/LocalProjectionIndex.java @@ -40,7 +40,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectionResult; * @param <V> Vector type * @param <P> Projection result type */ -public interface LocalProjectionIndex<V extends NumberVector<?, ?>, P extends ProjectionResult> extends Index { +public interface LocalProjectionIndex<V extends NumberVector<?>, P extends ProjectionResult> extends Index { /** * Get the precomputed local projection for a particular object ID. 
* @@ -60,7 +60,7 @@ public interface LocalProjectionIndex<V extends NumberVector<?, ?>, P extends Pr * @param <V> Vector type * @param <I> Index type */ - public static interface Factory<V extends NumberVector<?, ?>, I extends LocalProjectionIndex<V, ?>> extends IndexFactory<V, I> { + public static interface Factory<V extends NumberVector<?>, I extends LocalProjectionIndex<V, ?>> extends IndexFactory<V, I> { /** * Instantiate the index for a given database. * diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java index 21e0abf7..7a64f294 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java @@ -31,11 +31,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.IndexFactory; import de.lmu.ifi.dbs.elki.index.KNNIndex; @@ -54,6 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @param <O> Object type * @param <D> Distance type + * @param <T> Result type */ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D>, T extends KNNResult<D>> extends AbstractPreprocessorIndex<O, T> implements 
KNNIndex<O> { /** @@ -155,8 +156,8 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D @SuppressWarnings("unchecked") @Override - public <S extends Distance<S>> KNNQuery<O, S> getKNNQuery(DistanceQuery<O, S> distanceQuery, Object... hints) { - if(!this.distanceFunction.equals(distanceQuery.getDistanceFunction())) { + public <S extends Distance<S>> KNNQuery<O, S> getKNNQuery(DistanceQuery<O, S> distQ, Object... hints) { + if(!this.distanceFunction.equals(distQ.getDistanceFunction())) { return null; } // k max supported? @@ -185,7 +186,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * @param <O> The object type * @param <D> The distance type */ - public static abstract class Factory<O, D extends Distance<D>, T extends KNNResult<D>> implements IndexFactory<O, KNNIndex<O>> { + public abstract static class Factory<O, D extends Distance<D>, T extends KNNResult<D>> implements IndexFactory<O, KNNIndex<O>> { /** * Parameter to specify the number of nearest neighbors of an object to be * materialized. must be an integer greater than 1. 
@@ -193,7 +194,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * Key: {@code -materialize.k} * </p> */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("materialize.k", "The number of nearest neighbors of an object to be materialized."); + public static final OptionID K_ID = new OptionID("materialize.k", "The number of nearest neighbors of an object to be materialized."); /** * Parameter to indicate the distance function to be used to ascertain the @@ -206,7 +207,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * Key: {@code materialize.distance} * </p> */ - public static final OptionID DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("materialize.distance", "the distance function to materialize the nearest neighbors"); + public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("materialize.distance", "the distance function to materialize the nearest neighbors"); /** * Holds the value of {@link #K_ID}. @@ -231,7 +232,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D } @Override - abstract public AbstractMaterializeKNNPreprocessor<O, D, T> instantiate(Relation<O> relation); + public abstract AbstractMaterializeKNNPreprocessor<O, D, T> instantiate(Relation<O> relation); /** * Get the distance function. @@ -264,7 +265,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D * * @apiviz.exclude */ - public static abstract class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer { + public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer { /** * Holds the value of {@link #K_ID}. 
*/ @@ -279,7 +280,8 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D protected void makeOptions(Parameterization config) { super.makeOptions(config); // number of neighbors - final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(1)); + final IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterConstraint(1)); if(config.grab(kP)) { k = kP.getValue(); } @@ -292,7 +294,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D } @Override - abstract protected Factory<O, D, ?> makeInstance(); + protected abstract Factory<O, D, ?> makeInstance(); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java index b7c3f0ac..bb947348 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java @@ -4,11 +4,11 @@ import de.lmu.ifi.dbs.elki.algorithm.KNNJoin; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeNode; import de.lmu.ifi.dbs.elki.logging.Logging; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList; /* This file is part of ELKI: @@ -41,11 +41,11 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList; * @param <V> vector type * @param <D> distance type */ -public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<V, D, KNNList<D>> { +public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<V, D, KNNResult<D>> { /** * Logging class. */ - private static final Logging logger = Logging.getLogger(KNNJoinMaterializeKNNPreprocessor.class); + private static final Logging LOG = Logging.getLogger(KNNJoinMaterializeKNNPreprocessor.class); /** * Constructor. 
@@ -67,7 +67,7 @@ public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<V, ?>, D e @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -92,7 +92,7 @@ public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<V, ?>, D e * @param <O> The object type * @param <D> The distance type */ - public static class Factory<O extends NumberVector<O, ?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNList<D>> { + public static class Factory<O extends NumberVector<?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> { /** * Constructor. * @@ -118,7 +118,7 @@ public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<V, ?>, D e * @param <O> Object type * @param <D> Distance type */ - public static class Parameterizer<O extends NumberVector<O, ?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O extends NumberVector<?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { @Override protected KNNJoinMaterializeKNNPreprocessor.Factory<O, D> makeInstance() { return new KNNJoinMaterializeKNNPreprocessor.Factory<O, D>(k, distanceFunction); diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java index c200472b..b52b15af 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java @@ -24,10 +24,10 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; */ import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; import java.util.Iterator; import java.util.List; -import java.util.Set; -import java.util.SortedSet; import 
java.util.TreeSet; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; @@ -40,22 +40,27 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.query.rknn.PreprocessorRKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.DistanceUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.RKNNIndex; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList; import 
de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -64,21 +69,28 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * nearest neighbors (and their distances) to each database object. * * @author Elke Achtert + * * @param <O> the type of database objects the preprocessor can be applied to * @param <D> the type of distance the used distance function will return */ +// TODO: rewrite the double optimization. Maybe use a specialized subclass? @Title("Materialize kNN and RkNN Neighborhood preprocessor") @Description("Materializes the k nearest neighbors and the reverse k nearest neighbors of objects of a database.") public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends MaterializeKNNPreprocessor<O, D> implements RKNNIndex<O> { /** * Logger to use. */ - private static final Logging logger = Logging.getLogger(MaterializeKNNAndRKNNPreprocessor.class); + private static final Logging LOG = Logging.getLogger(MaterializeKNNAndRKNNPreprocessor.class); /** * Additional data storage for RkNN. */ - private WritableDataStore<SortedSet<DistanceResultPair<D>>> materialized_RkNN; + private WritableDataStore<TreeSet<DistanceDBIDPair<D>>> materialized_RkNN; + + /** + * Use optimizations for double values. + */ + protected boolean doubleOptimize; /** * Constructor. @@ -89,12 +101,13 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends */ public MaterializeKNNAndRKNNPreprocessor(Relation<O> relation, DistanceFunction<? 
super O, D> distanceFunction, int k) { super(relation, distanceFunction, k); + this.doubleOptimize = DistanceUtil.isDoubleDistanceFunction(distanceFunction); } @Override protected void preprocess() { createStorage(); - materialized_RkNN = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Set.class); + materialized_RkNN = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, TreeSet.class); FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors and reverse k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null; materializeKNNAndRKNNs(DBIDUtil.ensureArray(relation.getDBIDs()), progress); } @@ -106,21 +119,22 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends */ private void materializeKNNAndRKNNs(ArrayDBIDs ids, FiniteProgress progress) { // add an empty list to each rknn - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + Comparator<DistanceDBIDPair<D>> comp = DistanceDBIDResultUtil.distanceComparator(); + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { if(materialized_RkNN.get(iter) == null) { - materialized_RkNN.put(iter, new TreeSet<DistanceResultPair<D>>()); + materialized_RkNN.put(iter, new TreeSet<DistanceDBIDPair<D>>(comp)); } } // knn query - List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k); - for(int i = 0; i < ids.size(); i++) { - DBID id = ids.get(i); + List<? 
extends KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k); + int i = 0; + for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { KNNResult<D> kNNs = kNNList.get(i); storage.put(id, kNNs); - for(DistanceResultPair<D> kNN : kNNs) { - Set<DistanceResultPair<D>> rknns = materialized_RkNN.get(kNN); - rknns.add(new GenericDistanceResultPair<D>(kNN.getDistance(), id)); + for(DistanceDBIDResultIter<D> iter = kNNs.iter(); iter.valid(); iter.advance()) { + TreeSet<DistanceDBIDPair<D>> rknns = materialized_RkNN.get(iter); + rknns.add(makePair(iter, id)); } if(progress != null) { progress.incrementProcessed(getLogger()); @@ -132,6 +146,14 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends } } + @SuppressWarnings("unchecked") + private DistanceDBIDPair<D> makePair(DistanceDBIDResultIter<D> iter, DBIDIter id) { + if(doubleOptimize) { + return (DistanceDBIDPair<D>) DBIDUtil.newDistancePair(((DoubleDistanceDBIDPair) iter.getDistancePair()).doubleDistance(), id); + } + return DBIDUtil.newDistancePair(iter.getDistance(), id); + } + @Override protected void objectsInserted(DBIDs ids) { StepProgress stepprog = getLogger().isVerbose() ? 
new StepProgress(3) : null; @@ -171,34 +193,37 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends private ArrayDBIDs updateKNNsAndRkNNs(DBIDs ids) { ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray(); DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids); - for (DBIDIter iter = oldids.iter(); iter.valid(); iter.advance()) { - KNNResult<D> kNNs = storage.get(iter); - D knnDist = kNNs.getKNNDistance(); + for(DBIDIter id = oldids.iter(); id.valid(); id.advance()) { + KNNResult<D> oldkNNs = storage.get(id); + D knnDist = oldkNNs.getKNNDistance(); // look for new kNNs KNNHeap<D> heap = null; - for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { - D dist = distanceQuery.distance(iter, iter2); + for(DBIDIter newid = ids.iter(); newid.valid(); newid.advance()) { + D dist = distanceQuery.distance(id, newid); if(dist.compareTo(knnDist) <= 0) { + // New id changes the kNNs of oldid. if(heap == null) { - heap = new KNNHeap<D>(k); - heap.addAll(kNNs); + heap = KNNUtil.newHeap(oldkNNs); } - heap.add(dist, iter2); + heap.add(dist, newid); } } + // kNNs for oldid have changed: if(heap != null) { - KNNList<D> newKNNs = heap.toKNNList(); - storage.put(iter, newKNNs); + KNNResult<D> newkNNs = heap.toKNNList(); + storage.put(id, newkNNs); // get the difference int i = 0; int j = 0; - List<DistanceResultPair<D>> added = new ArrayList<DistanceResultPair<D>>(); - List<DistanceResultPair<D>> removed = new ArrayList<DistanceResultPair<D>>(); - while(i < kNNs.size() && j < newKNNs.size()) { - DistanceResultPair<D> drp1 = kNNs.get(i); - DistanceResultPair<D> drp2 = newKNNs.get(j); - if(!drp1.sameDBID(drp2)) { + GenericDistanceDBIDList<D> added = new GenericDistanceDBIDList<D>(); + GenericDistanceDBIDList<D> removed = new GenericDistanceDBIDList<D>(); + // TODO: use iterators. 
+ while(i < oldkNNs.size() && j < newkNNs.size()) { + DistanceDBIDPair<D> drp1 = oldkNNs.get(i); + DistanceDBIDPair<D> drp2 = newkNNs.get(j); + // NOTE: we assume that on ties they are ordered the same way! + if(!DBIDUtil.equal(drp1, drp2)) { added.add(drp2); j++; } @@ -208,21 +233,25 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends } } if(i != j) { - for(; i < kNNs.size(); i++) - removed.add(kNNs.get(i)); + for(; i < oldkNNs.size(); i++) { + removed.add(oldkNNs.get(i)); + } + for(; j < newkNNs.size(); j++) { + added.add(newkNNs.get(j)); + } } // add new RkNN - for(DistanceResultPair<D> drp : added) { - Set<DistanceResultPair<D>> rknns = materialized_RkNN.get(drp); - rknns.add(new GenericDistanceResultPair<D>(drp.getDistance(), iter.getDBID())); + for(DistanceDBIDResultIter<D> newnn = added.iter(); newnn.valid(); newnn.advance()) { + TreeSet<DistanceDBIDPair<D>> rknns = materialized_RkNN.get(newnn); + rknns.add(makePair(newnn, id)); } // remove old RkNN - for(DistanceResultPair<D> drp : removed) { - Set<DistanceResultPair<D>> rknns = materialized_RkNN.get(drp); - rknns.remove(new GenericDistanceResultPair<D>(drp.getDistance(), iter.getDBID())); + for(DistanceDBIDResultIter<D> oldnn = removed.iter(); oldnn.valid(); oldnn.advance()) { + TreeSet<DistanceDBIDPair<D>> rknns = materialized_RkNN.get(oldnn); + rknns.remove(makePair(oldnn, id)); } - rkNN_ids.add(iter); + rkNN_ids.add(id); } } return rkNN_ids; @@ -237,39 +266,43 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends if(stepprog != null) { stepprog.beginStep(1, "New deletions ocurred, remove their materialized kNNs and RkNNs.", getLogger()); } + // Temporary storage of removed lists List<KNNResult<D>> kNNs = new ArrayList<KNNResult<D>>(ids.size()); - List<List<DistanceResultPair<D>>> rkNNs = new ArrayList<List<DistanceResultPair<D>>>(ids.size()); - for (DBIDIter iter = aids.iter(); iter.valid(); iter.advance()) { + 
List<TreeSet<DistanceDBIDPair<D>>> rkNNs = new ArrayList<TreeSet<DistanceDBIDPair<D>>>(ids.size()); + for(DBIDIter iter = aids.iter(); iter.valid(); iter.advance()) { kNNs.add(storage.get(iter)); storage.delete(iter); - rkNNs.add(new ArrayList<DistanceResultPair<D>>(materialized_RkNN.get(iter))); + rkNNs.add(materialized_RkNN.get(iter)); materialized_RkNN.delete(iter); } - ArrayDBIDs kNN_ids = extractAndRemoveIDs(kNNs, aids); - ArrayDBIDs rkNN_ids = extractAndRemoveIDs(rkNNs, aids); + // Keep only those IDs not also removed + ArrayDBIDs kNN_ids = affectedkNN(kNNs, aids); + ArrayDBIDs rkNN_ids = affectedRkNN(rkNNs, aids); // update the affected kNNs and RkNNs if(stepprog != null) { stepprog.beginStep(2, "New deletions ocurred, update the affected kNNs and RkNNs.", getLogger()); } - // update the kNNs of the RkNNs - List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); - for(int i = 0; i < rkNN_ids.size(); i++) { - DBID id = rkNN_ids.get(i); - storage.put(id, kNNList.get(i)); - for(DistanceResultPair<D> kNN : kNNList.get(i)) { - materialized_RkNN.get(kNN).add(new GenericDistanceResultPair<D>(kNN.getDistance(), id)); + // Recompute the kNN for affected objects (in rkNN lists) + { + List<? 
extends KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); + int i = 0; + for(DBIDIter reknn = rkNN_ids.iter(); reknn.valid(); reknn.advance(), i++) { + storage.put(reknn, kNNList.get(i)); + for(DistanceDBIDResultIter<D> it = kNNList.get(i).iter(); it.valid(); it.advance()) { + materialized_RkNN.get(it).add(makePair(it, reknn)); + } } } - // update the RkNNs of the kNNs - SetDBIDs idsSet = DBIDUtil.ensureSet(ids); - for(int i = 0; i < kNN_ids.size(); i++) { - DBID id = kNN_ids.get(i); - SortedSet<DistanceResultPair<D>> rkNN = materialized_RkNN.get(id); - for(Iterator<DistanceResultPair<D>> it = rkNN.iterator(); it.hasNext();) { - DistanceResultPair<D> drp = it.next(); - if(idsSet.contains(drp)) { - it.remove(); + // remove objects from RkNNs of objects (in kNN lists) + { + SetDBIDs idsSet = DBIDUtil.ensureSet(ids); + for(DBIDIter nn = kNN_ids.iter(); nn.valid(); nn.advance()) { + TreeSet<DistanceDBIDPair<D>> rkNN = materialized_RkNN.get(nn); + for(Iterator<DistanceDBIDPair<D>> it = rkNN.iterator(); it.hasNext();) { + if(idsSet.contains(it.next())) { + it.remove(); + } } } } @@ -286,6 +319,44 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends } /** + * Extracts and removes the DBIDs in the given collections. + * + * @param extraxt a list of kNN results to extract the DBIDs from + * @param remove the ids to remove + * @return the DBIDs in the given collection + */ + protected ArrayDBIDs affectedkNN(List<? extends KNNResult<D>> extraxt, DBIDs remove) { + HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(); + for(KNNResult<D> drps : extraxt) { + for(DBIDIter iter = drps.iter(); iter.valid(); iter.advance()) { + ids.add(iter); + } + } + ids.removeDBIDs(remove); + // Convert back to array + return DBIDUtil.newArray(ids); + } + + /** + * Extracts and removes the DBIDs in the given collections. 
+ * + * @param extraxt a list of lists of DistanceResultPair to extract + * @param remove the ids to remove + * @return the DBIDs in the given collection + */ + protected ArrayDBIDs affectedRkNN(List<? extends Collection<DistanceDBIDPair<D>>> extraxt, DBIDs remove) { + HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(); + for(Collection<DistanceDBIDPair<D>> drps : extraxt) { + for(DistanceDBIDPair<D> drp : drps) { + ids.add(drp); + } + } + ids.removeDBIDs(remove); + // Convert back to array + return DBIDUtil.newArray(ids); + } + + /** * Returns the materialized kNNs of the specified id. * * @param id the query id @@ -301,11 +372,17 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends * @param id the query id * @return the RkNNs */ - public List<DistanceResultPair<D>> getRKNN(DBIDRef id) { - SortedSet<DistanceResultPair<D>> rKNN = materialized_RkNN.get(id); - if(rKNN == null) + public GenericDistanceDBIDList<D> getRKNN(DBIDRef id) { + TreeSet<DistanceDBIDPair<D>> rKNN = materialized_RkNN.get(id); + if(rKNN == null) { return null; - return new ArrayList<DistanceResultPair<D>>(rKNN); + } + GenericDistanceDBIDList<D> ret = new GenericDistanceDBIDList<D>(rKNN.size()); + for(DistanceDBIDPair<D> pair : rKNN) { + ret.add(pair); + } + ret.sort(); + return ret; } @SuppressWarnings("unchecked") @@ -338,7 +415,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends @Override protected Logging getLogger() { - return logger; + return LOG; } /** diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java index cdc3fce4..f792833e 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java @@ -23,31 +23,28 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; along with this program. 
If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collection; import java.util.List; import javax.swing.event.EventListenerList; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent.Type; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -72,7 +69,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra /** * Logger to use. */ - private static final Logging logger = Logging.getLogger(MaterializeKNNPreprocessor.class); + private static final Logging LOG = Logging.getLogger(MaterializeKNNPreprocessor.class); /** * Flag to use bulk operations. 
@@ -114,12 +111,12 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors (k=" + k + ")", ids.size(), getLogger()) : null; // Try bulk - List<KNNResult<D>> kNNList = null; + List<? extends KNNResult<D>> kNNList = null; if(usebulk) { kNNList = knnQuery.getKNNForBulkDBIDs(ids, k); if(kNNList != null) { - for(int i = 0; i < ids.size(); i++) { - DBID id = ids.get(i); + int i = 0; + for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { storage.put(id, kNNList.get(i)); if(progress != null) { progress.incrementProcessed(getLogger()); @@ -128,7 +125,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra } } else { - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { KNNResult<D> knn = knnQuery.getKNNForDBID(iter, k); storage.put(iter, knn); if(progress != null) { @@ -143,22 +140,23 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra } @Override - public final void insert(DBID id) { - objectsInserted(id); + public final void insert(DBIDRef id) { + objectsInserted(DBIDUtil.deref(id)); } @Override public void insertAll(DBIDs ids) { if(storage == null && ids.size() > 0) { preprocess(); - } else { + } + else { objectsInserted(ids); } } @Override - public boolean delete(DBID id) { - objectsRemoved(id); + public boolean delete(DBIDRef id) { + objectsRemoved(DBIDUtil.deref(id)); return true; } @@ -181,10 +179,12 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra if(stepprog != null) { stepprog.beginStep(1, "New insertions ocurred, materialize their new kNNs.", getLogger()); } - List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(aids, k); - for(int i = 0; i < aids.size(); i++) { - DBID id = aids.get(i); - storage.put(id, kNNList.get(i)); + // Bulk-query kNNs + 
List<? extends KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(aids, k); + // Store in storage + DBIDIter iter = aids.iter(); + for(int i = 0; i < aids.size(); i++, iter.advance()) { + storage.put(iter, kNNList.get(i)); } // update the affected kNNs @@ -215,17 +215,16 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra private ArrayDBIDs updateKNNsAfterInsertion(DBIDs ids) { ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray(); DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids); - for (DBIDIter iter = oldids.iter(); iter.valid(); iter.advance()) { + for(DBIDIter iter = oldids.iter(); iter.valid(); iter.advance()) { KNNResult<D> kNNs = storage.get(iter); D knnDist = kNNs.get(kNNs.size() - 1).getDistance(); // look for new kNNs KNNHeap<D> heap = null; - for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { + for(DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { D dist = distanceQuery.distance(iter, iter2); if(dist.compareTo(knnDist) <= 0) { if(heap == null) { - heap = new KNNHeap<D>(k); - heap.addAll(kNNs); + heap = KNNUtil.newHeap(kNNs); } heap.add(dist, iter2); } @@ -251,8 +250,8 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { KNNResult<D> kNNs = storage.get(iditer); - for(DistanceResultPair<D> kNN : kNNs) { - if(idsSet.contains(kNN)) { + for(DBIDIter it = kNNs.iter(); it.valid(); it.advance()) { + if(idsSet.contains(it)) { rkNN_ids.add(iditer); break; } @@ -260,10 +259,10 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra } // update the kNNs of the RkNNs - List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); - for(int i = 0; i < rkNN_ids.size(); i++) { - DBID id = rkNN_ids.get(i); - storage.put(id, kNNList.get(i)); + List<? 
extends KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k); + DBIDIter iter = rkNN_ids.iter(); + for(int i = 0; i < rkNN_ids.size(); i++, iter.advance()) { + storage.put(iter, kNNList.get(i)); } return rkNN_ids; @@ -282,7 +281,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra if(stepprog != null) { stepprog.beginStep(1, "New deletions ocurred, remove their materialized kNNs.", getLogger()); } - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { storage.delete(iter); } @@ -313,7 +312,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra * @see KNNListener */ protected void fireKNNsInserted(DBIDs insertions, DBIDs updates) { - KNNChangeEvent e = new KNNChangeEvent(this, Type.INSERT, insertions, updates); + KNNChangeEvent e = new KNNChangeEvent(this, KNNChangeEvent.Type.INSERT, insertions, updates); Object[] listeners = listenerList.getListenerList(); for(int i = listeners.length - 2; i >= 0; i -= 2) { if(listeners[i] == KNNListener.class) { @@ -331,7 +330,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra * @see KNNListener */ protected void fireKNNsRemoved(DBIDs removals, DBIDs updates) { - KNNChangeEvent e = new KNNChangeEvent(this, Type.DELETE, removals, updates); + KNNChangeEvent e = new KNNChangeEvent(this, KNNChangeEvent.Type.DELETE, removals, updates); Object[] listeners = listenerList.getListenerList(); for(int i = listeners.length - 2; i >= 0; i -= 2) { if(listeners[i] == KNNListener.class) { @@ -341,27 +340,6 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra } /** - * Extracts and removes the DBIDs in the given collections. - * - * @param extraxt a list of lists of DistanceResultPair to extract - * @param remove the ids to remove - * @return the DBIDs in the given collection - */ - protected ArrayDBIDs extractAndRemoveIDs(List<? 
extends Collection<DistanceResultPair<D>>> extraxt, ArrayDBIDs remove) { - HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(); - for(Collection<DistanceResultPair<D>> drps : extraxt) { - for(DistanceResultPair<D> drp : drps) { - ids.add(drp); - } - } - for (DBIDIter iter = remove.iter(); iter.valid(); iter.advance()) { - ids.remove(iter); - } - // Convert back to array - return DBIDUtil.newArray(ids); - } - - /** * Adds a {@link KNNListener} which will be invoked when the kNNs of objects * are changing. * @@ -397,7 +375,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra @Override protected Logging getLogger() { - return logger; + return LOG; } /** diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java index d3df7855..b82e9c77 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java @@ -33,9 +33,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDPair; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; import de.lmu.ifi.dbs.elki.index.tree.Node; @@ -46,7 +48,6 @@ import 
de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.ResultUtil; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -69,11 +70,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; */ @Title("Spatial Approximation Materialize kNN Preprocessor") @Description("Caterializes the (approximate) k nearest neighbors of objects of a database using a spatial approximation.") -public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends NumberVector<? super O, ?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> { +public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends NumberVector<?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> { /** * Logger to use */ - private static final Logging logger = Logging.getLogger(MetricalIndexApproximationMaterializeKNNPreprocessor.class); + private static final Logging LOG = Logging.getLogger(MetricalIndexApproximationMaterializeKNNPreprocessor.class); /** * Constructor @@ -113,9 +114,9 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb for(int i = 0; i < size; i++) { ids.add(((LeafEntry) node.getEntry(i)).getDBID()); } - HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>(size * size * 3 / 8); + HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>((size * size * 3) >> 2); for(DBIDIter id = ids.iter(); id.valid(); id.advance()) { - KNNHeap<D> kNN = new KNNHeap<D>(k, distanceQuery.infiniteDistance()); + KNNHeap<D> kNN = KNNUtil.newHeap(distanceFunction, k); for(DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) { DBIDPair key 
= DBIDUtil.newPair(id, id2); D d = cache.remove(key); @@ -173,7 +174,7 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb } throw new IllegalStateException("No metrical index found!"); } - + @Override public String getLongName() { return "Metrical index knn approximation"; @@ -186,7 +187,7 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -203,7 +204,7 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb * @param <N> the type of spatial nodes in the spatial index * @param <E> the type of spatial entries in the spatial index */ - public static class Factory<O extends NumberVector<? super O, ?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> { + public static class Factory<O extends NumberVector<?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> { /** * Constructor. * @@ -227,7 +228,7 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<? 
super O, ?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + public static class Parameterizer<O extends NumberVector<?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { @Override protected Factory<O, D, N, E> makeInstance() { return new Factory<O, D, N, E>(k, distanceFunction); diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java index 79c70642..353e227d 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java @@ -27,26 +27,29 @@ import java.util.HashMap; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDPair; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import 
de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.MeanVariance; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; /** * A preprocessor for annotation of the k nearest neighbors (and their @@ -62,12 +65,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; @Title("Partitioning Approximate kNN Preprocessor") @Description("Caterializes the (approximate) k nearest neighbors of objects of a database by partitioning and only computing kNN within each partition.") public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> { - // TODO: randomize/shuffle? - /** * Logger to use */ - private static final Logging logger = Logging.getLogger(PartitionApproximationMaterializeKNNPreprocessor.class); + private static final Logging LOG = Logging.getLogger(PartitionApproximationMaterializeKNNPreprocessor.class); /** * Number of partitions to use. 
@@ -75,16 +76,23 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista private final int partitions; /** + * Random generator + */ + private final RandomFactory rnd; + + /** * Constructor * * @param relation Relation to process * @param distanceFunction the distance function to use * @param k query k * @param partitions Number of partitions + * @param rnd Random number generator */ - public PartitionApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, int partitions) { + public PartitionApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, int partitions, RandomFactory rnd) { super(relation, distanceFunction, k); this.partitions = partitions; + this.rnd = rnd; } @Override @@ -92,33 +100,42 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction); storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNResult.class); MeanVariance ksize = new MeanVariance(); - if(logger.isVerbose()) { - logger.verbose("Approximating nearest neighbor lists to database objects"); + if (LOG.isVerbose()) { + LOG.verbose("Approximating nearest neighbor lists to database objects"); } - ArrayDBIDs aids = DBIDUtil.ensureArray(relation.getDBIDs()); + // Produce a random shuffling of the IDs: + ArrayModifiableDBIDs aids = DBIDUtil.newArray(relation.getDBIDs()); + DBIDUtil.randomShuffle(aids, rnd); int minsize = (int) Math.floor(aids.size() / partitions); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Processing partitions.", partitions, logger) : null; - for(int part = 0; part < partitions; part++) { + FiniteProgress progress = LOG.isVerbose() ? 
new FiniteProgress("Processing partitions.", partitions, LOG) : null; + for (int part = 0; part < partitions; part++) { int size = (partitions * minsize + part >= aids.size()) ? minsize : minsize + 1; // Collect the ids in this node. ArrayModifiableDBIDs ids = DBIDUtil.newArray(size); - for(int i = 0; i < size; i++) { - assert (size * partitions + part < aids.size()); - ids.add(aids.get(i * partitions + part)); + { // TODO: this is a bit overly complicated. The code dates back to when + // we were not shuffling the array beforehand. Right now, we could just + // compute the proper partition sizes and split it directly. But + // ArrayDBIDs does not have a "sublist" function yet, anyway. + DBIDArrayIter iter = aids.iter(); + // Offset - really cheap on array iterators. + iter.seek(part); + // Seek in steps of "partitions". Also just a += instead of ++ op! + for (; iter.valid(); iter.advance(partitions)) { + ids.add(iter); + } } - HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>(size * size * 3 / 8); + HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>((size * size * 3) >> 3); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - KNNHeap<D> kNN = new KNNHeap<D>(k, distanceQuery.infiniteDistance()); + KNNHeap<D> kNN = KNNUtil.newHeap(distanceFunction, k); for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) { DBIDPair key = DBIDUtil.newPair(iter, iter2); D d = cache.remove(key); - if(d != null) { + if (d != null) { // consume the previous result. kNN.add(d, iter2); - } - else { + } else { // compute new and store the previous result. 
d = distanceQuery.distance(iter, iter2); kNN.add(d, iter2); @@ -130,26 +147,26 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista ksize.put(kNN.size()); storage.put(iter, kNN.toKNNList()); } - if(logger.isDebugging()) { - if(cache.size() > 0) { - logger.warning("Cache should be empty after each run, but still has " + cache.size() + " elements."); + if (LOG.isDebugging()) { + if (cache.size() > 0) { + LOG.warning("Cache should be empty after each run, but still has " + cache.size() + " elements."); } } - if(progress != null) { - progress.incrementProcessed(logger); + if (progress != null) { + progress.incrementProcessed(LOG); } } - if(progress != null) { - progress.ensureCompleted(logger); + if (progress != null) { + progress.ensureCompleted(LOG); } - if(logger.isVerbose()) { - logger.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned."); + if (LOG.isVerbose()) { + LOG.verbose("On average, " + ksize.getMean() + " +- " + ksize.getSampleStddev() + " neighbors returned."); } } @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -176,18 +193,14 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista */ public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> { /** - * Parameter to specify the number of partitions to use for materializing - * the kNN. Must be an integer greater than 1. - * <p> - * Key: {@code -partknn.p} - * </p> + * The number of partitions to use */ - public static final OptionID PARTITIONS_ID = OptionID.getOrCreateOptionID("partknn.p", "The number of partitions to use for approximate kNN."); + int partitions; /** - * The number of partitions to use + * Random generator */ - int partitions; + private final RandomFactory rnd; /** * Constructor. 
@@ -195,15 +208,17 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista * @param k k * @param distanceFunction distance function * @param partitions number of partitions + * @param rnd */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction, int partitions) { + public Factory(int k, DistanceFunction<? super O, D> distanceFunction, int partitions, RandomFactory rnd) { super(k, distanceFunction); this.partitions = partitions; + this.rnd = rnd; } @Override public PartitionApproximationMaterializeKNNPreprocessor<O, D> instantiate(Relation<O> relation) { - PartitionApproximationMaterializeKNNPreprocessor<O, D> instance = new PartitionApproximationMaterializeKNNPreprocessor<O, D>(relation, distanceFunction, k, partitions); + PartitionApproximationMaterializeKNNPreprocessor<O, D> instance = new PartitionApproximationMaterializeKNNPreprocessor<O, D>(relation, distanceFunction, k, partitions, rnd); return instance; } @@ -215,21 +230,51 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista * @apiviz.exclude */ public static class Parameterizer<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> { + /** + * Parameter to specify the number of partitions to use for materializing + * the kNN. Must be an integer greater than 1. + * <p> + * Key: {@code -partknn.p} + * </p> + */ + public static final OptionID PARTITIONS_ID = new OptionID("partknn.p", "The number of partitions to use for approximate kNN."); + + /** + * Parameter to specify the random number generator. 
+ * <p> + * Key: {@code -partknn.seed} + * </p> + */ + public static final OptionID SEED_ID = new OptionID("partknn.seed", "The random number generator seed."); + + /** + * Number of partitions + */ protected int partitions = 0; + /** + * Random generator + */ + private RandomFactory rnd; + @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter partitionsP = new IntParameter(PARTITIONS_ID, new GreaterConstraint(1)); - if(config.grab(partitionsP)) { + final IntParameter partitionsP = new IntParameter(PARTITIONS_ID); + partitionsP.addConstraint(new GreaterConstraint(1)); + if (config.grab(partitionsP)) { partitions = partitionsP.getValue(); } + RandomParameter rndP = new RandomParameter(SEED_ID); + if (config.grab(rndP)) { + rnd = rndP.getValue(); + } } @Override protected Factory<O, D> makeInstance() { - return new Factory<O, D>(k, distanceFunction, partitions); + return new Factory<O, D>(k, distanceFunction, partitions, rnd); } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java index fa868109..592a1206 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Random; - import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; @@ -32,19 +30,21 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; /** * Class that computed the kNN only on a random sample. @@ -58,7 +58,7 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr /** * Logger */ - private static final Logging logger = Logging.getLogger(RandomSampleKNNPreprocessor.class); + private static final Logging LOG = Logging.getLogger(RandomSampleKNNPreprocessor.class); /** * Relative share of objects to get @@ -66,9 +66,9 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr private final double share; /** - * Random seed + * Random generator */ - private final Long seed; + private final RandomFactory rnd; /** * Constructor. @@ -77,12 +77,12 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * @param distanceFunction distance function * @param k k * @param share Relative share - * @param seed Random seed (may be null) + * @param rnd Random generator */ - public RandomSampleKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, double share, Long seed) { + public RandomSampleKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, double share, RandomFactory rnd) { super(relation, distanceFunction, k); this.share = share; - this.seed = seed; + this.rnd = rnd; } @Override @@ -93,33 +93,30 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); final int samplesize = (int) (ids.size() * share); - final long iseed = (seed != null) ? 
seed : (new Random()).nextLong(); - int i = 0; for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - KNNHeap<D> kNN = new KNNHeap<D>(k, distanceQuery.infiniteDistance()); + KNNHeap<D> kNN = KNNUtil.newHeap(distanceFunction, k); - long rseed = i * 0x7FFFFFFFFFFFFFE7L + iseed; - DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, rseed); + DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, rnd); for (DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) { D dist = distanceQuery.distance(iter, iter2); kNN.add(dist, iter2); } storage.put(iter, kNN.toKNNList()); - if(progress != null) { + if (progress != null) { progress.incrementProcessed(getLogger()); } } - if(progress != null) { + if (progress != null) { progress.ensureCompleted(getLogger()); } } @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -151,9 +148,9 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr private final double share; /** - * Random seed + * Random generator */ - private final Long seed; + private final RandomFactory rnd; /** * Constructor. @@ -161,17 +158,17 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * @param k K * @param distanceFunction distance function * @param share Sample size (relative) - * @param seed Random seed + * @param rnd Random generator */ - public Factory(int k, DistanceFunction<? super O, D> distanceFunction, double share, Long seed) { + public Factory(int k, DistanceFunction<? 
super O, D> distanceFunction, double share, RandomFactory rnd) { super(k, distanceFunction); this.share = share; - this.seed = seed; + this.rnd = rnd; } @Override public RandomSampleKNNPreprocessor<O, D> instantiate(Relation<O> relation) { - return new RandomSampleKNNPreprocessor<O, D>(relation, distanceFunction, k, share, seed); + return new RandomSampleKNNPreprocessor<O, D>(relation, distanceFunction, k, share, rnd); } /** @@ -193,7 +190,7 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * Key: {@code -randomknn.share} * </p> */ - public static final OptionID SHARE_ID = OptionID.getOrCreateOptionID("randomknn.share", "The relative amount of objects to consider for kNN computations."); + public static final OptionID SHARE_ID = new OptionID("randomknn.share", "The relative amount of objects to consider for kNN computations."); /** * Random number generator seed. @@ -202,7 +199,7 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr * Key: {@code -randomknn.seed} * </p> */ - public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("randomknn.seed", "The random number seed."); + public static final OptionID SEED_ID = new OptionID("randomknn.seed", "The random number seed."); /** * Relative share of objects to get @@ -210,27 +207,29 @@ public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends Abstr private double share = 0.0; /** - * Random seed + * Random generator */ - private Long seed = null; + private RandomFactory rnd; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter shareP = new DoubleParameter(SHARE_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 1.0, IntervalBoundary.OPEN)); - if(config.grab(shareP)) { + DoubleParameter shareP = new DoubleParameter(SHARE_ID); + shareP.addConstraint(new GreaterConstraint(0.0)); + shareP.addConstraint(new LessConstraint(1.0)); + if (config.grab(shareP)) { share = 
shareP.getValue(); } - LongParameter seedP = new LongParameter(SEED_ID, true); - if(config.grab(seedP)) { - seed = seedP.getValue(); + RandomParameter rndP = new RandomParameter(SEED_ID); + if (config.grab(rndP)) { + rnd = rndP.getValue(); } } @Override protected RandomSampleKNNPreprocessor.Factory<O, D> makeInstance() { - return new RandomSampleKNNPreprocessor.Factory<O, D>(k, distanceFunction, share, seed); + return new RandomSampleKNNPreprocessor.Factory<O, D>(k, distanceFunction, share, rnd); } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java index b206194b..c7963e14 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java @@ -35,9 +35,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDPair; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; @@ -47,7 +49,6 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.ResultUtil; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; @@ -70,11 +71,11 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; */ @Title("Spatial Approximation Materialize kNN Preprocessor") @Description("Caterializes the (approximate) k nearest neighbors of objects of a database using a spatial 
approximation.") -public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVector<?, ?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> { +public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVector<?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> { /** * Logger to use */ - private static final Logging logger = Logging.getLogger(SpatialApproximationMaterializeKNNPreprocessor.class); + private static final Logging LOG = Logging.getLogger(SpatialApproximationMaterializeKNNPreprocessor.class); /** * Constructor @@ -118,9 +119,9 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect for(int i = 0; i < size; i++) { ids.add(((LeafEntry) node.getEntry(i)).getDBID()); } - HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>(size * size * 3 / 8); + HashMap<DBIDPair, D> cache = new HashMap<DBIDPair, D>((size * size * 3) >> 3); for(DBIDIter id = ids.iter(); id.valid(); id.advance()) { - KNNHeap<D> kNN = new KNNHeap<D>(k, distanceQuery.infiniteDistance()); + KNNHeap<D> kNN = KNNUtil.newHeap(distanceFunction, k); for(DBIDIter id2 = ids.iter(); id2.valid(); id2.advance()) { DBIDPair key = DBIDUtil.newPair(id, id2); D d = cache.remove(key); @@ -160,7 +161,7 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -186,20 +187,20 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect * @param <N> the type of spatial nodes in the spatial index * @param <E> the type of spatial entries in the spatial index */ - public static class Factory<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory<NumberVector<?, ?>, D, 
KNNResult<D>> { + public static class Factory<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory<NumberVector<?>, D, KNNResult<D>> { /** * Constructor. * * @param k k * @param distanceFunction distance function */ - public Factory(int k, DistanceFunction<? super NumberVector<?, ?>, D> distanceFunction) { + public Factory(int k, DistanceFunction<? super NumberVector<?>, D> distanceFunction) { super(k, distanceFunction); } @Override - public SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?, ?>, D, N, E> instantiate(Relation<NumberVector<?, ?>> relation) { - SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?, ?>, D, N, E> instance = new SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?, ?>, D, N, E>(relation, distanceFunction, k); + public SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?>, D, N, E> instantiate(Relation<NumberVector<?>> relation) { + SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?>, D, N, E> instance = new SpatialApproximationMaterializeKNNPreprocessor<NumberVector<?>, D, N, E>(relation, distanceFunction, k); return instance; } @@ -210,7 +211,7 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect * * @apiviz.exclude */ - public static class Parameterizer<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<NumberVector<?, ?>, D> { + public static class Parameterizer<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<NumberVector<?>, D> { @Override protected Factory<D, N, E> makeInstance() { return new Factory<D, N, E>(k, distanceFunction); diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/AbstractFilteredPCAIndex.java 
b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/AbstractFilteredPCAIndex.java index 99e956c8..371dbca6 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/AbstractFilteredPCAIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/AbstractFilteredPCAIndex.java @@ -23,19 +23,16 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.localpca; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collection; - import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -64,11 +61,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; // TODO: loosen DoubleDistance restriction. @Title("Local PCA Preprocessor") @Description("Materializes the local PCA and the locally weighted matrix of objects of a database.") -public abstract class AbstractFilteredPCAIndex<NV extends NumberVector<? 
extends NV, ?>> extends AbstractPreprocessorIndex<NV, PCAFilteredResult> implements FilteredLocalPCAIndex<NV> { +public abstract class AbstractFilteredPCAIndex<NV extends NumberVector<?>> extends AbstractPreprocessorIndex<NV, PCAFilteredResult> implements FilteredLocalPCAIndex<NV> { /** * PCA utility object. */ - final protected PCAFilteredRunner<NV> pca; + protected final PCAFilteredRunner<NV> pca; /** * Constructor. @@ -102,12 +99,11 @@ public abstract class AbstractFilteredPCAIndex<NV extends NumberVector<? extends // TODO: use a bulk operation? for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - Collection<DistanceResultPair<DoubleDistance>> objects = objectsForPCA(id); + DistanceDBIDResult<DoubleDistance> objects = objectsForPCA(iditer); PCAFilteredResult pcares = pca.processQueryResult(objects, relation); - storage.put(id, pcares); + storage.put(iditer, pcares); if(progress != null) { progress.incrementProcessed(getLogger()); @@ -140,23 +136,23 @@ public abstract class AbstractFilteredPCAIndex<NV extends NumberVector<? extends * @return the list of the objects (i.e. the ids and the distances to the * query object) to be considered within the PCA */ - protected abstract Collection<DistanceResultPair<DoubleDistance>> objectsForPCA(DBID id); + protected abstract DistanceDBIDResult<DoubleDistance> objectsForPCA(DBIDRef id); /** - * Factory class + * Factory class. * * @author Erich Schubert * * @apiviz.stereotype factory * @apiviz.uses AbstractFilteredPCAIndex oneway - - «create» */ - public static abstract class Factory<NV extends NumberVector<NV, ?>, I extends AbstractFilteredPCAIndex<NV>> implements FilteredLocalPCAIndex.Factory<NV, I>, Parameterizable { + public abstract static class Factory<NV extends NumberVector<?>, I extends AbstractFilteredPCAIndex<NV>> implements FilteredLocalPCAIndex.Factory<NV, I>, Parameterizable { /** * Parameter to specify the distance function used for running PCA. 
* * Key: {@code -localpca.distancefunction} */ - public static final OptionID PCA_DISTANCE_ID = OptionID.getOrCreateOptionID("localpca.distancefunction", "The distance function used to select objects for running PCA."); + public static final OptionID PCA_DISTANCE_ID = new OptionID("localpca.distancefunction", "The distance function used to select objects for running PCA."); /** * Holds the instance of the distance function specified by @@ -196,7 +192,7 @@ public abstract class AbstractFilteredPCAIndex<NV extends NumberVector<? extends * * @apiviz.exclude */ - public static abstract class Parameterizer<NV extends NumberVector<NV, ?>, I extends AbstractFilteredPCAIndex<NV>> extends AbstractParameterizer { + public abstract static class Parameterizer<NV extends NumberVector<?>, I extends AbstractFilteredPCAIndex<NV>> extends AbstractParameterizer { /** * Holds the instance of the distance function specified by * {@link #PCA_DISTANCE_ID}. diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/FilteredLocalPCAIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/FilteredLocalPCAIndex.java index bc5d08c1..d0780751 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/FilteredLocalPCAIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/FilteredLocalPCAIndex.java @@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredResult; * * @param <NV> Vector type */ -public interface FilteredLocalPCAIndex<NV extends NumberVector<?, ?>> extends LocalProjectionIndex<NV, PCAFilteredResult> { +public interface FilteredLocalPCAIndex<NV extends NumberVector<?>> extends LocalProjectionIndex<NV, PCAFilteredResult> { /** * Get the precomputed local PCA for a particular object ID. 
* @@ -57,7 +57,7 @@ public interface FilteredLocalPCAIndex<NV extends NumberVector<?, ?>> extends Lo * @param <NV> Vector type * @param <I> Index type produced */ - public static interface Factory<NV extends NumberVector<?, ?>, I extends FilteredLocalPCAIndex<NV>> extends LocalProjectionIndex.Factory<NV, I> { + public static interface Factory<NV extends NumberVector<?>, I extends FilteredLocalPCAIndex<NV>> extends LocalProjectionIndex.Factory<NV, I> { /** * Instantiate the index for a given database. * diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java index 8d766391..00ce11b8 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/KNNQueryFilteredPCAIndex.java @@ -25,11 +25,11 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.localpca; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.QueryUtil; -import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner; @@ -55,21 +55,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; // TODO: loosen DoubleDistance restriction. @Title("Knn Query Based Local PCA Preprocessor") @Description("Materializes the local PCA and the locally weighted matrix of objects of a database. 
The PCA is based on k nearest neighbor queries.") -public class KNNQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?>> extends AbstractFilteredPCAIndex<NV> { +public class KNNQueryFilteredPCAIndex<NV extends NumberVector<?>> extends AbstractFilteredPCAIndex<NV> { /** * Logger. */ - private static final Logging logger = Logging.getLogger(KNNQueryFilteredPCAIndex.class); + private static final Logging LOG = Logging.getLogger(KNNQueryFilteredPCAIndex.class); /** - * The kNN query instance we use + * The kNN query instance we use. */ - final private KNNQuery<NV, DoubleDistance> knnQuery; + private final KNNQuery<NV, DoubleDistance> knnQuery; /** - * Query k + * Query k. */ - final private int k; + private final int k; /** * Constructor. @@ -86,7 +86,7 @@ public class KNNQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?>> } @Override - protected KNNResult<DoubleDistance> objectsForPCA(DBID id) { + protected KNNResult<DoubleDistance> objectsForPCA(DBIDRef id) { return knnQuery.getKNNForDBID(id, k); } @@ -102,11 +102,11 @@ public class KNNQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?>> @Override public Logging getLogger() { - return logger; + return LOG; } /** - * Factory class + * Factory class. * * @author Erich Schubert * @@ -114,7 +114,7 @@ public class KNNQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?>> * @apiviz.landmark * @apiviz.uses KNNQueryFilteredPCAIndex oneway - - «create» */ - public static class Factory<V extends NumberVector<V, ?>> extends AbstractFilteredPCAIndex.Factory<V, KNNQueryFilteredPCAIndex<V>> { + public static class Factory<V extends NumberVector<?>> extends AbstractFilteredPCAIndex.Factory<V, KNNQueryFilteredPCAIndex<V>> { /** * Optional parameter to specify the number of nearest neighbors considered * in the PCA, must be an integer greater than 0. If this parameter is not @@ -127,7 +127,7 @@ public class KNNQueryFilteredPCAIndex<NV extends NumberVector<? 
extends NV, ?>> * Default value: three times of the dimensionality of the database objects * </p> */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("localpca.k", "The number of nearest neighbors considered in the PCA. " + "If this parameter is not set, k ist set to three " + "times of the dimensionality of the database objects."); + public static final OptionID K_ID = new OptionID("localpca.k", "The number of nearest neighbors considered in the PCA. " + "If this parameter is not set, k ist set to three " + "times of the dimensionality of the database objects."); /** * Holds the value of {@link #K_ID}. @@ -160,13 +160,15 @@ public class KNNQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?>> * * @apiviz.exclude */ - public static class Parameterizer<NV extends NumberVector<NV, ?>> extends AbstractFilteredPCAIndex.Factory.Parameterizer<NV, KNNQueryFilteredPCAIndex<NV>> { + public static class Parameterizer<NV extends NumberVector<?>> extends AbstractFilteredPCAIndex.Factory.Parameterizer<NV, KNNQueryFilteredPCAIndex<NV>> { protected int k = 0; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0), true); + final IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterConstraint(0)); + kP.setOptional(true); if(config.grab(kP)) { k = kP.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/RangeQueryFilteredPCAIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/RangeQueryFilteredPCAIndex.java index 99937dbf..bbc00c64 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/RangeQueryFilteredPCAIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/localpca/RangeQueryFilteredPCAIndex.java @@ -25,11 +25,11 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.localpca; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.QueryUtil; -import 
de.lmu.ifi.dbs.elki.database.ids.DBID; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner; @@ -52,22 +52,22 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter */ @Title("Range Query Based Local PCA Preprocessor") @Description("Materializes the local PCA and the locally weighted matrix of objects of a database. The PCA is based on epsilon range queries.") -public class RangeQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?>> extends AbstractFilteredPCAIndex<NV> { +public class RangeQueryFilteredPCAIndex<NV extends NumberVector<?>> extends AbstractFilteredPCAIndex<NV> { // TODO: lose DoubleDistance restriction. /** * Logger. */ - private static final Logging logger = Logging.getLogger(RangeQueryFilteredPCAIndex.class); + private static final Logging LOG = Logging.getLogger(RangeQueryFilteredPCAIndex.class); /** - * The kNN query instance we use + * The kNN query instance we use. */ - final private RangeQuery<NV, DoubleDistance> rangeQuery; + private final RangeQuery<NV, DoubleDistance> rangeQuery; /** - * Query epsilon + * Query epsilon. */ - final private DoubleDistance epsilon; + private final DoubleDistance epsilon; /** * Constructor. @@ -84,7 +84,7 @@ public class RangeQueryFilteredPCAIndex<NV extends NumberVector<? 
extends NV, ?> } @Override - protected DistanceDBIDResult<DoubleDistance> objectsForPCA(DBID id) { + protected DistanceDBIDResult<DoubleDistance> objectsForPCA(DBIDRef id) { return rangeQuery.getRangeForDBID(id, epsilon); } @@ -100,18 +100,18 @@ public class RangeQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?> @Override public Logging getLogger() { - return logger; + return LOG; } /** - * Factory class + * Factory class. * * @author Erich Schubert * * @apiviz.stereotype factory * @apiviz.uses RangeQueryFilteredPCAIndex oneway - - «create» */ - public static class Factory<V extends NumberVector<V, ?>> extends AbstractFilteredPCAIndex.Factory<V, RangeQueryFilteredPCAIndex<V>> { + public static class Factory<V extends NumberVector<?>> extends AbstractFilteredPCAIndex.Factory<V, RangeQueryFilteredPCAIndex<V>> { /** * Parameter to specify the maximum radius of the neighborhood to be * considered in the PCA, must be suitable to the distance function @@ -119,7 +119,7 @@ public class RangeQueryFilteredPCAIndex<NV extends NumberVector<? extends NV, ?> * * Key: {@code -localpca.epsilon} */ - public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("localpca.epsilon", "The maximum radius of the neighborhood to be considered in the PCA."); + public static final OptionID EPSILON_ID = new OptionID("localpca.epsilon", "The maximum radius of the neighborhood to be considered in the PCA."); /** * Holds the value of {@link #EPSILON_ID}. @@ -152,7 +152,7 @@ public class RangeQueryFilteredPCAIndex<NV extends NumberVector<? 
extends NV, ?> * * @apiviz.exclude */ - public static class Parameterizer<NV extends NumberVector<NV, ?>> extends AbstractFilteredPCAIndex.Factory.Parameterizer<NV, RangeQueryFilteredPCAIndex<NV>> { + public static class Parameterizer<NV extends NumberVector<?>> extends AbstractFilteredPCAIndex.Factory.Parameterizer<NV, RangeQueryFilteredPCAIndex<NV>> { protected DoubleDistance epsilon = null; @Override diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java index 387985ab..56b09dba 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/AbstractPreferenceVectorIndex.java @@ -40,7 +40,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; * * @param <NV> Number vector */ -public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?, ?>> extends AbstractPreprocessorIndex<NV, BitSet> implements PreferenceVectorIndex<NV> { +public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?>> extends AbstractPreprocessorIndex<NV, BitSet> implements PreferenceVectorIndex<NV> { /** * Constructor. * @@ -53,7 +53,7 @@ public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?, ? /** * Preprocessing step. */ - abstract protected void preprocess(); + protected abstract void preprocess(); @Override public BitSet getPreferenceVector(DBIDRef objid) { @@ -64,14 +64,14 @@ public abstract class AbstractPreferenceVectorIndex<NV extends NumberVector<?, ? } /** - * Factory class + * Factory class. 
* * @author Erich Schubert * * @apiviz.stereotype factory * @apiviz.uses AbstractPreferenceVectorIndex oneway - - «create» */ - public static abstract class Factory<V extends NumberVector<?, ?>, I extends PreferenceVectorIndex<V>> implements PreferenceVectorIndex.Factory<V, I>, Parameterizable { + public abstract static class Factory<V extends NumberVector<?>, I extends PreferenceVectorIndex<V>> implements PreferenceVectorIndex.Factory<V, I>, Parameterizable { @Override public abstract I instantiate(Relation<V> relation); diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java index 416a5ffb..2b02e7d6 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/DiSHPreferenceVectorIndex.java @@ -40,31 +40,28 @@ import de.lmu.ifi.dbs.elki.database.HashmapDatabase; import de.lmu.ifi.dbs.elki.database.UpdatableDatabase; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.datasource.bundle.SingleObjectBundle; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingDistanceFunction; +import 
de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.result.AprioriResult; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.WrongParameterValueException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -77,13 +74,15 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * database. * * @author Elke Achtert + * + * @param <V> Vector type */ @Description("Computes the preference vector of objects of a certain database according to the DiSH algorithm.") -public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { +public class DiSHPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { /** - * Logger to use + * Logger to use. */ - protected static final Logging logger = Logging.getLogger(DiSHPreferenceVectorIndex.class); + private static final Logging LOG = Logging.getLogger(DiSHPreferenceVectorIndex.class); /** * Available strategies for determination of the preference vector. 
@@ -92,17 +91,17 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs */ public enum Strategy { /** - * Apriori strategy + * Apriori strategy. */ APRIORI, /** - * Max intersection strategy + * Max intersection strategy. */ MAX_INTERSECTION } /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -139,84 +138,76 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class); - if(logger.isDebugging()) { - StringBuffer msg = new StringBuffer(); + if(LOG.isDebugging()) { + StringBuilder msg = new StringBuilder(); msg.append("\n eps ").append(Arrays.asList(epsilon)); msg.append("\n minpts ").append(minpts); msg.append("\n strategy ").append(strategy); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } - try { - long start = System.currentTimeMillis(); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), logger) : null; - - // only one epsilon value specified - int dim = DatabaseUtil.dimensionality(relation); - if(epsilon.length == 1 && dim != 1) { - DoubleDistance eps = epsilon[0]; - epsilon = new DoubleDistance[dim]; - Arrays.fill(epsilon, eps); - } - - // epsilons as string - RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim); + long start = System.currentTimeMillis(); + FiniteProgress progress = LOG.isVerbose() ? 
new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null; - for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - StringBuffer msg = new StringBuffer(); - final DBID id = it.getDBID(); + // only one epsilon value specified + int dim = RelationUtil.dimensionality(relation); + if(epsilon.length == 1 && dim != 1) { + DoubleDistance eps = epsilon[0]; + epsilon = new DoubleDistance[dim]; + Arrays.fill(epsilon, eps); + } - if(logger.isDebugging()) { - msg.append("\nid = ").append(id); - // msg.append(" ").append(database.get(id)); - //msg.append(" ").append(database.getObjectLabelQuery().get(id)); - } + // epsilons as string + RangeQuery<V, DoubleDistance>[] rangeQueries = initRangeQueries(relation, dim); - // determine neighbors in each dimension - ModifiableDBIDs[] allNeighbors = ClassGenericsUtil.newArrayOfNull(dim, ModifiableDBIDs.class); - for(int d = 0; d < dim; d++) { - DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(id, epsilon[d]); - allNeighbors[d] = DBIDUtil.newHashSet(qrList.size()); - for(DistanceResultPair<DoubleDistance> qr : qrList) { - allNeighbors[d].add(qr.getDBID()); - } - } + for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { + StringBuilder msg = new StringBuilder(); - if(logger.isDebugging()) { - for(int d = 0; d < dim; d++) { - msg.append("\n neighbors [").append(d).append("]"); - msg.append(" (").append(allNeighbors[d].size()).append(") = "); - msg.append(allNeighbors[d]); - } - } + if(LOG.isDebugging()) { + msg.append("\nid = ").append(DBIDUtil.toString(it)); + // msg.append(" ").append(database.get(id)); + // msg.append(" ").append(database.getObjectLabelQuery().get(id)); + } - BitSet preferenceVector = determinePreferenceVector(relation, allNeighbors, msg); - storage.put(id, preferenceVector); + // determine neighbors in each dimension + ModifiableDBIDs[] allNeighbors = new ModifiableDBIDs[dim]; + for(int d = 0; d < dim; d++) { + 
DistanceDBIDResult<DoubleDistance> qrList = rangeQueries[d].getRangeForDBID(it, epsilon[d]); + allNeighbors[d] = DBIDUtil.newHashSet(qrList); + } - if(logger.isDebugging()) { - logger.debugFine(msg.toString()); + if(LOG.isDebugging()) { + for(int d = 0; d < dim; d++) { + msg.append("\n neighbors [").append(d).append(']'); + msg.append(" (").append(allNeighbors[d].size()).append(") = "); + msg.append(allNeighbors[d]); } + } - if(progress != null) { - progress.incrementProcessed(logger); - } + try { + storage.put(it, determinePreferenceVector(relation, allNeighbors, msg)); } - if(progress != null) { - progress.ensureCompleted(logger); + catch(UnableToComplyException e) { + throw new IllegalStateException(e); } - long end = System.currentTimeMillis(); - // TODO: re-add timing code! - if(logger.isVerbose()) { - long elapsedTime = end - start; - logger.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); + if(LOG.isDebugging()) { + LOG.debugFine(msg.toString()); + } + + if(progress != null) { + progress.incrementProcessed(LOG); } } - catch(ParameterException e) { - throw new IllegalStateException(e); + if(progress != null) { + progress.ensureCompleted(LOG); } - catch(UnableToComplyException e) { - throw new IllegalStateException(e); + + long end = System.currentTimeMillis(); + // TODO: re-add timing code! 
+ if(LOG.isVerbose()) { + long elapsedTime = end - start; + LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); } } @@ -227,11 +218,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param neighborIDs the list of ids of the neighbors in each dimension * @param msg a string buffer for debug messages * @return the preference vector - * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException * * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException */ - private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException { + private BitSet determinePreferenceVector(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException { if(strategy.equals(Strategy.APRIORI)) { return determinePreferenceVectorByApriori(relation, neighborIDs, msg); } @@ -250,23 +240,21 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param neighborIDs the list of ids of the neighbors in each dimension * @param msg a string buffer for debug messages * @return the preference vector - * @throws de.lmu.ifi.dbs.elki.utilities.optionhandling.ParameterException * * @throws de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException * */ - private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuffer msg) throws ParameterException, UnableToComplyException { + private BitSet determinePreferenceVectorByApriori(Relation<V> relation, ModifiableDBIDs[] neighborIDs, StringBuilder msg) throws UnableToComplyException { int dimensionality = neighborIDs.length; // database for apriori UpdatableDatabase apriori_db = new HashmapDatabase(); - SimpleTypeInformation<?> bitmeta = VectorFieldTypeInformation.get(BitVector.class, dimensionality); + SimpleTypeInformation<?> bitmeta = new 
VectorFieldTypeInformation<BitVector>(BitVector.class, dimensionality); for(DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - DBID id = it.getDBID(); Bit[] bits = new Bit[dimensionality]; boolean allFalse = true; for(int d = 0; d < dimensionality; d++) { - if(neighborIDs[d].contains(id)) { + if(neighborIDs[d].contains(it)) { bits[d] = new Bit(true); allFalse = false; } @@ -286,9 +274,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs // result of apriori List<BitSet> frequentItemsets = aprioriResult.getSolution(); Map<BitSet, Integer> supports = aprioriResult.getSupports(); - if(logger.isDebugging()) { - msg.append("\n Frequent itemsets: " + frequentItemsets); - msg.append("\n All supports: " + supports); + if(LOG.isDebugging()) { + msg.append("\n Frequent itemsets: ").append(frequentItemsets); + msg.append("\n All supports: ").append(supports); } int maxSupport = 0; int maxCardinality = 0; @@ -302,11 +290,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\n preference "); msg.append(FormatUtil.format(dimensionality, preferenceVector)); - msg.append("\n"); - logger.debugFine(msg.toString()); + msg.append('\n'); + LOG.debugFine(msg.toString()); } return preferenceVector; @@ -319,7 +307,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param msg a string buffer for debug messages * @return the preference vector */ - private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuffer msg) { + private BitSet determinePreferenceVectorByMaxIntersection(ModifiableDBIDs[] neighborIDs, StringBuilder msg) { int dimensionality = neighborIDs.length; BitSet preferenceVector = new BitSet(dimensionality); @@ -330,8 +318,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs candidates.put(i, s_i); } } - 
if(logger.isDebugging()) { - msg.append("\n candidates " + candidates.keySet()); + if(LOG.isDebugging()) { + msg.append("\n candidates ").append(candidates.keySet()); } if(!candidates.isEmpty()) { @@ -355,11 +343,11 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\n preference "); msg.append(FormatUtil.format(dimensionality, preferenceVector)); - msg.append("\n"); - logger.debug(msg.toString()); + msg.append('\n'); + LOG.debug(msg.toString()); } return preferenceVector; @@ -416,20 +404,19 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param dimensionality the dimensionality of the objects * @return the dimension selecting distancefunctions to determine the * preference vectors - * @throws ParameterException */ - private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) throws ParameterException { + private RangeQuery<V, DoubleDistance>[] initRangeQueries(Relation<V> relation, int dimensionality) { Class<RangeQuery<V, DoubleDistance>> rqcls = ClassGenericsUtil.uglyCastIntoSubclass(RangeQuery.class); RangeQuery<V, DoubleDistance>[] rangeQueries = ClassGenericsUtil.newArrayOfNull(dimensionality, rqcls); for(int d = 0; d < dimensionality; d++) { - rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d + 1))); + rangeQueries[d] = relation.getDatabase().getRangeQuery(new PrimitiveDistanceQuery<V, DoubleDistance>(relation, new DimensionSelectingDistanceFunction(d))); } return rangeQueries; } @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -443,7 +430,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Factory class + * Factory class. 
* * @author Erich Schubert * @@ -452,7 +439,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @param <V> Vector type */ - public static class Factory<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> { + public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, DiSHPreferenceVectorIndex<V>> { /** * The default value for epsilon. */ @@ -472,7 +459,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: {@link #DEFAULT_EPSILON} * </p> */ - public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension."); + public static final OptionID EPSILON_ID = new OptionID("dish.epsilon", "A comma separated list of positive doubles specifying the " + "maximum radius of the neighborhood to be " + "considered in each dimension for determination of " + "the preference vector " + "(default is " + DEFAULT_EPSILON + " in each dimension). " + "If only one value is specified, this value " + "will be used for each dimension."); /** * Option name for {@link #MINPTS_ID}. @@ -493,12 +480,12 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Key: {@code -dish.minpts} * </p> */ - public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. 
" + CONDITION); + public static final OptionID MINPTS_ID = new OptionID(MINPTS_P, "Positive threshold for minumum numbers of points in the epsilon-" + "neighborhood of a point. " + CONDITION); /** * Default strategy. */ - public static Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION; + public static final Strategy DEFAULT_STRATEGY = Strategy.MAX_INTERSECTION; /** * The strategy for determination of the preference vector, available @@ -512,10 +499,10 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: {@link #DEFAULT_STRATEGY} * </p> */ - public static final OptionID STRATEGY_ID = OptionID.getOrCreateOptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")"); + public static final OptionID STRATEGY_ID = new OptionID("dish.strategy", "The strategy for determination of the preference vector, " + "available strategies are: [" + Strategy.APRIORI + "| " + Strategy.MAX_INTERSECTION + "]" + "(default is " + DEFAULT_STRATEGY + ")"); /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. */ protected DoubleDistance[] epsilon; @@ -549,7 +536,7 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Return the minpts value + * Return the minpts value. * * @return minpts */ @@ -564,9 +551,9 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { /** - * The epsilon value for each dimension; + * The epsilon value for each dimension. 
*/ protected DoubleDistance[] epsilon; @@ -583,7 +570,8 @@ public class DiSHPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0)); + final IntParameter minptsP = new IntParameter(MINPTS_ID); + minptsP.addConstraint(new GreaterConstraint(0)); if(config.grab(minptsP)) { minpts = minptsP.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java index 65f5f61e..fd6aa0bf 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/HiSCPreferenceVectorIndex.java @@ -30,13 +30,15 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -48,7 +50,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; 
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; @@ -60,14 +62,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @author Elke Achtert * * @see HiSC + * + * @param <V> Vector type */ @Title("HiSC Preprocessor") @Description("Computes the preference vector of objects of a certain database according to the HiSC algorithm.") -public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { +public class HiSCPreferenceVectorIndex<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex<V> implements PreferenceVectorIndex<V> { /** - * Logger to use + * Logger to use. */ - protected static final Logging logger = Logging.getLogger(HiSCPreferenceVectorIndex.class); + private static final Logging LOG = Logging.getLogger(HiSCPreferenceVectorIndex.class); /** * Holds the value of parameter alpha. 
@@ -94,49 +98,47 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override protected void preprocess() { - if(relation == null || relation.size() <= 0) { + if (relation == null || relation.size() <= 0) { throw new IllegalArgumentException(ExceptionMessages.DATABASE_EMPTY); } storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, BitSet.class); - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); long start = System.currentTimeMillis(); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), logger) : null; + FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Preprocessing preference vector", relation.size(), LOG) : null; KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, EuclideanDistanceFunction.STATIC, k); for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { - DBID id = it.getDBID(); - - if(logger.isDebugging()) { - msg.append("\n\nid = ").append(id); - ///msg.append(" ").append(database.getObjectLabelQuery().get(id)); + if (LOG.isDebugging()) { + msg.append("\n\nid = ").append(DBIDUtil.toString(it)); + // /msg.append(" ").append(database.getObjectLabelQuery().get(id)); msg.append("\n knns: "); } - KNNResult<DoubleDistance> knns = knnQuery.getKNNForDBID(id, k); - BitSet preferenceVector = determinePreferenceVector(relation, id, knns.asDBIDs(), msg); - storage.put(id, preferenceVector); + KNNResult<DoubleDistance> knns = knnQuery.getKNNForDBID(it, k); + BitSet preferenceVector = determinePreferenceVector(relation, it, knns, msg); + storage.put(it, preferenceVector); - if(progress != null) { - progress.incrementProcessed(logger); + if (progress != null) { + progress.incrementProcessed(LOG); } } - if(progress != null) { - progress.ensureCompleted(logger); + if (progress != null) { + progress.ensureCompleted(LOG); } - 
if(logger.isDebugging()) { - logger.debugFine(msg.toString()); + if (LOG.isDebugging()) { + LOG.debugFine(msg.toString()); } long end = System.currentTimeMillis(); // TODO: re-add timing code! - if(logger.isVerbose()) { + if (LOG.isVerbose()) { long elapsedTime = end - start; - logger.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); + LOG.verbose(this.getClass().getName() + " runtime: " + elapsedTime + " milliseconds."); } } @@ -150,20 +152,20 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * @param msg a string buffer for debug messages * @return the preference vector */ - private BitSet determinePreferenceVector(Relation<V> relation, DBID id, DBIDs neighborIDs, StringBuffer msg) { + private BitSet determinePreferenceVector(Relation<V> relation, DBIDRef id, DBIDs neighborIDs, StringBuilder msg) { // variances double[] variances = DatabaseUtil.variances(relation, relation.get(id), neighborIDs); // preference vector BitSet preferenceVector = new BitSet(variances.length); - for(int d = 0; d < variances.length; d++) { - if(variances[d] < alpha) { + for (int d = 0; d < variances.length; d++) { + if (variances[d] < alpha) { preferenceVector.set(d); } } - if(msg != null && logger.isDebugging()) { - msg.append("\nalpha " + alpha); + if (msg != null && LOG.isDebugging()) { + msg.append("\nalpha ").append(alpha); msg.append("\nvariances "); msg.append(FormatUtil.format(variances, ", ", 4)); msg.append("\npreference "); @@ -175,7 +177,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -189,7 +191,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } /** - * Factory class + * Factory class. 
* * @author Erich Schubert * @@ -198,7 +200,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @param <V> Vector type */ - public static class Factory<V extends NumberVector<?, ?>> extends AbstractPreferenceVectorIndex.Factory<V, HiSCPreferenceVectorIndex<V>> { + public static class Factory<V extends NumberVector<?>> extends AbstractPreferenceVectorIndex.Factory<V, HiSCPreferenceVectorIndex<V>> { /** * The default value for alpha. */ @@ -214,7 +216,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Key: {@code -hisc.alpha} * </p> */ - public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hisc.alpha", "The maximum absolute variance along a coordinate axis."); + public static final OptionID ALPHA_ID = new OptionID("hisc.alpha", "The maximum absolute variance along a coordinate axis."); /** * The number of nearest neighbors considered to determine the preference @@ -227,7 +229,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * Default value: three times of the dimensionality of the database objects * </p> */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("hisc.k", "The number of nearest neighbors considered to determine the preference vector. If this value is not defined, k ist set to three times of the dimensionality of the database objects."); + public static final OptionID K_ID = new OptionID("hisc.k", "The number of nearest neighbors considered to determine the preference vector. If this value is not defined, k ist set to three times of the dimensionality of the database objects."); /** * Holds the value of parameter {@link #ALPHA_ID}. 
@@ -254,10 +256,9 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs @Override public HiSCPreferenceVectorIndex<V> instantiate(Relation<V> relation) { final int usek; - if(k == null) { - usek = 3 * DatabaseUtil.dimensionality(relation); - } - else { + if (k == null) { + usek = 3 * RelationUtil.dimensionality(relation); + } else { usek = k; } return new HiSCPreferenceVectorIndex<V>(relation, alpha, usek); @@ -270,7 +271,7 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { /** * Holds the value of parameter {@link #ALPHA_ID}. */ @@ -281,17 +282,21 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs */ protected Integer k; - @Override + @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final DoubleParameter ALPHA_PARAM = new DoubleParameter(ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_ALPHA); - if(config.grab(ALPHA_PARAM)) { - alpha = ALPHA_PARAM.getValue(); + final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, DEFAULT_ALPHA); + alphaP.addConstraint(new GreaterConstraint(0.0)); + alphaP.addConstraint(new LessConstraint(1.0)); + if (config.grab(alphaP)) { + alpha = alphaP.doubleValue(); } - final IntParameter K_PARAM = new IntParameter(K_ID, new GreaterConstraint(0), true); - if(config.grab(K_PARAM)) { - k = K_PARAM.getValue(); + final IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterConstraint(0)); + kP.setOptional(true); + if (config.grab(kP)) { + k = kP.intValue(); } } @@ -301,4 +306,4 @@ public class HiSCPreferenceVectorIndex<V extends NumberVector<?, ?>> extends Abs } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java index a0fba8f3..a212c2cd 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/preference/PreferenceVectorIndex.java @@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.index.IndexFactory; * * @param <NV> Vector type */ -public interface PreferenceVectorIndex<NV extends NumberVector<?, ?>> extends Index { +public interface PreferenceVectorIndex<NV extends NumberVector<?>> extends Index { /** * Get the precomputed preference vector for a particular object ID. * @@ -58,7 +58,7 @@ public interface PreferenceVectorIndex<NV extends NumberVector<?, ?>> extends In * @param <V> vector type * @param <I> index type */ - public static interface Factory<V extends NumberVector<?, ?>, I extends PreferenceVectorIndex<V>> extends IndexFactory<V, I> { + public static interface Factory<V extends NumberVector<?>, I extends PreferenceVectorIndex<V>> extends IndexFactory<V, I> { /** * Instantiate the index for a given database. 
* diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java index e4d96028..d57aeb8f 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/snn/SharedNearestNeighborPreprocessor.java @@ -32,12 +32,11 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; @@ -78,7 +77,7 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends /** * Get a logger for this class. */ - private static final Logging logger = Logging.getLogger(SharedNearestNeighborPreprocessor.class); + private static final Logging LOG = Logging.getLogger(SharedNearestNeighborPreprocessor.class); /** * Holds the number of nearest neighbors to be used. 
@@ -117,9 +116,9 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { ArrayModifiableDBIDs neighbors = DBIDUtil.newArray(numberOfNeighbors); KNNResult<D> kNN = knnquery.getKNNForDBID(iditer, numberOfNeighbors); - for(DistanceResultPair<D> pair : kNN) { + for (DBIDIter iter = kNN.iter(); iter.valid(); iter.advance()) { // if(!id.equals(nid)) { - neighbors.add(pair); + neighbors.add(iter); // } // Size limitation to exactly numberOfNeighbors if(neighbors.size() >= numberOfNeighbors) { @@ -147,7 +146,7 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -190,7 +189,7 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends * Key: {@code sharedNearestNeighbors} * </p> */ - public static final OptionID NUMBER_OF_NEIGHBORS_ID = OptionID.getOrCreateOptionID("sharedNearestNeighbors", "number of nearest neighbors to consider (at least 1)"); + public static final OptionID NUMBER_OF_NEIGHBORS_ID = new OptionID("sharedNearestNeighbors", "number of nearest neighbors to consider (at least 1)"); /** * Parameter to indicate the distance function to be used to ascertain the @@ -203,7 +202,7 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends * Key: {@code SNNDistanceFunction} * </p> */ - public static final OptionID DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("SNNDistanceFunction", "the distance function to asses the nearest neighbors"); + public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("SNNDistanceFunction", "the distance function to asses the nearest neighbors"); /** * Holds the number of nearest neighbors to be used. 
@@ -268,7 +267,8 @@ public class SharedNearestNeighborPreprocessor<O, D extends Distance<D>> extends @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter numberOfNeighborsP = new IntParameter(NUMBER_OF_NEIGHBORS_ID, new GreaterEqualConstraint(1)); + final IntParameter numberOfNeighborsP = new IntParameter(NUMBER_OF_NEIGHBORS_ID); + numberOfNeighborsP.addConstraint(new GreaterEqualConstraint(1)); if(config.grab(numberOfNeighborsP)) { numberOfNeighbors = numberOfNeighborsP.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java index da16dd08..1d23681d 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/AbstractSubspaceProjectionIndex.java @@ -32,13 +32,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import 
de.lmu.ifi.dbs.elki.index.preprocessed.AbstractPreprocessorIndex; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -66,7 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; */ @Title("Local PCA Preprocessor") @Description("Materializes the local PCA and the locally weighted matrix of objects of a database.") -public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?, ?>, D extends Distance<D>, P extends ProjectionResult> extends AbstractPreprocessorIndex<NV, P> implements SubspaceProjectionIndex<NV, P> { +public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?>, D extends Distance<D>, P extends ProjectionResult> extends AbstractPreprocessorIndex<NV, P> implements SubspaceProjectionIndex<NV, P> { /** * Contains the value of parameter epsilon; */ @@ -122,10 +122,10 @@ public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?, pres = computeProjection(iditer, neighbors, relation); } else { - DistanceResultPair<D> firstQR = neighbors.iterator().next(); - neighbors = new GenericDistanceDBIDList<D>(); - neighbors.add(firstQR); - pres = computeProjection(iditer, neighbors, relation); + DistanceDBIDPair<D> firstQR = neighbors.iter().getDistancePair(); + GenericDistanceDBIDList<D> newne = new GenericDistanceDBIDList<D>(); + newne.add(firstQR); + pres = computeProjection(iditer, newne, relation); } storage.put(iditer, pres); @@ -177,7 +177,7 @@ public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?, * @apiviz.stereotype factory * @apiviz.uses AbstractSubspaceProjectionIndex oneway - - «create» */ - public static abstract class Factory<NV extends NumberVector<?, ?>, D extends Distance<D>, I extends AbstractSubspaceProjectionIndex<NV, D, ?>> implements SubspaceProjectionIndex.Factory<NV, I>, Parameterizable { + public abstract static class Factory<NV extends NumberVector<?>, D extends Distance<D>, I extends 
AbstractSubspaceProjectionIndex<NV, D, ?>> implements SubspaceProjectionIndex.Factory<NV, I>, Parameterizable { /** * Contains the value of parameter epsilon; */ @@ -222,7 +222,7 @@ public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?, * * @apiviz.exclude */ - public static abstract class Parameterizer<NV extends NumberVector<?, ?>, D extends Distance<D>, C> extends AbstractParameterizer { + public abstract static class Parameterizer<NV extends NumberVector<?>, D extends Distance<D>, C> extends AbstractParameterizer { /** * Contains the value of parameter epsilon; */ @@ -263,9 +263,10 @@ public abstract class AbstractSubspaceProjectionIndex<NV extends NumberVector<?, } protected void configMinPts(Parameterization config) { - IntParameter minptsP = new IntParameter(AbstractProjectedDBSCAN.MINPTS_ID, new GreaterConstraint(0)); + IntParameter minptsP = new IntParameter(AbstractProjectedDBSCAN.MINPTS_ID); + minptsP.addConstraint(new GreaterConstraint(0)); if(config.grab(minptsP)) { - minpts = minptsP.getValue(); + minpts = minptsP.intValue(); } } } diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java index e61b9144..80212981 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/FourCSubspaceIndex.java @@ -26,13 +26,13 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.subspaceproj; import java.util.ArrayList; import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import 
de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; @@ -67,11 +67,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; */ @Title("4C Preprocessor") @Description("Computes the local dimensionality and locally weighted matrix of objects of a certain database according to the 4C algorithm.\n" + "The PCA is based on epsilon range queries.") -public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex<V, D, PCAFilteredResult> { +public class FourCSubspaceIndex<V extends NumberVector<?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex<V, D, PCAFilteredResult> { /** * Our logger */ - private final static Logging logger = Logging.getLogger(FourCSubspaceIndex.class); + private static final Logging LOG = Logging.getLogger(FourCSubspaceIndex.class); /** * The Filtered PCA Runner @@ -95,23 +95,23 @@ public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance @Override protected PCAFilteredResult computeProjection(DBIDRef id, DistanceDBIDResult<D> neighbors, Relation<V> database) { ModifiableDBIDs ids = DBIDUtil.newArray(neighbors.size()); - for(DistanceResultPair<D> neighbor : neighbors) { - ids.add(neighbor.getDBID()); + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + ids.add(neighbor); } PCAFilteredResult pcares = pca.processIds(ids, database); - if(logger.isDebugging()) { - StringBuffer msg = new StringBuffer(); - msg.append(id).append(" "); //.append(database.getObjectLabelQuery().get(id)); + if (LOG.isDebugging()) { + StringBuilder msg = new StringBuilder(); + msg.append(id).append(' '); // 
.append(database.getObjectLabelQuery().get(id)); msg.append("\ncorrDim ").append(pcares.getCorrelationDimension()); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } return pcares; } @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -135,7 +135,7 @@ public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance * @param <V> Vector type * @param <D> Distance type */ - public static class Factory<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory<V, D, FourCSubspaceIndex<V, D>> { + public static class Factory<V extends NumberVector<?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory<V, D, FourCSubspaceIndex<V, D>> { /** * The default value for delta. */ @@ -171,7 +171,7 @@ public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory.Parameterizer<V, D, Factory<V, D>> { + public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory.Parameterizer<V, D, Factory<V, D>> { /** * The Filtered PCA Runner */ @@ -183,18 +183,19 @@ public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance // flag absolute boolean absolute = false; Flag absoluteF = new Flag(LimitEigenPairFilter.EIGENPAIR_FILTER_ABSOLUTE); - if(config.grab(absoluteF)) { - absolute = absoluteF.getValue(); + if (config.grab(absoluteF)) { + absolute = absoluteF.isTrue(); } // Parameter delta double delta = 0.0; - DoubleParameter deltaP = new DoubleParameter(LimitEigenPairFilter.EIGENPAIR_FILTER_DELTA, new GreaterEqualConstraint(0), DEFAULT_DELTA); - if(config.grab(deltaP)) { - delta = deltaP.getValue(); + DoubleParameter deltaP = new DoubleParameter(LimitEigenPairFilter.EIGENPAIR_FILTER_DELTA, 
DEFAULT_DELTA); + deltaP.addConstraint(new GreaterEqualConstraint(0)); + if (config.grab(deltaP)) { + delta = deltaP.doubleValue(); } // Absolute flag doesn't have a sensible default value for delta. - if(absolute && deltaP.tookDefaultValue()) { + if (absolute && deltaP.tookDefaultValue()) { config.reportError(new WrongParameterValueException("Illegal parameter setting: " + "Flag " + absoluteF.getName() + " is set, " + "but no value for " + deltaP.getName() + " is specified.")); } @@ -220,7 +221,7 @@ public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance // eigen pair filter pcaParameters.addParameter(PCAFilteredRunner.PCA_EIGENPAIR_FILTER, LimitEigenPairFilter.class.getName()); // abs - if(absolute) { + if (absolute) { pcaParameters.addFlag(LimitEigenPairFilter.EIGENPAIR_FILTER_ABSOLUTE); } // delta @@ -231,18 +232,18 @@ public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance pcaParameters.addParameter(PCAFilteredRunner.SMALL_ID, 1); Class<PCAFilteredRunner<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(PCAFilteredRunner.class); pca = pcaParameters.tryInstantiate(cls); - for(ParameterException e : pcaParameters.getErrors()) { + for (ParameterException e : pcaParameters.getErrors()) { LoggingUtil.warning("Error in internal parameterization: " + e.getMessage()); } - final ArrayList<ParameterConstraint<Number>> deltaCons = new ArrayList<ParameterConstraint<Number>>(); + final ArrayList<ParameterConstraint<? super Double>> deltaCons = new ArrayList<ParameterConstraint<? super Double>>(); // TODO: this constraint is already set in the parameter itself, since // it // also applies to the relative case, right? 
-- erich // deltaCons.add(new GreaterEqualConstraint(0)); deltaCons.add(new LessEqualConstraint(1)); - GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<Number, Double>(deltaP, deltaCons, absoluteF, false); + GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<Double>(deltaP, deltaCons, absoluteF, false); config.checkConstraint(gpc); } @@ -252,4 +253,4 @@ public class FourCSubspaceIndex<V extends NumberVector<V, ?>, D extends Distance } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java index 34bfb5e9..17590804 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/PreDeConSubspaceIndex.java @@ -24,11 +24,11 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.subspaceproj; */ import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -37,8 +37,8 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @@ -55,11 +55,11 @@ import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; */ @Title("PreDeCon Preprocessor") @Description("Computes the projected dimension of objects of a certain database according to the PreDeCon algorithm.\n" + "The variance analysis is based on epsilon range queries.") -public class PreDeConSubspaceIndex<V extends NumberVector<? extends V, ?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex<V, D, SubspaceProjectionResult> { +public class PreDeConSubspaceIndex<V extends NumberVector<?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex<V, D, SubspaceProjectionResult> { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(PreDeConSubspaceIndex.class); + private static final Logging LOG = Logging.getLogger(PreDeConSubspaceIndex.class); /** * The threshold for small eigenvalues. @@ -87,25 +87,25 @@ public class PreDeConSubspaceIndex<V extends NumberVector<? extends V, ?>, D ext @Override protected SubspaceProjectionResult computeProjection(DBIDRef id, DistanceDBIDResult<D> neighbors, Relation<V> database) { - StringBuffer msg = null; + StringBuilder msg = null; int referenceSetSize = neighbors.size(); V obj = database.get(id); - if(getLogger().isDebugging()) { - msg = new StringBuffer(); - msg.append("referenceSetSize = " + referenceSetSize); - msg.append("\ndelta = " + delta); + if (getLogger().isDebugging()) { + msg = new StringBuilder(); + msg.append("referenceSetSize = ").append(referenceSetSize); + msg.append("\ndelta = ").append(delta); } - if(referenceSetSize == 0) { + if (referenceSetSize == 0) { throw new RuntimeException("Reference Set Size = 0. This should never happen!"); } // prepare similarity matrix int dim = obj.getDimensionality(); Matrix simMatrix = new Matrix(dim, dim, 0); - for(int i = 0; i < dim; i++) { + for (int i = 0; i < dim; i++) { simMatrix.set(i, i, 1); } @@ -114,38 +114,41 @@ public class PreDeConSubspaceIndex<V extends NumberVector<? 
extends V, ?>, D ext // start variance analysis double[] sum = new double[dim]; - for(DistanceResultPair<D> neighbor : neighbors) { - V o = database.get(neighbor.getDBID()); - for(int d = 0; d < dim; d++) { - sum[d] += Math.pow(obj.doubleValue(d + 1) - o.doubleValue(d + 1), 2.0); + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + V o = database.get(neighbor); + for (int d = 0; d < dim; d++) { + sum[d] += Math.pow(obj.doubleValue(d) - o.doubleValue(d), 2.0); } } - for(int d = 0; d < dim; d++) { - if(Math.sqrt(sum[d]) / referenceSetSize <= delta) { - if(msg != null) { - msg.append("\nsum[" + d + "]= " + sum[d]); - msg.append("\n Math.sqrt(sum[d]) / referenceSetSize)= " + Math.sqrt(sum[d]) / referenceSetSize); + for (int d = 0; d < dim; d++) { + if (Math.sqrt(sum[d]) / referenceSetSize <= delta) { + if (msg != null) { + msg.append("\nsum[").append(d).append("]= ").append(sum[d]); + msg.append("\n Math.sqrt(sum[d]) / referenceSetSize)= ").append(Math.sqrt(sum[d]) / referenceSetSize); } // projDim++; simMatrix.set(d, d, kappa); - } - else { + } else { // bug in paper? projDim++; } } - if(projDim == 0) { - if(msg != null) { + if (projDim == 0) { + if (msg != null) { // msg.append("\nprojDim == 0!"); } projDim = dim; } - if(msg != null) { - msg.append("\nprojDim " /*+ database.getObjectLabelQuery().get(id)*/ + ": " + projDim); - msg.append("\nsimMatrix " /*+ database.getObjectLabelQuery().get(id)*/ + ": " + FormatUtil.format(simMatrix, FormatUtil.NF4)); + if (msg != null) { + msg.append("\nprojDim "); + // .append(database.getObjectLabelQuery().get(id)); + msg.append(": ").append(projDim); + msg.append("\nsimMatrix "); + // .append(database.getObjectLabelQuery().get(id)); + msg.append(": ").append(FormatUtil.format(simMatrix, FormatUtil.NF4)); getLogger().debugFine(msg.toString()); } @@ -164,11 +167,11 @@ public class PreDeConSubspaceIndex<V extends NumberVector<? 
extends V, ?>, D ext @Override protected Logging getLogger() { - return logger; + return LOG; } /** - * Factory + * Factory. * * @author Erich Schubert * @@ -178,22 +181,22 @@ public class PreDeConSubspaceIndex<V extends NumberVector<? extends V, ?>, D ext * @param <V> Vector type * @param <D> Distance type */ - public static class Factory<V extends NumberVector<? extends V, ?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory<V, D, PreDeConSubspaceIndex<V, D>> { + public static class Factory<V extends NumberVector<?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory<V, D, PreDeConSubspaceIndex<V, D>> { /** * The default value for delta. */ public static final double DEFAULT_DELTA = 0.01; /** - * Parameter for Delta + * Parameter for Delta. */ - public static final OptionID DELTA_ID = OptionID.getOrCreateOptionID("predecon.delta", "a double between 0 and 1 specifying the threshold for small Eigenvalues (default is delta = " + DEFAULT_DELTA + ")."); + public static final OptionID DELTA_ID = new OptionID("predecon.delta", "a double between 0 and 1 specifying the threshold for small Eigenvalues (default is delta = " + DEFAULT_DELTA + ")."); /** * The threshold for small eigenvalues. */ protected double delta; - + /** * Constructor. * @@ -219,7 +222,7 @@ public class PreDeConSubspaceIndex<V extends NumberVector<? extends V, ?>, D ext * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<? extends V, ?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory.Parameterizer<V, D, Factory<V, D>> { + public static class Parameterizer<V extends NumberVector<?>, D extends Distance<D>> extends AbstractSubspaceProjectionIndex.Factory.Parameterizer<V, D, Factory<V, D>> { /** * The threshold for small eigenvalues. */ @@ -228,9 +231,11 @@ public class PreDeConSubspaceIndex<V extends NumberVector<? 
extends V, ?>, D ext @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter deltaP = new DoubleParameter(DELTA_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 1.0, IntervalBoundary.OPEN), DEFAULT_DELTA); - if(config.grab(deltaP)) { - delta = deltaP.getValue(); + DoubleParameter deltaP = new DoubleParameter(DELTA_ID, DEFAULT_DELTA); + deltaP.addConstraint(new GreaterConstraint(0.0)); + deltaP.addConstraint(new LessConstraint(1.0)); + if (config.grab(deltaP)) { + delta = deltaP.doubleValue(); } } @@ -240,4 +245,4 @@ public class PreDeConSubspaceIndex<V extends NumberVector<? extends V, ?>, D ext } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/SubspaceProjectionIndex.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/SubspaceProjectionIndex.java index 210db8f6..67cc1701 100644 --- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/SubspaceProjectionIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/subspaceproj/SubspaceProjectionIndex.java @@ -38,7 +38,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectionResult; * * @param <NV> Vector type */ -public interface SubspaceProjectionIndex<NV extends NumberVector<?, ?>, P extends ProjectionResult> extends LocalProjectionIndex<NV, P> { +public interface SubspaceProjectionIndex<NV extends NumberVector<?>, P extends ProjectionResult> extends LocalProjectionIndex<NV, P> { /** * Get the precomputed local subspace for a particular object ID. * @@ -60,7 +60,7 @@ public interface SubspaceProjectionIndex<NV extends NumberVector<?, ?>, P extend * @param <NV> Vector type * @param <I> Index type produced */ - public static interface Factory<NV extends NumberVector<?, ?>, I extends SubspaceProjectionIndex<NV, ?>> extends LocalProjectionIndex.Factory<NV, I> { + public static interface Factory<NV extends NumberVector<?>, I extends SubspaceProjectionIndex<NV, ?>> extends LocalProjectionIndex.Factory<NV, I> { /** * Instantiate the index for a given database. 
* diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/AbstractLeafEntry.java b/src/de/lmu/ifi/dbs/elki/index/tree/AbstractLeafEntry.java index d2d38976..ea5fd6de 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/AbstractLeafEntry.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/AbstractLeafEntry.java @@ -74,7 +74,7 @@ public abstract class AbstractLeafEntry implements LeafEntry { */ @Override public void writeExternal(ObjectOutput out) throws IOException { - out.writeInt(id.getIntegerID()); + out.writeInt(DBIDUtil.asInteger(id)); } /** diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/AbstractNode.java b/src/de/lmu/ifi/dbs/elki/index/tree/AbstractNode.java index 42f4fa11..4f2c6d04 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/AbstractNode.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/AbstractNode.java @@ -341,7 +341,7 @@ public abstract class AbstractNode<E extends Entry> extends AbstractExternalizab public final void splitTo(AbstractNode<E> newNode, List<E> sorting, int splitPoint) { assert (isLeaf() == newNode.isLeaf()); deleteAllEntries(); - StringBuffer msg = LoggingConfiguration.DEBUG ? new StringBuffer("\n") : null; + StringBuilder msg = LoggingConfiguration.DEBUG ? new StringBuilder("\n") : null; for(int i = 0; i < splitPoint; i++) { addEntry(sorting.get(i)); @@ -373,7 +373,7 @@ public abstract class AbstractNode<E extends Entry> extends AbstractExternalizab public final void splitTo(AbstractNode<E> newNode, List<E> assignmentsToFirst, List<E> assignmentsToSecond) { assert (isLeaf() == newNode.isLeaf()); deleteAllEntries(); - StringBuffer msg = LoggingConfiguration.DEBUG ? new StringBuffer() : null; + StringBuilder msg = LoggingConfiguration.DEBUG ? 
new StringBuilder() : null; // assignments to this node for(E entry : assignmentsToFirst) { diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/IndexTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/IndexTree.java index e4cdc71f..13e91151 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/IndexTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/IndexTree.java @@ -44,7 +44,7 @@ public abstract class IndexTree<N extends Node<E>, E extends Entry> { /** * The file storing the entries of this index. */ - final private PageFile<N> file; + private final PageFile<N> file; /** * True if this index is already initialized. @@ -105,7 +105,7 @@ public abstract class IndexTree<N extends Node<E>, E extends Entry> { * * @return the static logger */ - abstract protected Logging getLogger(); + protected abstract Logging getLogger(); /** * Returns the entry representing the root if this index. @@ -213,6 +213,9 @@ public abstract class IndexTree<N extends Node<E>, E extends Entry> { /** * Initializes this index from an existing persistent file. 
+ * + * @param header File header + * @param file Page file */ public void initializeFromFile(TreeIndexHeader header, PageFile<N> file) { this.dirCapacity = header.getDirCapacity(); @@ -221,7 +224,7 @@ public abstract class IndexTree<N extends Node<E>, E extends Entry> { this.leafMinimum = header.getLeafMinimum(); if(getLogger().isDebugging()) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); msg.append(getClass()); msg.append("\n file = ").append(file.getClass()); getLogger().debugFine(msg.toString()); @@ -242,7 +245,7 @@ public abstract class IndexTree<N extends Node<E>, E extends Entry> { createEmptyRoot(exampleLeaf); if(getLogger().isDebugging()) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); msg.append(getClass()).append("\n"); msg.append(" file = ").append(file.getClass()).append("\n"); msg.append(" maximum number of dir entries = ").append((dirCapacity - 1)).append("\n"); diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/IndexTreePath.java b/src/de/lmu/ifi/dbs/elki/index/tree/IndexTreePath.java index 560bd250..2634ae07 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/IndexTreePath.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/IndexTreePath.java @@ -284,7 +284,7 @@ public class IndexTreePath<E extends Entry> { */ @Override public String toString() { - StringBuffer buffer = new StringBuffer("["); + StringBuilder buffer = new StringBuilder("["); for(int counter = 0, maxCounter = getPathCount(); counter < maxCounter; counter++) { if(counter > 0) { diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/TreeIndexFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/TreeIndexFactory.java index a170ca5b..c40effe5 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/TreeIndexFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/TreeIndexFactory.java @@ -60,7 +60,7 @@ public abstract class TreeIndexFactory<O, I extends Index> implements IndexFacto * Key: {@code -treeindex.file} * </p> */ - public static final 
OptionID FILE_ID = OptionID.getOrCreateOptionID("treeindex.file", "The name of the file storing the index. " + "If this parameter is not set the index is hold in the main memory."); + public static final OptionID FILE_ID = new OptionID("treeindex.file", "The name of the file storing the index. " + "If this parameter is not set the index is hold in the main memory."); /** * Parameter to specify the size of a page in bytes, must be an integer @@ -72,7 +72,7 @@ public abstract class TreeIndexFactory<O, I extends Index> implements IndexFacto * Key: {@code -treeindex.pagesize} * </p> */ - public static final OptionID PAGE_SIZE_ID = OptionID.getOrCreateOptionID("treeindex.pagesize", "The size of a page in bytes."); + public static final OptionID PAGE_SIZE_ID = new OptionID("treeindex.pagesize", "The size of a page in bytes."); /** * Parameter to specify the size of the cache in bytes, must be an integer @@ -84,7 +84,7 @@ public abstract class TreeIndexFactory<O, I extends Index> implements IndexFacto * Key: {@code -treeindex.cachesize} * </p> */ - public static final OptionID CACHE_SIZE_ID = OptionID.getOrCreateOptionID("treeindex.cachesize", "The size of the cache in bytes."); + public static final OptionID CACHE_SIZE_ID = new OptionID("treeindex.cachesize", "The size of the cache in bytes."); /** * Holds the name of the file storing the index specified by {@link #FILE_ID}, @@ -126,13 +126,12 @@ public abstract class TreeIndexFactory<O, I extends Index> implements IndexFacto // FIXME: make this single-shot when filename is set! 
protected <N extends ExternalizablePage> PageFile<N> makePageFile(Class<N> cls) { final PageFile<N> inner; - if(fileName == null) { + if (fileName == null) { inner = new MemoryPageFile<N>(pageSize); - } - else { + } else { inner = new PersistentPageFile<N>(pageSize, fileName, cls); } - if(cacheSize >= Integer.MAX_VALUE) { + if (cacheSize >= Integer.MAX_VALUE) { return inner; } return new LRUCache<N>(cacheSize, inner); @@ -148,7 +147,7 @@ public abstract class TreeIndexFactory<O, I extends Index> implements IndexFacto * * @apiviz.exclude */ - public static abstract class Parameterizer<O> extends AbstractParameterizer { + public abstract static class Parameterizer<O> extends AbstractParameterizer { protected String fileName = null; protected int pageSize; @@ -158,26 +157,28 @@ public abstract class TreeIndexFactory<O, I extends Index> implements IndexFacto @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - FileParameter FILE_PARAM = new FileParameter(FILE_ID, FileParameter.FileType.OUTPUT_FILE, true); - if(config.grab(FILE_PARAM)) { - fileName = FILE_PARAM.getValue().getPath(); - } - else { + FileParameter fileNameP = new FileParameter(FILE_ID, FileParameter.FileType.OUTPUT_FILE, true); + if (config.grab(fileNameP)) { + fileName = fileNameP.getValue().getPath(); + } else { fileName = null; } - final IntParameter PAGE_SIZE_PARAM = new IntParameter(PAGE_SIZE_ID, new GreaterConstraint(0), 4000); - if(config.grab(PAGE_SIZE_PARAM)) { - pageSize = PAGE_SIZE_PARAM.getValue(); + final IntParameter pageSizeP = new IntParameter(PAGE_SIZE_ID, 4000); + pageSizeP.addConstraint(new GreaterConstraint(0)); + if (config.grab(pageSizeP)) { + pageSize = pageSizeP.getValue(); } - LongParameter CACHE_SIZE_PARAM = new LongParameter(CACHE_SIZE_ID, new GreaterEqualConstraint(0), Integer.MAX_VALUE); - if(config.grab(CACHE_SIZE_PARAM)) { - cacheSize = CACHE_SIZE_PARAM.getValue(); + // FIXME: long, but limited to int values?!? 
+ LongParameter cacheSizeP = new LongParameter(CACHE_SIZE_ID, Integer.MAX_VALUE); + cacheSizeP.addConstraint(new GreaterEqualConstraint(0)); + if (config.grab(cacheSizeP)) { + cacheSize = cacheSizeP.getValue(); } } @Override protected abstract TreeIndexFactory<O, ?> makeInstance(); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java index a35a4057..a7a063bd 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTree.java @@ -26,7 +26,6 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Map; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; @@ -43,12 +42,8 @@ import de.lmu.ifi.dbs.elki.index.tree.metrical.MetricalIndexTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.Assignments; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.MLBDistSplit; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.split.MTreeSplit; -import de.lmu.ifi.dbs.elki.index.tree.query.GenericMTreeDistanceSearchCandidate; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.persistent.PageFileUtil; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap; /** * Abstract super class for all M-Tree variants. @@ -65,17 +60,17 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap; */ public abstract class AbstractMTree<O, D extends Distance<D>, N extends AbstractMTreeNode<O, D, N, E>, E extends MTreeEntry<D>> extends MetricalIndexTree<O, D, N, E> { /** - * Debugging flag: do extra integrity checks + * Debugging flag: do extra integrity checks. 
*/ - protected final static boolean extraIntegrityChecks = true; + protected static final boolean EXTRA_INTEGRITY_CHECKS = false; /** - * Holds the instance of the trees distance function + * Holds the instance of the trees distance function. */ protected DistanceFunction<O, D> distanceFunction; /** - * The distance query + * The distance query. */ protected DistanceQuery<O, D> distanceQuery; @@ -103,11 +98,11 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } /** - * Get the distance factory + * Get the distance factory. * * @return the distance factory used */ - protected final D getDistanceFactory() { + public final D getDistanceFactory() { return distanceFunction.getDistanceFactory(); } @@ -120,7 +115,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract */ @Override public String toString() { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); int dirNodes = 0; int leafNodes = 0; int objects = 0; @@ -128,8 +123,8 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract N node = getRoot(); - while(!node.isLeaf()) { - if(node.getNumEntries() > 0) { + while (!node.isLeaf()) { + if (node.getNumEntries() > 0) { E entry = node.getEntry(0); node = getNode(entry); levels++; @@ -137,22 +132,20 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<N, E>(this, getRootPath()); - while(enumeration.hasMoreElements()) { + while (enumeration.hasMoreElements()) { IndexTreePath<E> path = enumeration.nextElement(); E entry = path.getLastPathComponent().getEntry(); - if(entry.isLeafEntry()) { + if (entry.isLeafEntry()) { objects++; result.append("\n ").append(entry.toString()); - } - else { + } else { node = getNode(entry); result.append("\n\n").append(node).append(", numEntries = ").append(node.getNumEntries()); 
result.append("\n").append(entry.toString()); - if(node.isLeaf()) { + if (node.isLeaf()) { leafNodes++; - } - else { + } else { dirNodes++; } } @@ -178,17 +171,17 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract */ // todo: implement a bulk load for M-Tree and remove this method public void insert(E entry, boolean withPreInsert) { - if(getLogger().isDebugging()) { + if (getLogger().isDebugging()) { getLogger().debugFine("insert " + entry.getRoutingObjectID() + "\n"); } - if(!initialized) { + if (!initialized) { initialize(entry); } // choose subtree for insertion IndexTreePath<E> subtree = choosePath(entry, getRootPath()); - if(getLogger().isDebugging()) { + if (getLogger().isDebugging()) { getLogger().debugFine("insertion-subtree " + subtree + "\n"); } @@ -198,7 +191,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract entry.setParentDistance(parentDistance); // create leaf entry and do pre insert - if(withPreInsert) { + if (withPreInsert) { preInsert(entry); } @@ -211,8 +204,8 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract adjustTree(subtree); // test - if(extraIntegrityChecks) { - if(withPreInsert) { + if (EXTRA_INTEGRITY_CHECKS) { + if (withPreInsert) { getRoot().integrityCheck(this, getRootEntry()); } } @@ -221,13 +214,13 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract /** * Bulk insert. * - * @param entries + * @param entries Entries to insert */ public void insertAll(List<E> entries) { - if(!initialized && entries.size() > 0) { + if (!initialized && entries.size() > 0) { initialize(entries.get(0)); } - for(E entry : entries) { + for (E entry : entries) { insert(entry, false); } } @@ -239,84 +232,6 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } /** - * Performs a k-nearest neighbor query for the given FeatureVector with the - * given parameter k and the according distance function. 
The query result is - * in ascending order to the distance to the query object. - * - * @param q the id of the query object - * @param knnList the query result list - */ - protected final void doKNNQuery(DBID q, KNNHeap<D> knnList) { - final Heap<GenericMTreeDistanceSearchCandidate<D>> pq = new UpdatableHeap<GenericMTreeDistanceSearchCandidate<D>>(); - - // push root - pq.add(new GenericMTreeDistanceSearchCandidate<D>(getDistanceFactory().nullDistance(), getRootID(), null)); - D d_k = knnList.getKNNDistance(); - - if (d_k == null) { - // Empty tree? - return; - } - - // search in tree - while(!pq.isEmpty()) { - GenericMTreeDistanceSearchCandidate<D> pqNode = pq.poll(); - - if(pqNode.mindist.compareTo(d_k) > 0) { - return; - } - - N node = getNode(pqNode.nodeID); - DBID o_p = pqNode.routingObjectID; - - // directory node - if(!node.isLeaf()) { - for(int i = 0; i < node.getNumEntries(); i++) { - E entry = node.getEntry(i); - DBID o_r = entry.getRoutingObjectID(); - D r_or = entry.getCoveringRadius(); - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : getDistanceFactory().nullDistance(); - D d2 = o_p != null ? distanceQuery.distance(o_r, o_p) : getDistanceFactory().nullDistance(); - - D diff = d1.compareTo(d2) > 0 ? d1.minus(d2) : d2.minus(d1); - - D sum = d_k.plus(r_or); - - if(diff.compareTo(sum) <= 0) { - D d3 = distance(o_r, q); - D d_min = DistanceUtil.max(d3.minus(r_or), getDistanceFactory().nullDistance()); - if(d_min.compareTo(d_k) <= 0) { - pq.add(new GenericMTreeDistanceSearchCandidate<D>(d_min, getPageID(entry), o_r)); - } - } - } - - } - - // data node - else { - for(int i = 0; i < node.getNumEntries(); i++) { - E entry = node.getEntry(i); - DBID o_j = entry.getRoutingObjectID(); - - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : getDistanceFactory().nullDistance(); - D d2 = o_p != null ? distanceQuery.distance(o_j, o_p) : getDistanceFactory().nullDistance(); - - D diff = d1.compareTo(d2) > 0 ? 
d1.minus(d2) : d2.minus(d1); - - if(diff.compareTo(d_k) <= 0) { - D d3 = distanceQuery.distance(o_j, q); - if(d3.compareTo(d_k) <= 0) { - knnList.add(d3, o_j); - d_k = knnList.getKNNDistance(); - } - } - } - } - } - } - - /** * Chooses the best path of the specified subtree for insertion of the given * object. * @@ -328,90 +243,52 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract N node = getNode(subtree.getLastPathComponent().getEntry()); // leaf - if(node.isLeaf()) { + if (node.isLeaf()) { return subtree; } - D nullDistance = getDistanceFactory().nullDistance(); - List<DistanceEntry<D, E>> candidatesWithoutExtension = new ArrayList<DistanceEntry<D, E>>(); - List<DistanceEntry<D, E>> candidatesWithExtension = new ArrayList<DistanceEntry<D, E>>(); + DistanceEntry<D, E> bestCandidate; + D enlarge; // Track best enlargement - null for no enlargement needed. + // Initialize from first: + { + E entry = node.getEntry(0); + D distance = distance(object.getRoutingObjectID(), entry.getRoutingObjectID()); + bestCandidate = new DistanceEntry<D, E>(entry, distance, 0); + if (distance.compareTo(entry.getCoveringRadius()) <= 0) { + enlarge = null; + } else { + enlarge = distance.minus(entry.getCoveringRadius()); + } + } - for(int i = 0; i < node.getNumEntries(); i++) { + // Iterate over remaining + for (int i = 1; i < node.getNumEntries(); i++) { E entry = node.getEntry(i); D distance = distance(object.getRoutingObjectID(), entry.getRoutingObjectID()); - D enlrg = distance.minus(entry.getCoveringRadius()); - if(enlrg.compareTo(nullDistance) <= 0) { - candidatesWithoutExtension.add(new DistanceEntry<D, E>(entry, distance, i)); - } - else { - candidatesWithExtension.add(new DistanceEntry<D, E>(entry, enlrg, i)); + if (distance.compareTo(entry.getCoveringRadius()) <= 0) { + if (enlarge != null || distance.compareTo(bestCandidate.getDistance()) < 0) { + bestCandidate = new DistanceEntry<D, E>(entry, distance, i); + enlarge = null; + } + } else if 
(enlarge != null) { + D enlrg = distance.minus(entry.getCoveringRadius()); + if (enlrg.compareTo(enlarge) < 0) { + bestCandidate = new DistanceEntry<D, E>(entry, distance, i); + enlarge = enlrg; + } } } - DistanceEntry<D, E> bestCandidate; - if(!candidatesWithoutExtension.isEmpty()) { - bestCandidate = Collections.min(candidatesWithoutExtension); - } - else { - Collections.sort(candidatesWithExtension); - bestCandidate = Collections.min(candidatesWithExtension); - E entry = bestCandidate.getEntry(); - D cr = entry.getCoveringRadius(); - entry.setCoveringRadius(cr.plus(bestCandidate.getDistance())); + // Apply enlargement + if (enlarge != null) { + bestCandidate.getEntry().setCoveringRadius(enlarge); } return choosePath(object, subtree.pathByAddingChild(new TreeIndexPathComponent<E>(bestCandidate.getEntry(), bestCandidate.getIndex()))); } /** - * Performs a batch k-nearest neigbor query for a list of query objects. - * - * @param node the node reprsenting the subtree on which the query should be - * performed - * @param ids the ids of th query objects - * @param knnLists the knn lists of the query objcets - * - * @deprecated Change to use by-object NN lookups instead. 
- */ - @Deprecated - protected final void batchNN(N node, DBIDs ids, Map<DBID, KNNHeap<D>> knnLists) { - if(node.isLeaf()) { - for(int i = 0; i < node.getNumEntries(); i++) { - E p = node.getEntry(i); - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID q = iter.getDBID(); - KNNHeap<D> knns_q = knnLists.get(q); - D knn_q_maxDist = knns_q.getKNNDistance(); - - D dist_pq = distanceQuery.distance(p.getRoutingObjectID(), q); - if(dist_pq.compareTo(knn_q_maxDist) <= 0) { - knns_q.add(dist_pq, p.getRoutingObjectID()); - } - } - } - } - else { - List<DistanceEntry<D, E>> entries = getSortedEntries(node, ids); - for(DistanceEntry<D, E> distEntry : entries) { - D minDist = distEntry.getDistance(); - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID q = iter.getDBID(); - KNNHeap<D> knns_q = knnLists.get(q); - D knn_q_maxDist = knns_q.getKNNDistance(); - - if(minDist.compareTo(knn_q_maxDist) <= 0) { - E entry = distEntry.getEntry(); - N child = getNode(entry); - batchNN(child, ids, knnLists); - break; - } - } - } - } - } - - /** * Sorts the entries of the specified node according to their minimum distance * to the specified object. * @@ -422,10 +299,11 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract protected final List<DistanceEntry<D, E>> getSortedEntries(N node, DBID q) { List<DistanceEntry<D, E>> result = new ArrayList<DistanceEntry<D, E>>(); - for(int i = 0; i < node.getNumEntries(); i++) { + for (int i = 0; i < node.getNumEntries(); i++) { E entry = node.getEntry(i); D distance = distance(entry.getRoutingObjectID(), q); - D minDist = entry.getCoveringRadius().compareTo(distance) > 0 ? getDistanceFactory().nullDistance() : distance.minus(entry.getCoveringRadius()); + D radius = entry.getCoveringRadius(); + D minDist = radius.compareTo(distance) > 0 ? 
getDistanceFactory().nullDistance() : distance.minus(radius); result.add(new DistanceEntry<D, E>(entry, minDist, i)); } @@ -445,14 +323,15 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract protected final List<DistanceEntry<D, E>> getSortedEntries(N node, DBIDs ids) { List<DistanceEntry<D, E>> result = new ArrayList<DistanceEntry<D, E>>(); - for(int i = 0; i < node.getNumEntries(); i++) { + for (int i = 0; i < node.getNumEntries(); i++) { E entry = node.getEntry(i); + D radius = entry.getCoveringRadius(); D minMinDist = getDistanceFactory().infiniteDistance(); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - D distance = distanceQuery.distance(entry.getRoutingObjectID(), iter.getDBID()); - D minDist = entry.getCoveringRadius().compareTo(distance) > 0 ? getDistanceFactory().nullDistance() : distance.minus(entry.getCoveringRadius()); - minMinDist = DistanceUtil.max(minMinDist, minDist); + D distance = distanceQuery.distance(entry.getRoutingObjectID(), iter); + D minDist = radius.compareTo(distance) > 0 ? getDistanceFactory().nullDistance() : distance.minus(radius); + minMinDist = DistanceUtil.min(minMinDist, minDist); } result.add(new DistanceEntry<D, E>(entry, minMinDist, i)); } @@ -469,27 +348,13 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * @return the distance between the two specified ids */ protected final D distance(DBID id1, DBID id2) { - if(id1 == null || id2 == null) { + if (id1 == null || id2 == null) { return getDistanceFactory().undefinedDistance(); } return distanceQuery.distance(id1, id2); } /** - * Returns the distance between the given object and the id. 
- * - * @param id1 the first id - * @param o2 the second object - * @return the distance between the two specified objects - */ - protected final D distance(DBID id1, O o2) { - if(id1 == null) { - return getDistanceFactory().undefinedDistance(); - } - return distanceQuery.distance(id1, o2); - } - - /** * Creates a new directory entry representing the specified node. * * @param node the node to be represented by the new entry @@ -498,7 +363,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * the routing object of the parent node * @return the newly created directory entry */ - abstract protected E createNewDirectoryEntry(N node, DBID routingObjectID, D parentDistance); + protected abstract E createNewDirectoryEntry(N node, DBID routingObjectID, D parentDistance); /** * Splits the specified node and returns the split result. @@ -512,10 +377,9 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract MTreeSplit<O, D, N, E> split = new MLBDistSplit<O, D, N, E>(node, distanceQuery); Assignments<D, E> assignments = split.getAssignments(); final N newNode; - if(node.isLeaf()) { + if (node.isLeaf()) { newNode = createNewLeafNode(); - } - else { + } else { newNode = createNewDirectoryNode(); } node.splitTo(newNode, assignments.getFirstAssignments(), assignments.getSecondAssignments()); @@ -524,7 +388,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract writeNode(node); writeNode(newNode); - if(getLogger().isDebugging()) { + if (getLogger().isDebugging()) { String msg = "Split Node " + node.getPageID() + " (" + this.getClass() + ")\n" + " newNode " + newNode.getPageID() + "\n" + " firstPromoted " + assignments.getFirstRoutingObject() + "\n" + " firstAssignments(" + node.getPageID() + ") " + assignments.getFirstAssignments() + "\n" + " firstCR " + assignments.getFirstCoveringRadius() + "\n" + " secondPromoted " + assignments.getSecondRoutingObject() + "\n" + " secondAssignments(" + 
newNode.getPageID() + ") " + assignments.getSecondAssignments() + "\n" + " secondCR " + assignments.getSecondCoveringRadius() + "\n"; getLogger().debugFine(msg); } @@ -533,38 +397,12 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } /** - * Sorts the entries of the specified node according to their minimum distance - * to the specified objects. - * - * @param node the node - * @param ids the ids of the objects - * @return a list of the sorted entries - */ - // FIXME: Duplicate from above? - /* - * private List<DistanceEntry<D, E>> getSortedEntries(N node, DBIDs ids) { - * List<DistanceEntry<D, E>> result = new ArrayList<DistanceEntry<D, E>>(); - * - * for(int i = 0; i < node.getNumEntries(); i++) { E entry = node.getEntry(i); - * - * D minMinDist = distanceFunction.infiniteDistance(); for(DBID q : ids) { D - * distance = distance(entry.getRoutingObjectID(), q); D minDist = - * entry.getCoveringRadius().compareTo(distance) > 0 ? - * distanceFunction.nullDistance() : - * distance.minus(entry.getCoveringRadius()); if(minDist.compareTo(minMinDist) - * < 0) { minMinDist = minDist; } } result.add(new DistanceEntry<D, E>(entry, - * minMinDist, i)); } - * - * Collections.sort(result); return result; } - */ - - /** * Adjusts the tree after insertion of some nodes. 
* * @param subtree the subtree to be adjusted */ private void adjustTree(IndexTreePath<E> subtree) { - if(getLogger().isDebugging()) { + if (getLogger().isDebugging()) { getLogger().debugFine("Adjust tree " + subtree + "\n"); } @@ -573,14 +411,14 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract N node = getNode(subtree.getLastPathComponent().getEntry()); // overflow in node; split the node - if(hasOverflow(node)) { + if (hasOverflow(node)) { SplitResult splitResult = split(node); N splitNode = splitResult.newNode; Assignments<D, E> assignments = splitResult.split.getAssignments(); // if root was split: create a new root that points the two split // nodes - if(isRoot(node)) { + if (isRoot(node)) { // FIXME: stimmen die parentDistance der Kinder in node & splitNode? IndexTreePath<E> newRootPath = createNewRoot(node, splitNode, assignments.getFirstRoutingObject(), assignments.getSecondRoutingObject()); adjustTree(newRootPath); @@ -590,7 +428,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract // get the parent and add the new split node E parentEntry = subtree.getParentPath().getLastPathComponent().getEntry(); N parent = getNode(parentEntry); - if(getLogger().isDebugging()) { + if (getLogger().isDebugging()) { getLogger().debugFine("parent " + parent); } D parentDistance2 = distance(parentEntry.getRoutingObjectID(), assignments.getSecondRoutingObject()); @@ -612,7 +450,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract // no overflow, only adjust parameters of the entry representing the // node else { - if(!isRoot(node)) { + if (!isRoot(node)) { E parentEntry = subtree.getParentPath().getLastPathComponent().getEntry(); N parent = getNode(parentEntry); int index = subtree.getLastPathComponent().getIndex(); @@ -639,7 +477,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * otherwise */ private boolean hasOverflow(N node) { - 
if(node.isLeaf()) { + if (node.isLeaf()) { return node.getNumEntries() == leafCapacity; } @@ -665,9 +503,9 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract // switch the ids oldRoot.setPageID(root.getPageID()); - if(!oldRoot.isLeaf()) { + if (!oldRoot.isLeaf()) { // FIXME: what is happening here? - for(int i = 0; i < oldRoot.getNumEntries(); i++) { + for (int i = 0; i < oldRoot.getNumEntries(); i++) { N node = getNode(oldRoot.getEntry(i)); writeNode(node); } @@ -691,7 +529,7 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract writeNode(root); writeNode(oldRoot); writeNode(newNode); - if(getLogger().isDebugging()) { + if (getLogger().isDebugging()) { String msg = "Create new Root: ID=" + root.getPageID(); msg += "\nchild1 " + oldRoot; msg += "\nchild2 " + newNode; @@ -707,10 +545,22 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract * @apiviz.composedOf MTreeSplit */ private class SplitResult { + /** + * Split used + */ protected MTreeSplit<O, D, N, E> split; + /** + * New sibling + */ protected N newNode; + /** + * Constructor. + * + * @param split Split that was used + * @param newNode New sibling + */ public SplitResult(MTreeSplit<O, D, N, E> split, N newNode) { this.split = split; this.newNode = newNode; @@ -721,16 +571,13 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract public List<E> getLeaves() { List<E> result = new ArrayList<E>(); BreadthFirstEnumeration<N, E> enumeration = new BreadthFirstEnumeration<N, E>(this, getRootPath()); - while(enumeration.hasMoreElements()) { + while (enumeration.hasMoreElements()) { IndexTreePath<E> path = enumeration.nextElement(); E entry = path.getLastPathComponent().getEntry(); - if(entry.isLeafEntry()) { - // ignore, we are within a leaf! - } - else { + if (!entry.isLeafEntry()) { // TODO: any way to skip unnecessary reads? 
N node = getNode(entry); - if(node.isLeaf()) { + if (node.isLeaf()) { result.add(entry); } } @@ -747,8 +594,8 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract int levels = 0; N node = getRoot(); - while(!node.isLeaf()) { - if(node.getNumEntries() > 0) { + while (!node.isLeaf()) { + if (node.getNumEntries() > 0) { E entry = node.getEntry(0); node = getNode(entry); levels++; @@ -756,4 +603,4 @@ public abstract class AbstractMTree<O, D extends Distance<D>, N extends Abstract } return levels; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTreeFactory.java index 9ae50bbe..3769a562 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTreeFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/AbstractMTreeFactory.java @@ -60,7 +60,7 @@ public abstract class AbstractMTreeFactory<O, D extends Distance<D>, N extends A * {@link de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction} * </p> */ - public static final OptionID DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("mtree.distancefunction", "Distance function to determine the distance between database objects."); + public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("mtree.distancefunction", "Distance function to determine the distance between database objects."); /** * Holds the instance of the distance function specified by @@ -93,7 +93,7 @@ public abstract class AbstractMTreeFactory<O, D extends Distance<D>, N extends A * * @apiviz.exclude */ - public static abstract class Parameterizer<O, D extends Distance<D>> extends TreeIndexFactory.Parameterizer<O> { + public abstract static class Parameterizer<O, D extends Distance<D>> extends TreeIndexFactory.Parameterizer<O> { protected DistanceFunction<O, D> distanceFunction = null; @Override diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/MTreeDirectoryEntry.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/MTreeDirectoryEntry.java index 0b07c446..3902973f 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/MTreeDirectoryEntry.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/MTreeDirectoryEntry.java @@ -152,7 +152,7 @@ public class MTreeDirectoryEntry<D extends Distance<D>> extends AbstractDirector @Override public void writeExternal(ObjectOutput out) 
throws IOException { super.writeExternal(out); - out.writeInt(routingObjectID.getIntegerID()); + out.writeInt(DBIDUtil.asInteger(routingObjectID)); out.writeObject(parentDistance); out.writeObject(coveringRadius); } @@ -209,6 +209,6 @@ public class MTreeDirectoryEntry<D extends Distance<D>> extends AbstractDirector if(parentDistance != null ? !parentDistance.equals(that.parentDistance) : that.parentDistance != null) { return false; } - return !(routingObjectID != null ? !routingObjectID.equals(that.routingObjectID) : that.routingObjectID != null); + return !(routingObjectID != null ? !DBIDUtil.equal(routingObjectID, that.routingObjectID) : that.routingObjectID != null); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTree.java index 9391a2fa..f3c440b5 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTree.java @@ -23,16 +23,24 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.List; +import java.util.HashMap; +import java.util.Map; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeNode; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeQueryUtil; import de.lmu.ifi.dbs.elki.persistent.PageFile; /** @@ -48,6 +56,11 @@ import de.lmu.ifi.dbs.elki.persistent.PageFile; */ public abstract class AbstractMkTree<O, D extends Distance<D>, N extends AbstractMTreeNode<O, D, N, E>, E extends MTreeEntry<D>> extends AbstractMTree<O, D, N, E> { /** + * Internal class for performing knn queries + */ + protected KNNQuery<O, D> knnq; + + /** * Constructor. * * @param pagefile Page file @@ -56,8 +69,9 @@ public abstract class AbstractMkTree<O, D extends Distance<D>, N extends Abstrac */ public AbstractMkTree(PageFile<N> pagefile, DistanceQuery<O, D> distanceQuery, DistanceFunction<O, D> distanceFunction) { super(pagefile, distanceQuery, distanceFunction); + this.knnq = MTreeQueryUtil.getKNNQuery(this, distanceQuery); } - + /** * Performs a reverse k-nearest neighbor query for the given object ID. 
The * query result is in ascending order to the distance to the query object. @@ -66,5 +80,25 @@ public abstract class AbstractMkTree<O, D extends Distance<D>, N extends Abstrac * @param k the number of nearest neighbors to be returned * @return a List of the query results */ - public abstract List<DistanceResultPair<D>> reverseKNNQuery(final DBIDRef id, int k); -}
\ No newline at end of file + public abstract DistanceDBIDResult<D> reverseKNNQuery(final DBIDRef id, int k); + + /** + * Performs a batch k-nearest neighbor query for a list of query objects. + * + * @param node the node representing the subtree on which the query should be + * performed + * @param ids the ids of the query objects + * @param kmax Maximum k value + * + * @deprecated Change to use by-object NN lookups instead. + */ + @Deprecated + protected final Map<DBID, KNNResult<D>> batchNN(N node, DBIDs ids, int kmax) { + Map<DBID, KNNResult<D>> res = new HashMap<DBID, KNNResult<D>>(ids.size()); + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + DBID id = DBIDUtil.deref(iter); + res.put(id, knnq.getKNNForDBID(id, kmax)); + } + return res; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnified.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnified.java index 88013000..34507479 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnified.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnified.java @@ -23,7 +23,6 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.HashMap; import java.util.List; import java.util.Map; @@ -32,12 +31,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.TreeIndexHeader; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeNode; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry; import de.lmu.ifi.dbs.elki.persistent.PageFile; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; /** * Abstract class for all M-Tree variants supporting processing of reverse @@ -82,35 +81,29 @@ public abstract class AbstractMkTreeUnified<O, D extends Distance<D>, N extends @Override public void insertAll(List<E> entries) { - if(entries.size() <= 0) { + if (entries.size() <= 0) { return; } - if(!initialized) { + if (!initialized) { initialize(entries.get(0)); } - Map<DBID, KNNHeap<D>> knnLists = new HashMap<DBID, KNNHeap<D>>(); ModifiableDBIDs ids = DBIDUtil.newArray(entries.size()); // insert sequentially - for(E entry : entries) { - // create knnList for the object - final DBID id = entry.getRoutingObjectID(); - - ids.add(id); - knnLists.put(id, new KNNHeap<D>(k_max, getDistanceFactory().infiniteDistance())); - + for (E entry : entries) { + ids.add(entry.getRoutingObjectID()); // insert the object super.insert(entry, false); } // do batch nn - batchNN(getRoot(), ids, knnLists); + Map<DBID, KNNResult<D>> knnLists = batchNN(getRoot(), ids, k_max); // adjust the knn distances kNNdistanceAdjustment(getRootEntry(), knnLists); - if(extraIntegrityChecks) { + if (EXTRA_INTEGRITY_CHECKS) { getRoot().integrityCheck(this, getRootEntry()); } } @@ -121,7 +114,7 @@ public abstract class 
AbstractMkTreeUnified<O, D extends Distance<D>, N extends * @param entry the root entry of the current subtree * @param knnLists a map of knn lists for each leaf entry */ - protected abstract void kNNdistanceAdjustment(E entry, Map<DBID, KNNHeap<D>> knnLists); + protected abstract void kNNdistanceAdjustment(E entry, Map<DBID, KNNResult<D>> knnLists); /** * Get the value of k_max. @@ -131,4 +124,4 @@ public abstract class AbstractMkTreeUnified<O, D extends Distance<D>, N extends public int getKmax() { return k_max; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java index 51df5077..9570995d 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/AbstractMkTreeUnifiedFactory.java @@ -55,7 +55,7 @@ public abstract class AbstractMkTreeUnifiedFactory<O, D extends Distance<D>, N e * Key: {@code -mktree.kmax} * </p> */ - public static final OptionID K_MAX_ID = OptionID.getOrCreateOptionID("mktree.kmax", "Specifies the maximal number k of reverse k nearest neighbors to be supported."); + public static final OptionID K_MAX_ID = new OptionID("mktree.kmax", "Specifies the maximal number k of reverse k nearest neighbors to be supported."); /** * Holds the value of parameter {@link #K_MAX_ID}. 
@@ -83,13 +83,14 @@ public abstract class AbstractMkTreeUnifiedFactory<O, D extends Distance<D>, N e * * @apiviz.exclude */ - public static abstract class Parameterizer<O, D extends Distance<D>> extends AbstractMTreeFactory.Parameterizer<O, D> { + public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractMTreeFactory.Parameterizer<O, D> { protected int k_max; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - IntParameter k_maxP = new IntParameter(K_MAX_ID, new GreaterConstraint(0)); + IntParameter k_maxP = new IntParameter(K_MAX_ID); + k_maxP.addConstraint(new GreaterConstraint(0)); if (config.grab(k_maxP)) { k_max = k_maxP.getValue(); diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java index 0f15a4a9..51d59e73 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTree.java @@ -24,11 +24,8 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mkapp; */ import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; @@ -37,10 +34,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import 
de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTree; @@ -49,8 +48,6 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.statistics.PolynomialRegression; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap; /** @@ -69,7 +66,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(MkAppTree.class); + private static final Logging LOG = Logging.getLogger(MkAppTree.class); /** * Parameter k. 
@@ -130,41 +127,30 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree return; } - if(logger.isDebugging()) { - logger.debugFine("insert " + entries + "\n"); + if(LOG.isDebugging()) { + LOG.debugFine("insert " + entries + "\n"); } if(!initialized) { initialize(entries.get(0)); } - Map<DBID, KNNHeap<D>> knnHeaps = new HashMap<DBID, KNNHeap<D>>(entries.size()); ModifiableDBIDs ids = DBIDUtil.newArray(entries.size()); // insert for(MkAppEntry<D> entry : entries) { - DBID id = entry.getRoutingObjectID(); - // create knnList for the object - knnHeaps.put(id, new KNNHeap<D>(k_max + 1, getDistanceQuery().infiniteDistance())); - - ids.add(id); + ids.add(entry.getRoutingObjectID()); // insert the object super.insert(entry, false); } // do batch nn - batchNN(getRoot(), ids, knnHeaps); - - // finish KNN lists (sort them completely) - Map<DBID, KNNList<D>> knnLists = new HashMap<DBID, KNNList<D>>(); - for(Entry<DBID, KNNHeap<D>> ent : knnHeaps.entrySet()) { - knnLists.put(ent.getKey(), ent.getValue().toKNNList()); - } - + Map<DBID, KNNResult<D>> knnLists = batchNN(getRoot(), ids, k_max + 1); + // adjust the knn distances adjustApproximatedKNNDistances(getRootEntry(), knnLists); - if(extraIntegrityChecks) { + if(EXTRA_INTEGRITY_CHECKS) { getRoot().integrityCheck(this, getRootEntry()); } } @@ -178,9 +164,55 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree * @return a List of the query results */ @Override - public List<DistanceResultPair<D>> reverseKNNQuery(DBIDRef id, int k) { - List<DistanceResultPair<D>> result = doReverseKNNQuery(k, id); - Collections.sort(result); + public DistanceDBIDResult<D> reverseKNNQuery(DBIDRef id, int k) { + GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<D>(); + final Heap<GenericMTreeDistanceSearchCandidate<D>> pq = new UpdatableHeap<GenericMTreeDistanceSearchCandidate<D>>(); + + // push root + pq.add(new 
GenericMTreeDistanceSearchCandidate<D>(getDistanceQuery().nullDistance(), getRootID(), null, null)); + + // search in tree + while(!pq.isEmpty()) { + GenericMTreeDistanceSearchCandidate<D> pqNode = pq.poll(); + // FIXME: cache the distance to the routing object in the queue node! + + MkAppTreeNode<O, D> node = getNode(pqNode.nodeID); + + // directory node + if(!node.isLeaf()) { + for(int i = 0; i < node.getNumEntries(); i++) { + MkAppEntry<D> entry = node.getEntry(i); + D distance = getDistanceQuery().distance(entry.getRoutingObjectID(), id); + D minDist = entry.getCoveringRadius().compareTo(distance) > 0 ? getDistanceQuery().nullDistance() : distance.minus(entry.getCoveringRadius()); + + double approxValue = log ? Math.exp(entry.approximatedValueAt(k)) : entry.approximatedValueAt(k); + if(approxValue < 0) { + approxValue = 0; + } + D approximatedKnnDist = getDistanceQuery().getDistanceFactory().fromDouble(approxValue); + + if(minDist.compareTo(approximatedKnnDist) <= 0) { + pq.add(new GenericMTreeDistanceSearchCandidate<D>(minDist, getPageID(entry), entry.getRoutingObjectID(), null)); + } + } + } + // data node + else { + for(int i = 0; i < node.getNumEntries(); i++) { + MkAppLeafEntry<D> entry = (MkAppLeafEntry<D>) node.getEntry(i); + D distance = getDistanceQuery().distance(entry.getRoutingObjectID(), id); + double approxValue = log ? StrictMath.exp(entry.approximatedValueAt(k)) : entry.approximatedValueAt(k); + if(approxValue < 0) { + approxValue = 0; + } + D approximatedKnnDist = getDistanceQuery().getDistanceFactory().fromDouble(approxValue); + + if(distance.compareTo(approximatedKnnDist) <= 0) { + result.add(distance, entry.getRoutingObjectID()); + } + } + } + } return result; } @@ -215,7 +247,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree } if(dirCapacity < 10) { - logger.warning("Page size is choosen too small! 
Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); + LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); } // leafCapacity = (file.getPageSize() - overhead) / (objectID + @@ -228,90 +260,31 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree } if(leafCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); + LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); } initialized = true; - if(logger.isVerbose()) { - logger.verbose("Directory Capacity: " + (dirCapacity - 1) + "\nLeaf Capacity: " + (leafCapacity - 1)); - } - } - - /** - * Performs a reverse knn query. - * - * @param k the parameter k of the rknn query - * @param q the id of the query object - * @return the result of the reverse knn query - */ - private List<DistanceResultPair<D>> doReverseKNNQuery(int k, DBIDRef q) { - List<DistanceResultPair<D>> result = new ArrayList<DistanceResultPair<D>>(); - final Heap<GenericMTreeDistanceSearchCandidate<D>> pq = new UpdatableHeap<GenericMTreeDistanceSearchCandidate<D>>(); - - // push root - pq.add(new GenericMTreeDistanceSearchCandidate<D>(getDistanceQuery().nullDistance(), getRootID(), null)); - - // search in tree - while(!pq.isEmpty()) { - GenericMTreeDistanceSearchCandidate<D> pqNode = pq.poll(); - - MkAppTreeNode<O, D> node = getNode(pqNode.nodeID); - - // directory node - if(!node.isLeaf()) { - for(int i = 0; i < node.getNumEntries(); i++) { - MkAppEntry<D> entry = node.getEntry(i); - D distance = getDistanceQuery().distance(entry.getRoutingObjectID(), q); - D minDist = entry.getCoveringRadius().compareTo(distance) > 0 ? getDistanceQuery().nullDistance() : distance.minus(entry.getCoveringRadius()); - - double approxValue = log ? 
Math.exp(entry.approximatedValueAt(k)) : entry.approximatedValueAt(k); - if(approxValue < 0) { - approxValue = 0; - } - D approximatedKnnDist = getDistanceQuery().getDistanceFactory().parseString(Double.toString(approxValue)); - - if(minDist.compareTo(approximatedKnnDist) <= 0) { - pq.add(new GenericMTreeDistanceSearchCandidate<D>(minDist, getPageID(entry), entry.getRoutingObjectID())); - } - } - } - // data node - else { - for(int i = 0; i < node.getNumEntries(); i++) { - MkAppLeafEntry<D> entry = (MkAppLeafEntry<D>) node.getEntry(i); - D distance = getDistanceQuery().distance(entry.getRoutingObjectID(), q); - double approxValue = log ? StrictMath.exp(entry.approximatedValueAt(k)) : entry.approximatedValueAt(k); - if(approxValue < 0) { - approxValue = 0; - } - D approximatedKnnDist = getDistanceQuery().getDistanceFactory().parseString(Double.toString(approxValue)); - - if(distance.compareTo(approximatedKnnDist) <= 0) { - result.add(new GenericDistanceResultPair<D>(distance, entry.getRoutingObjectID())); - } - } - } + if(LOG.isVerbose()) { + LOG.verbose("Directory Capacity: " + (dirCapacity - 1) + "\nLeaf Capacity: " + (leafCapacity - 1)); } - return result; } - private List<D> getMeanKNNList(DBIDs ids, Map<DBID, KNNList<D>> knnLists) { + private List<D> getMeanKNNList(DBIDs ids, Map<DBID, KNNResult<D>> knnLists) { double[] means = new double[k_max]; - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); - KNNList<D> knns = knnLists.get(id); - List<D> knnDists = knns.asDistanceList(); - for(int k = 0; k < k_max; k++) { - D knnDist = knnDists.get(k); - means[k] += knnDist.doubleValue(); + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + DBID id = DBIDUtil.deref(iter); + KNNResult<D> knns = knnLists.get(id); + int k = 0; + for(DistanceDBIDResultIter<D> it = knns.iter(); k < k_max && it.valid(); it.advance(), k++) { + means[k] += it.getDistance().doubleValue(); } } List<D> result = new ArrayList<D>(); for(int k 
= 0; k < k_max; k++) { means[k] /= ids.size(); - result.add(getDistanceQuery().getDistanceFactory().parseString(Double.toString(means[k]))); + result.add(getDistanceQuery().getDistanceFactory().fromDouble(means[k])); } return result; @@ -323,7 +296,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree * @param entry the root entry of the current subtree * @param knnLists a map of knn lists for each leaf entry */ - private void adjustApproximatedKNNDistances(MkAppEntry<D> entry, Map<DBID, KNNList<D>> knnLists) { + private void adjustApproximatedKNNDistances(MkAppEntry<D> entry, Map<DBID, KNNResult<D>> knnLists) { MkAppTreeNode<O, D> node = getNode(entry); if(node.isLeaf()) { @@ -378,7 +351,7 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree * @return the polynomial approximation of the specified knn-distances. */ private PolynomialApproximation approximateKnnDistances(List<D> knnDistances) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); // count the zero distances (necessary of log-log space is used) int k_0 = 0; @@ -411,9 +384,9 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree PolynomialRegression regression = new PolynomialRegression(y, x, p); PolynomialApproximation approximation = new PolynomialApproximation(regression.getEstimatedCoefficients().getArrayCopy()); - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("approximation ").append(approximation); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } return approximation; @@ -464,6 +437,6 @@ public class MkAppTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java index 233129b4..c0f12895 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeFactory.java @@ -50,17 +50,17 @@ public class MkAppTreeFactory<O, D extends NumberDistance<D, ?>> extends Abstrac /** * Parameter for nolog */ - public static final OptionID NOLOG_ID = OptionID.getOrCreateOptionID("mkapp.nolog", "Flag to indicate that the approximation is done in the ''normal'' space instead of the log-log space (which is default)."); + public static final OptionID NOLOG_ID = new OptionID("mkapp.nolog", "Flag to indicate that the approximation is done in the ''normal'' space instead of the log-log space (which is default)."); /** * Parameter for k */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("mkapp.k", "positive integer specifying the maximum number k of reverse k nearest neighbors to be supported."); + public static final OptionID K_ID = new OptionID("mkapp.k", "positive integer specifying the maximum number k of reverse k nearest neighbors to be supported."); /** * Parameter for p */ - public static final OptionID P_ID = OptionID.getOrCreateOptionID("mkapp.p", "positive integer specifying the order of the polynomial approximation."); + public static final OptionID P_ID = new OptionID("mkapp.p", "positive integer specifying the order of the polynomial approximation."); /** * Parameter k. 
@@ -131,19 +131,21 @@ public class MkAppTreeFactory<O, D extends NumberDistance<D, ?>> extends Abstrac @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - IntParameter K_PARAM = new IntParameter(K_ID, new GreaterConstraint(0)); - if(config.grab(K_PARAM)) { - k_max = K_PARAM.getValue(); + IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterConstraint(0)); + if(config.grab(kP)) { + k_max = kP.getValue(); } - IntParameter P_PARAM = new IntParameter(P_ID, new GreaterConstraint(0)); - if(config.grab(P_PARAM)) { - p = P_PARAM.getValue(); + IntParameter pP = new IntParameter(P_ID); + pP.addConstraint(new GreaterConstraint(0)); + if(config.grab(pP)) { + p = pP.getValue(); } - Flag NOLOG_FLAG = new Flag(NOLOG_ID); - if(config.grab(NOLOG_FLAG)) { - log = !NOLOG_FLAG.getValue(); + Flag nologF = new Flag(NOLOG_ID); + if(config.grab(nologF)) { + log = !nologF.getValue(); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeIndex.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeIndex.java index 90c31676..9776de63 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeIndex.java @@ -28,6 +28,8 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; @@ -43,8 +45,7 @@ import de.lmu.ifi.dbs.elki.index.RKNNIndex; import de.lmu.ifi.dbs.elki.index.RangeIndex; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; import 
de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTree; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexKNNQuery; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexRangeQuery; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeQueryUtil; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MkTreeRKNNQuery; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; @@ -93,7 +94,7 @@ public class MkAppTreeIndex<O, D extends NumberDistance<D, ?>> extends MkAppTree } @Override - public void insert(DBID id) { + public void insert(DBIDRef id) { throw new UnsupportedOperationException("Insertion of single objects is not supported!"); } @@ -101,7 +102,7 @@ public class MkAppTreeIndex<O, D extends NumberDistance<D, ?>> extends MkAppTree public void insertAll(DBIDs ids) { List<MkAppEntry<D>> objs = new ArrayList<MkAppEntry<D>>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); + DBID id = DBIDUtil.deref(iter); final O object = relation.get(id); objs.add(createNewLeafEntry(id, object, getDistanceFactory().undefinedDistance())); } @@ -116,7 +117,7 @@ public class MkAppTreeIndex<O, D extends NumberDistance<D, ?>> extends MkAppTree * implemented yet. 
*/ @Override - public final boolean delete(DBID id) { + public final boolean delete(DBIDRef id) { throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); } @@ -154,7 +155,7 @@ public class MkAppTreeIndex<O, D extends NumberDistance<D, ?>> extends MkAppTree } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexKNNQuery<O, S>(idx, dq); + return MTreeQueryUtil.getKNNQuery(idx, dq, hints); } @SuppressWarnings("unchecked") @@ -179,7 +180,7 @@ public class MkAppTreeIndex<O, D extends NumberDistance<D, ?>> extends MkAppTree } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexRangeQuery<O, S>(idx, dq); + return MTreeQueryUtil.getRangeQuery(idx, dq, hints); } @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeNode.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeNode.java index bc6a83a9..c730eb4f 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeNode.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkapp/MkAppTreeNode.java @@ -91,7 +91,7 @@ class MkAppTreeNode<O, D extends NumberDistance<D, ?>> extends AbstractMTreeNode } if(LoggingConfiguration.DEBUG) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); msg.append("b " + FormatUtil.format(b, 4)); Logger.getLogger(this.getClass().getName()).fine(msg.toString()); } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/ConvexHull.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/ConvexHull.java index 7cb5a16b..6787aa9c 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/ConvexHull.java +++ 
b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/ConvexHull.java @@ -112,7 +112,7 @@ public class ConvexHull { * should be computed */ private void determineLowerAndUpperHull(double[] x, double[] y) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); // first point is always in lowerHull and upperHull lowerHull[0] = 0; l = 1; diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTree.java index 26ae17db..562f7f4a 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTree.java @@ -24,21 +24,20 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mkcop; */ import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import 
de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTree; import de.lmu.ifi.dbs.elki.index.tree.query.GenericMTreeDistanceSearchCandidate; @@ -47,9 +46,6 @@ import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; import de.lmu.ifi.dbs.elki.utilities.QueryStatistic; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap; /** * MkCopTree is a metrical index structure based on the concepts of the M-Tree @@ -68,7 +64,7 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(MkCoPTree.class); + private static final Logging LOG = Logging.getLogger(MkCoPTree.class); /** * Parameter k. @@ -125,41 +121,30 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree return; } - if(logger.isDebugging()) { - logger.debugFine("insert " + entries + "\n"); + if(LOG.isDebugging()) { + LOG.debugFine("insert " + entries + "\n"); } if(!initialized) { initialize(entries.get(0)); } - Map<DBID, KNNHeap<D>> knnHeaps = new HashMap<DBID, KNNHeap<D>>(entries.size()); ModifiableDBIDs ids = DBIDUtil.newArray(entries.size()); // insert for(MkCoPEntry<D> entry : entries) { - DBID id = entry.getRoutingObjectID(); - // create knnList for the object - knnHeaps.put(id, new KNNHeap<D>(k_max + 1, getDistanceQuery().infiniteDistance())); - - ids.add(id); + ids.add(entry.getRoutingObjectID()); // insert the object super.insert(entry, false); } - // do batch nn - batchNN(getRoot(), ids, knnHeaps); - - // finish KNN lists (sort them completely) - Map<DBID, KNNList<D>> knnLists = new HashMap<DBID, KNNList<D>>(); - for(Entry<DBID, KNNHeap<D>> ent : knnHeaps.entrySet()) { - 
knnLists.put(ent.getKey(), ent.getValue().toKNNList()); - } + // perform nearest neighbor queries + Map<DBID, KNNResult<D>> knnLists = batchNN(getRoot(), ids, k_max); // adjust the knn distances adjustApproximatedKNNDistances(getRootEntry(), knnLists); - if(extraIntegrityChecks) { + if(EXTRA_INTEGRITY_CHECKS) { getRoot().integrityCheck(this, getRootEntry()); } } @@ -173,38 +158,35 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree * @return a List of the query results */ @Override - public List<DistanceResultPair<D>> reverseKNNQuery(DBIDRef id, int k) { + public DistanceDBIDResult<D> reverseKNNQuery(DBIDRef id, int k) { if(k > this.k_max) { throw new IllegalArgumentException("Parameter k has to be less or equal than " + "parameter kmax of the MCop-Tree!"); } - List<DistanceResultPair<D>> result = new ArrayList<DistanceResultPair<D>>(); + GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<D>(); ModifiableDBIDs candidates = DBIDUtil.newArray(); doReverseKNNQuery(k, id, result, candidates); // refinement of candidates - Map<DBID, KNNHeap<D>> knnLists = new HashMap<DBID, KNNHeap<D>>(); - for (DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) { - knnLists.put(iter.getDBID(), new KNNHeap<D>(k, getDistanceQuery().infiniteDistance())); - } - batchNN(getRoot(), candidates, knnLists); + Map<DBID, KNNResult<D>> knnLists = batchNN(getRoot(), candidates, k); - Collections.sort(result); + result.sort(); // Collections.sort(candidates); rkNNStatistics.addCandidates(candidates.size()); rkNNStatistics.addTrueHits(result.size()); - for (DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) { - DBID cid = iter.getDBID(); - for(DistanceResultPair<D> qr : knnLists.get(id)) { - if(qr.getDBID().equals(id)) { - result.add(new GenericDistanceResultPair<D>(qr.getDistance(), cid)); + for(DBIDIter iter = candidates.iter(); iter.valid(); iter.advance()) { + DBID cid = DBIDUtil.deref(iter); + KNNResult<D> cands = 
knnLists.get(cid); + for (DistanceDBIDResultIter<D> iter2 = cands.iter(); iter2.valid(); iter2.advance()) { + if(DBIDUtil.equal(id, iter2)) { + result.add(iter2.getDistance(), cid); break; } } } - Collections.sort(result); + result.sort(); rkNNStatistics.addResults(result.size()); return result; @@ -257,10 +239,11 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree } if(dirCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); + LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); } - // leafCapacity = (file.getPageSize() - overhead) / (objectID + parentDistance + + // leafCapacity = (file.getPageSize() - overhead) / (objectID + + // parentDistance + // consApprox + progrApprox) + 1 leafCapacity = (int) (getPageSize() - overhead) / (4 + distanceSize + 2 * 10) + 1; @@ -269,13 +252,13 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree } if(leafCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); + LOG.warning("Page size is choosen too small! 
Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); } initialized = true; - if(logger.isVerbose()) { - logger.verbose("Directory Capacity: " + (dirCapacity - 1) + "\nLeaf Capacity: " + (leafCapacity - 1)); + if(LOG.isVerbose()) { + LOG.verbose("Directory Capacity: " + (dirCapacity - 1) + "\nLeaf Capacity: " + (leafCapacity - 1)); } } @@ -288,15 +271,16 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree * @param candidates holds possible candidates for the result (they need a * refinement) */ - private void doReverseKNNQuery(int k, DBIDRef q, List<DistanceResultPair<D>> result, ModifiableDBIDs candidates) { - final Heap<GenericMTreeDistanceSearchCandidate<D>> pq = new UpdatableHeap<GenericMTreeDistanceSearchCandidate<D>>(); + private void doReverseKNNQuery(int k, DBIDRef q, GenericDistanceDBIDList<D> result, ModifiableDBIDs candidates) { + final Heap<GenericMTreeDistanceSearchCandidate<D>> pq = new Heap<GenericMTreeDistanceSearchCandidate<D>>(); // push root - pq.add(new GenericMTreeDistanceSearchCandidate<D>(getDistanceQuery().nullDistance(), getRootID(), null)); + pq.add(new GenericMTreeDistanceSearchCandidate<D>(getDistanceQuery().nullDistance(), getRootID(), null, null)); // search in tree while(!pq.isEmpty()) { GenericMTreeDistanceSearchCandidate<D> pqNode = pq.poll(); + // FIXME: cache the distance to the routing object in the queue node! 
MkCoPTreeNode<O, D> node = getNode(pqNode.nodeID); @@ -309,7 +293,7 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree D approximatedKnnDist_cons = entry.approximateConservativeKnnDistance(k, getDistanceQuery()); if(minDist.compareTo(approximatedKnnDist_cons) <= 0) { - pq.add(new GenericMTreeDistanceSearchCandidate<D>(minDist, getPageID(entry), entry.getRoutingObjectID())); + pq.add(new GenericMTreeDistanceSearchCandidate<D>(minDist, getPageID(entry), entry.getRoutingObjectID(), null)); } } } @@ -321,7 +305,7 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree D approximatedKnnDist_prog = entry.approximateProgressiveKnnDistance(k, getDistanceQuery()); if(distance.compareTo(approximatedKnnDist_prog) <= 0) { - result.add(new GenericDistanceResultPair<D>(distance, entry.getRoutingObjectID())); + result.add(distance, entry.getRoutingObjectID()); } else { D approximatedKnnDist_cons = entry.approximateConservativeKnnDistance(k, getDistanceQuery()); @@ -341,7 +325,7 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree * @param entry the root entry of the current subtree * @param knnLists a map of knn lists for each leaf entry */ - private void adjustApproximatedKNNDistances(MkCoPEntry<D> entry, Map<DBID, KNNList<D>> knnLists) { + private void adjustApproximatedKNNDistances(MkCoPEntry<D> entry, Map<DBID, KNNResult<D>> knnLists) { MkCoPTreeNode<O, D> node = getNode(entry); if(node.isLeaf()) { @@ -393,10 +377,10 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree * @param knnDistances TODO: Spezialbehandlung fuer identische Punkte in DB * (insbes. 
Distanz 0) */ - private void approximateKnnDistances(MkCoPLeafEntry<D> entry, KNNList<D> knnDistances) { - StringBuffer msg = new StringBuffer(); - if(logger.isDebugging()) { - msg.append("\nknnDistances " + knnDistances); + private void approximateKnnDistances(MkCoPLeafEntry<D> entry, KNNResult<D> knnDistances) { + StringBuilder msg = LOG.isDebugging() ? new StringBuilder() : null; + if(msg != null) { + msg.append("\nknnDistances ").append(knnDistances); } // count the zero distances @@ -434,16 +418,16 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree sum_log_k2 += (log_k[i] * log_k[i]); } - if(logger.isDebugging()) { - msg.append("\nk_0 " + k_0); - msg.append("\nk_max " + k_max); - msg.append("\nlog_k(" + log_k.length + ") " + FormatUtil.format(log_k)); - msg.append("\nsum_log_k " + sum_log_k); - msg.append("\nsum_log_k^2 " + sum_log_k2); - msg.append("\nkDists " + knnDistances); - msg.append("\nlog_kDist(" + log_kDist.length + ") " + FormatUtil.format(log_kDist)); - msg.append("\nsum_log_kDist " + sum_log_kDist); - msg.append("\nsum_log_k_kDist " + sum_log_k_kDist); + if(msg != null) { + msg.append("\nk_0 ").append(k_0); + msg.append("\nk_max ").append(k_max); + msg.append("\nlog_k(").append(log_k.length).append(") ").append(FormatUtil.format(log_k)); + msg.append("\nsum_log_k ").append(sum_log_k); + msg.append("\nsum_log_k^2 ").append(sum_log_k2); + msg.append("\nkDists ").append(knnDistances); + msg.append("\nlog_kDist(").append(log_kDist.length).append(") ").append(FormatUtil.format(log_kDist)); + msg.append("\nsum_log_kDist ").append(sum_log_kDist); + msg.append("\nsum_log_k_kDist ").append(sum_log_k_kDist); } // lower and upper hull @@ -457,28 +441,28 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree double err1 = ssqerr(k_0, k_max, log_k, log_kDist, conservative.getM(), conservative.getT()); double err2 = ssqerr(k_0, k_max, log_k, log_kDist, c2.getM(), c2.getT()); - 
if(logger.isDebugging()) { - msg.append("err1 " + err1); - msg.append("err2 " + err2); + if(msg != null) { + msg.append("err1 ").append(err1); + msg.append("err2 ").append(err2); } if(err1 > err2 && err1 - err2 > 0.000000001) { // if (err1 > err2) { - StringBuffer warning = new StringBuffer(); + StringBuilder warning = new StringBuilder(); int u = convexHull.getNumberOfPointsInUpperHull(); int[] upperHull = convexHull.getUpperHull(); - warning.append("\nentry " + entry.getRoutingObjectID()); - warning.append("\nlower Hull " + convexHull.getNumberOfPointsInLowerHull() + " " + FormatUtil.format(convexHull.getLowerHull())); - warning.append("\nupper Hull " + convexHull.getNumberOfPointsInUpperHull() + " " + FormatUtil.format(convexHull.getUpperHull())); - warning.append("\nerr1 " + err1); - warning.append("\nerr2 " + err2); - warning.append("\nconservative1 " + conservative); - warning.append("\nconservative2 " + c2); + warning.append("\nentry ").append(entry.getRoutingObjectID()); + warning.append("\nlower Hull ").append(convexHull.getNumberOfPointsInLowerHull()).append(" ").append(FormatUtil.format(convexHull.getLowerHull())); + warning.append("\nupper Hull ").append(convexHull.getNumberOfPointsInUpperHull()).append(" ").append(FormatUtil.format(convexHull.getUpperHull())); + warning.append("\nerr1 ").append(err1); + warning.append("\nerr2 ").append(err2); + warning.append("\nconservative1 ").append(conservative); + warning.append("\nconservative2 ").append(c2); for(int i = 0; i < u; i++) { - warning.append("\nlog_k[" + upperHull[i] + "] = " + log_k[upperHull[i]]); - warning.append("\nlog_kDist[" + upperHull[i] + "] = " + log_kDist[upperHull[i]]); + warning.append("\nlog_k[").append(upperHull[i]).append("] = ").append(log_k[upperHull[i]]); + warning.append("\nlog_kDist[").append(upperHull[i]).append("] = ").append(log_kDist[upperHull[i]]); } // warning(warning.toString()); } @@ -489,10 +473,9 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends 
AbstractMkTree entry.setConservativeKnnDistanceApproximation(conservative); entry.setProgressiveKnnDistanceApproximation(progressive); - if(logger.isDebugging()) { - logger.debugFine(msg.toString()); + if(msg != null) { + LOG.debugFine(msg.toString()); } - } /** @@ -505,13 +488,13 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree */ private ApproximationLine approximateLowerHull(ConvexHull convexHull, double[] log_k, double sum_log_k, double sum_log_k2, double[] log_kDist, double sum_log_kDist, double sum_log_k_kDist) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); int[] lowerHull = convexHull.getLowerHull(); int l = convexHull.getNumberOfPointsInLowerHull(); int k_0 = k_max - lowerHull.length + 1; // linear search on all line segments on the lower convex hull - msg.append("lower hull l = " + l + "\n"); + msg.append("lower hull l = ").append(l).append("\n"); double low_error = Double.MAX_VALUE; double low_m = 0.0; double low_t = 0.0; @@ -520,7 +503,7 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree double cur_m = (log_kDist[lowerHull[i]] - log_kDist[lowerHull[i - 1]]) / (log_k[lowerHull[i]] - log_k[lowerHull[i - 1]]); double cur_t = log_kDist[lowerHull[i]] - cur_m * log_k[lowerHull[i]]; double cur_error = ssqerr(k_0, k_max, log_k, log_kDist, cur_m, cur_t); - msg.append(" Segment = " + i + " m = " + cur_m + " t = " + cur_t + " lowerror = " + cur_error + "\n"); + msg.append(" Segment = ").append(i).append(" m = ").append(cur_m).append(" t = ").append(cur_t).append(" lowerror = ").append(cur_error).append("\n"); if(cur_error < low_error) { low_error = cur_error; low_m = cur_m; @@ -557,7 +540,7 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree } private ApproximationLine approximateUpperHull(ConvexHull convexHull, double[] log_k, double[] log_kDist) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new 
StringBuilder(); int[] upperHull = convexHull.getUpperHull(); int u = convexHull.getNumberOfPointsInUpperHull(); @@ -572,13 +555,13 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree double current_t = log_kDist[ii] - current_m * log_k[ii]; ApproximationLine current_approx = new ApproximationLine(k_0, current_m, current_t); - if(logger.isDebugging()) { - msg.append("\nlog_kDist[" + jj + "] " + log_kDist[jj]); - msg.append("\nlog_kDist[" + ii + "] " + log_kDist[ii]); - msg.append("\nlog_k[" + jj + "] " + log_k[jj]); - msg.append("\nlog_k[" + ii + "] " + log_k[ii]); - msg.append("\n" + (log_kDist[jj] - log_kDist[ii])); - msg.append("\ncurrent_approx_" + i + " " + current_approx); + if(LOG.isDebugging()) { + msg.append("\nlog_kDist[").append(jj).append("] ").append(log_kDist[jj]); + msg.append("\nlog_kDist[").append(ii).append("] ").append(log_kDist[ii]); + msg.append("\nlog_k[").append(jj).append("] ").append(log_k[jj]); + msg.append("\nlog_k[").append(ii).append("] ").append(log_k[ii]); + msg.append("\n").append((log_kDist[jj] - log_kDist[ii])); + msg.append("\ncurrent_approx_").append(i).append(" ").append(current_approx); } boolean ok = true; @@ -598,15 +581,15 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree } } - if(logger.isDebugging()) { - msg.append("\nupper Approx " + approx); - logger.debugFine(msg.toString()); + if(LOG.isDebugging()) { + msg.append("\nupper Approx ").append(approx); + LOG.debugFine(msg.toString()); } return approx; } private ApproximationLine approximateUpperHull_PAPER(ConvexHull convexHull, double[] log_k, double sum_log_k, double sum_log_k2, double[] log_kDist, double sum_log_kDist, double sum_log_k_kDist) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = LOG.isDebugging() ? 
new StringBuilder() : null; int[] upperHull = convexHull.getUpperHull(); int u = convexHull.getNumberOfPointsInUpperHull(); @@ -624,9 +607,9 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree double m_a = optimize(k_0, k_max, sum_log_k, sum_log_k2, x_a, y_a, sum_log_k_kDist, sum_log_kDist); double t_a = y_a - m_a * x_a; - if(logger.isDebugging()) { - msg.append("\na=" + a + " m_a=" + m_a + ", t_a=" + t_a); - msg.append("\n err " + ssqerr(k_0, k_max, log_k, log_kDist, m_a, m_a)); + if(msg != null) { + msg.append("\na=").append(a).append(" m_a=").append(m_a).append(", t_a=").append(t_a); + msg.append("\n err ").append(ssqerr(k_0, k_max, log_k, log_kDist, m_a, m_a)); } double x_p = a == 0 ? Double.NaN : log_k[upperHull[a - 1]]; @@ -639,13 +622,12 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree if(lessThanPre && lessThanSuc) { ApproximationLine appr = new ApproximationLine(k_0, m_a, t_a); - if(logger.isDebugging()) { - msg.append("\n1 anchor = " + a); - logger.debugFine(msg.toString()); + if(msg != null) { + msg.append("\n1 anchor = ").append(a); + LOG.debugFine(msg.toString()); } return appr; } - else if(!lessThanPre) { if(marked.contains(a - 1)) { m_a = (y_a - y_p) / (x_a - x_p); @@ -655,14 +637,14 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree t_a = y_a - m_a * x_a; ApproximationLine appr = new ApproximationLine(k_0, m_a, t_a); - if(logger.isDebugging()) { - msg.append("2 anchor = " + a); - msg.append(" appr1 " + appr); - msg.append(" x_a " + x_a + ", y_a " + y_a); - msg.append(" x_p " + x_p + ", y_p " + y_p); - msg.append(" a " + a); - msg.append(" upperHull " + FormatUtil.format(upperHull)); - logger.debugFine(msg.toString()); + if(msg != null) { + msg.append("2 anchor = ").append(a); + msg.append(" appr1 ").append(appr); + msg.append(" x_a ").append(x_a).append(", y_a ").append(y_a); + msg.append(" x_p ").append(x_p).append(", y_p ").append(y_p); + 
msg.append(" a ").append(a); + msg.append(" upperHull ").append(FormatUtil.format(upperHull)); + LOG.debugFine(msg.toString()); } return appr; } @@ -679,10 +661,10 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree t_a = y_a - m_a * x_a; ApproximationLine appr = new ApproximationLine(k_0, m_a, t_a); - if(logger.isDebugging()) { - msg.append("3 anchor = " + a + " -- " + (a + 1)); - msg.append(" appr2 " + appr); - logger.debugFine(msg.toString()); + if(msg != null) { + msg.append("3 anchor = ").append(a).append(" -- ").append((a + 1)); + msg.append(" appr2 ").append(appr); + LOG.debugFine(msg.toString()); } return appr; } @@ -698,7 +680,7 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree @SuppressWarnings("unused") private ApproximationLine approximateUpperHull_OLD(ConvexHull convexHull, double[] log_k, double sum_log_k, double sum_log_k2, double[] log_kDist, double sum_log_kDist, double sum_log_k_kDist) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); int[] upperHull = convexHull.getUpperHull(); int u = convexHull.getNumberOfPointsInUpperHull(); int k_0 = k_max - upperHull.length + 1; @@ -794,6 +776,6 @@ public class MkCoPTree<O, D extends NumberDistance<D, ?>> extends AbstractMkTree @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeIndex.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeIndex.java index 460e15b7..d3e45f8c 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeIndex.java @@ -28,6 +28,8 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; @@ -43,8 +45,7 @@ import de.lmu.ifi.dbs.elki.index.RKNNIndex; import de.lmu.ifi.dbs.elki.index.RangeIndex; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTree; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexKNNQuery; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexRangeQuery; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeQueryUtil; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MkTreeRKNNQuery; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; @@ -92,7 +93,7 @@ public class MkCoPTreeIndex<O, D extends NumberDistance<D, ?>> extends MkCoPTree } @Override - public void insert(DBID id) { + public void insert(DBIDRef id) { throw new UnsupportedOperationException("Insertion of single objects is not supported!"); } @@ -100,7 +101,7 @@ public class MkCoPTreeIndex<O, D extends NumberDistance<D, ?>> extends MkCoPTree public void insertAll(DBIDs 
ids) { List<MkCoPEntry<D>> objs = new ArrayList<MkCoPEntry<D>>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); + DBID id = DBIDUtil.deref(iter); final O object = relation.get(id); objs.add(createNewLeafEntry(id, object, getDistanceFactory().undefinedDistance())); } @@ -115,7 +116,7 @@ public class MkCoPTreeIndex<O, D extends NumberDistance<D, ?>> extends MkCoPTree * implemented yet. */ @Override - public final boolean delete(DBID id) { + public final boolean delete(DBIDRef id) { throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); } @@ -153,7 +154,7 @@ public class MkCoPTreeIndex<O, D extends NumberDistance<D, ?>> extends MkCoPTree } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexKNNQuery<O, S>(idx, dq); + return MTreeQueryUtil.getKNNQuery(idx, dq, hints); } @SuppressWarnings("unchecked") @@ -178,7 +179,7 @@ public class MkCoPTreeIndex<O, D extends NumberDistance<D, ?>> extends MkCoPTree } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexRangeQuery<O, S>(idx, dq); + return MTreeQueryUtil.getRangeQuery(idx, dq, hints); } @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeNode.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeNode.java index 01fca2f2..f2e4a114 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeNode.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCoPTreeNode.java @@ -23,13 +23,10 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mkcop; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.logging.Logger; - import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeNode; -import de.lmu.ifi.dbs.elki.logging.LoggingConfiguration; /** * Represents a node in an MkCop-Tree. @@ -99,15 +96,6 @@ class MkCoPTreeNode<O, D extends NumberDistance<D, ?>> extends AbstractMTreeNode } } - if(LoggingConfiguration.DEBUG) { - StringBuffer msg = new StringBuffer(); - msg.append("k_0 " + k_0); - msg.append("k_max " + k_max); - msg.append("y_1 " + y_1); - msg.append("y_kmax " + y_kmax); - Logger.getLogger(this.getClass().getName()).fine(msg.toString()); - } - // determine m and t double m = (y_kmax - y_1) / (Math.log(k_max) - Math.log(k_0)); double t = y_1 - m * Math.log(k_0); diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java index 8d88ee79..4ac96df3 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkcop/MkCopTreeFactory.java @@ -49,7 +49,7 @@ public class MkCopTreeFactory<O, D extends NumberDistance<D, ?>> extends Abstrac /** * Parameter for k */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("mkcop.k", "positive integer specifying the maximum number k of reverse k nearest neighbors to be supported."); + public static final OptionID K_ID = new OptionID("mkcop.k", "positive integer specifying the maximum number k of reverse k nearest neighbors to be supported."); /** * Parameter k. 
@@ -93,9 +93,10 @@ public class MkCopTreeFactory<O, D extends NumberDistance<D, ?>> extends Abstrac @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - IntParameter k_maxP = new IntParameter(K_ID, new GreaterConstraint(0)); + IntParameter k_maxP = new IntParameter(K_ID); + k_maxP.addConstraint(new GreaterConstraint(0)); if (config.grab(k_maxP)) { - k_max = k_maxP.getValue(); + k_max = k_maxP.intValue(); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java index f9f77723..36f9bbf1 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTree.java @@ -23,9 +23,6 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mkmax; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; @@ -34,18 +31,22 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.distance.DistanceUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.ModifiableDistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.DistanceEntry; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTreeUnified; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.QueryStatistic; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; /** * MkMaxTree is a metrical index structure based on the concepts of the M-Tree @@ -64,7 +65,7 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O /** * The logger for this class. 
*/ - private static final Logging logger = Logging.getLogger(MkMaxTree.class); + private static final Logging LOG = Logging.getLogger(MkMaxTree.class); /** * Provides some statistics about performed reverse knn-queries. @@ -90,38 +91,36 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O * in a second step. */ @Override - public List<DistanceResultPair<D>> reverseKNNQuery(DBIDRef id, int k) { + public DistanceDBIDResult<D> reverseKNNQuery(DBIDRef id, int k) { if(k > this.getKmax()) { throw new IllegalArgumentException("Parameter k has to be equal or less than " + "parameter k of the MkMax-Tree!"); } // get the candidates - List<DistanceResultPair<D>> candidates = new ArrayList<DistanceResultPair<D>>(); + GenericDistanceDBIDList<D> candidates = new GenericDistanceDBIDList<D>(); doReverseKNNQuery(id, getRoot(), null, candidates); if(k == this.getKmax()) { - Collections.sort(candidates); + candidates.sort(); rkNNStatistics.addTrueHits(candidates.size()); rkNNStatistics.addResults(candidates.size()); return candidates; } // refinement of candidates - Map<DBID, KNNHeap<D>> knnLists = new HashMap<DBID, KNNHeap<D>>(); - ModifiableDBIDs candidateIDs = DBIDUtil.newArray(); - for(DistanceResultPair<D> candidate : candidates) { - KNNHeap<D> knns = new KNNHeap<D>(k, getDistanceQuery().infiniteDistance()); - knnLists.put(candidate.getDBID(), knns); - candidateIDs.add(candidate.getDBID()); + ModifiableDBIDs candidateIDs = DBIDUtil.newArray(candidates.size()); + for (DBIDIter candidate = candidates.iter(); candidate.valid(); candidate.advance()) { + candidateIDs.add(candidate); } - batchNN(getRoot(), candidateIDs, knnLists); + Map<DBID, KNNResult<D>> knnLists = batchNN(getRoot(), candidateIDs, k); - List<DistanceResultPair<D>> result = new ArrayList<DistanceResultPair<D>>(); + GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<D>(); for (DBIDIter iter = candidateIDs.iter(); iter.valid(); iter.advance()) { - DBID cid = iter.getDBID(); - 
for(DistanceResultPair<D> qr : knnLists.get(cid)) { - if(id.equals(qr.getDBID())) { - result.add(new GenericDistanceResultPair<D>(qr.getDistance(), cid)); + DBID cid = DBIDUtil.deref(iter); + KNNResult<D> cands = knnLists.get(cid); + for (DistanceDBIDResultIter<D> iter2 = cands.iter(); iter2.valid(); iter2.advance()) { + if(DBIDUtil.equal(id, iter2)) { + result.add(iter2.getDistance(), cid); break; } } @@ -129,7 +128,7 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O rkNNStatistics.addResults(result.size()); rkNNStatistics.addCandidates(candidates.size()); - Collections.sort(result); + result.sort(); return result; } @@ -155,7 +154,7 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O */ @Override protected void preInsert(MkMaxEntry<D> entry) { - KNNHeap<D> knns_o = new KNNHeap<D>(getKmax(), getDistanceQuery().infiniteDistance()); + KNNHeap<D> knns_o = KNNUtil.newHeap(distanceFunction, getKmax()); preInsert(entry, getRootEntry(), knns_o); } @@ -163,7 +162,7 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O * Adjusts the knn distance in the subtree of the specified root entry. 
*/ @Override - protected void kNNdistanceAdjustment(MkMaxEntry<D> entry, Map<DBID, KNNHeap<D>> knnLists) { + protected void kNNdistanceAdjustment(MkMaxEntry<D> entry, Map<DBID, KNNResult<D>> knnLists) { MkMaxTreeNode<O, D> node = getNode(entry); D knnDist_node = getDistanceQuery().nullDistance(); if(node.isLeaf()) { @@ -194,14 +193,14 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O * @param node_entry the entry representing the node * @param result the list for the query result */ - private void doReverseKNNQuery(DBIDRef q, MkMaxTreeNode<O, D> node, MkMaxEntry<D> node_entry, List<DistanceResultPair<D>> result) { + private void doReverseKNNQuery(DBIDRef q, MkMaxTreeNode<O, D> node, MkMaxEntry<D> node_entry, ModifiableDistanceDBIDResult<D> result) { // data node if(node.isLeaf()) { for(int i = 0; i < node.getNumEntries(); i++) { MkMaxEntry<D> entry = node.getEntry(i); D distance = getDistanceQuery().distance(entry.getRoutingObjectID(), q); if(distance.compareTo(entry.getKnnDistance()) <= 0) { - result.add(new GenericDistanceResultPair<D>(distance, entry.getRoutingObjectID())); + result.add(distance, entry.getRoutingObjectID()); } } } @@ -231,8 +230,8 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O * @param knns_q the knns of q */ private void preInsert(MkMaxEntry<D> q, MkMaxEntry<D> nodeEntry, KNNHeap<D> knns_q) { - if(logger.isDebugging()) { - logger.debugFine("preInsert " + q + " - " + nodeEntry + "\n"); + if(LOG.isDebugging()) { + LOG.debugFine("preInsert " + q + " - " + nodeEntry + "\n"); } D knnDist_q = knns_q.getKNNDistance(); @@ -248,10 +247,9 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O // p is nearer to q than the farthest kNN-candidate of q // ==> p becomes a knn-candidate if(dist_pq.compareTo(knnDist_q) <= 0) { - DistanceResultPair<D> knn = new GenericDistanceResultPair<D>(dist_pq, p.getRoutingObjectID()); - knns_q.add(knn); + knns_q.add(dist_pq, 
p.getRoutingObjectID()); if(knns_q.size() >= getKmax()) { - knnDist_q = knns_q.getMaximumDistance(); + knnDist_q = knns_q.getKNNDistance(); q.setKnnDistance(knnDist_q); } @@ -259,15 +257,13 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O // p is nearer to q than to its farthest knn-candidate // q becomes knn of p if(dist_pq.compareTo(p.getKnnDistance()) <= 0) { - KNNHeap<D> knns_p = new KNNHeap<D>(getKmax(), getDistanceQuery().infiniteDistance()); - knns_p.add(new GenericDistanceResultPair<D>(dist_pq, q.getRoutingObjectID())); - doKNNQuery(p.getRoutingObjectID(), knns_p); + KNNResult<D> knns_p = knnq.getKNNForDBID(p.getRoutingObjectID(), getKmax() - 1); - if(knns_p.size() < getKmax()) { + if(knns_p.size() + 1 < getKmax()) { p.setKnnDistance(getDistanceQuery().undefinedDistance()); } else { - D knnDist_p = knns_p.getMaximumDistance(); + D knnDist_p = DistanceUtil.max(dist_pq, knns_p.getKNNDistance()); p.setKnnDistance(knnDist_p); } } @@ -288,8 +284,8 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O knnDist_node = DistanceUtil.max(knnDist_node, dirEntry.getKnnDistance()); } } - if(logger.isDebugging()) { - logger.debugFine(nodeEntry + "set knn dist " + knnDist_node); + if(LOG.isDebugging()) { + LOG.debugFine(nodeEntry + "set knn dist " + knnDist_node); } nodeEntry.setKnnDistance(knnDist_node); } @@ -313,7 +309,7 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O } if(dirCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); + LOG.warning("Page size is choosen too small! 
Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); } // leafCapacity = (file.getPageSize() - overhead) / (objectID + @@ -326,7 +322,7 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O } if(leafCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); + LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); } } @@ -365,6 +361,6 @@ public class MkMaxTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTreeIndex.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTreeIndex.java index d1fd2b0f..0bd31dca 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTreeIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mkmax/MkMaxTreeIndex.java @@ -28,6 +28,8 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; @@ -36,22 +38,32 @@ import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.KNNIndex; import de.lmu.ifi.dbs.elki.index.RKNNIndex; import de.lmu.ifi.dbs.elki.index.RangeIndex; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTreeUnified; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexKNNQuery; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexRangeQuery; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeQueryUtil; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MkTreeRKNNQuery; import de.lmu.ifi.dbs.elki.persistent.PageFile; -import 
de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; +/** + * MkMax tree + * + * @author Elke Achtert + * + * @param <O> Object type + * @param <D> Distance type + */ public class MkMaxTreeIndex<O, D extends Distance<D>> extends MkMaxTree<O, D> implements RangeIndex<O>, KNNIndex<O>, RKNNIndex<O> { + /** + * Relation indexed. + */ private Relation<O> relation; - + /** * Constructor. * @@ -71,22 +83,21 @@ public class MkMaxTreeIndex<O, D extends Distance<D>> extends MkMaxTree<O, D> im * @return a new MkMaxLeafEntry representing the specified data object */ protected MkMaxLeafEntry<D> createNewLeafEntry(DBID id, O object, D parentDistance) { - KNNHeap<D> knnList = new KNNHeap<D>(getKmax() - 1); - doKNNQuery(id, knnList); - D knnDistance = knnList.getMaximumDistance(); + KNNResult<D> knns = knnq.getKNNForObject(object, getKmax() - 1); + D knnDistance = knns.getKNNDistance(); return new MkMaxLeafEntry<D>(id, parentDistance, knnDistance); } @Override - public void insert(DBID id) { - insert(createNewLeafEntry(id, relation.get(id), getDistanceFactory().undefinedDistance()), false); + public void insert(DBIDRef id) { + insert(createNewLeafEntry(DBIDUtil.deref(id), relation.get(id), getDistanceFactory().undefinedDistance()), false); } @Override public void insertAll(DBIDs ids) { List<MkMaxEntry<D>> objs = new ArrayList<MkMaxEntry<D>>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); + DBID id = DBIDUtil.deref(iter); final O object = relation.get(id); objs.add(createNewLeafEntry(id, object, getDistanceFactory().undefinedDistance())); } @@ -101,7 +112,7 @@ public class MkMaxTreeIndex<O, D extends Distance<D>> extends MkMaxTree<O, D> im * implemented yet. 
*/ @Override - public final boolean delete(DBID id) { + public final boolean delete(DBIDRef id) { throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); } @@ -139,7 +150,7 @@ public class MkMaxTreeIndex<O, D extends Distance<D>> extends MkMaxTree<O, D> im } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexKNNQuery<O, S>(idx, dq); + return MTreeQueryUtil.getKNNQuery(idx, dq, hints); } @SuppressWarnings("unchecked") @@ -164,7 +175,7 @@ public class MkMaxTreeIndex<O, D extends Distance<D>> extends MkMaxTree<O, D> im } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexRangeQuery<O, S>(idx, dq); + return MTreeQueryUtil.getRangeQuery(idx, dq); } @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTree.java index 433a01fa..f5410839 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTree.java @@ -24,22 +24,22 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.mktab; */ import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.Map; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.distance.DistanceUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import 
de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTreeUnified; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.persistent.PageFile; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; /** * MkTabTree is a metrical index structure based on the concepts of the M-Tree @@ -58,7 +58,7 @@ public class MkTabTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(MkTabTree.class); + private static final Logging LOG = Logging.getLogger(MkTabTree.class); /** * Constructor. @@ -91,15 +91,15 @@ public class MkTabTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O } @Override - public List<DistanceResultPair<D>> reverseKNNQuery(DBIDRef id, int k) { + public DistanceDBIDResult<D> reverseKNNQuery(DBIDRef id, int k) { if(k > this.getKmax()) { throw new IllegalArgumentException("Parameter k has to be less or equal than " + "parameter kmax of the MkTab-Tree!"); } - List<DistanceResultPair<D>> result = new ArrayList<DistanceResultPair<D>>(); + GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<D>(); doReverseKNNQuery(k, id, null, getRoot(), result); - Collections.sort(result); + result.sort(); return result; } @@ -122,7 +122,7 @@ public class MkTabTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O } if(dirCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); + LOG.warning("Page size is choosen too small! 
Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); } // leafCapacity = (pageSize - overhead) / (objectID + parentDistance + + @@ -134,19 +134,18 @@ public class MkTabTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O } if(leafCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); + LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); } - } - + @Override - protected void kNNdistanceAdjustment(MkTabEntry<D> entry, Map<DBID, KNNHeap<D>> knnLists) { + protected void kNNdistanceAdjustment(MkTabEntry<D> entry, Map<DBID, KNNResult<D>> knnLists) { MkTabTreeNode<O, D> node = getNode(entry); List<D> knnDistances_node = initKnnDistanceList(); if(node.isLeaf()) { for(int i = 0; i < node.getNumEntries(); i++) { MkTabEntry<D> leafEntry = node.getEntry(i); - leafEntry.setKnnDistances(knnLists.get(getPageID(leafEntry)).toKNNList().asDistanceList()); + leafEntry.setKnnDistances(KNNUtil.asDistanceList(knnLists.get(getPageID(leafEntry)))); knnDistances_node = max(knnDistances_node, leafEntry.getKnnDistances()); } } @@ -210,14 +209,14 @@ public class MkTabTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O * @param node the root of the subtree * @param result the list holding the query result */ - private void doReverseKNNQuery(int k, DBIDRef q, MkTabEntry<D> node_entry, MkTabTreeNode<O, D> node, List<DistanceResultPair<D>> result) { + private void doReverseKNNQuery(int k, DBIDRef q, MkTabEntry<D> node_entry, MkTabTreeNode<O, D> node, GenericDistanceDBIDList<D> result) { // data node if(node.isLeaf()) { for(int i = 0; i < node.getNumEntries(); i++) { MkTabEntry<D> entry = node.getEntry(i); D distance = getDistanceQuery().distance(entry.getRoutingObjectID(), q); if(distance.compareTo(entry.getKnnDistance(k)) <= 0) { - result.add(new GenericDistanceResultPair<D>(distance, 
entry.getRoutingObjectID())); + result.add(distance, entry.getRoutingObjectID()); } } } @@ -278,6 +277,6 @@ public class MkTabTree<O, D extends Distance<D>> extends AbstractMkTreeUnified<O @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTreeIndex.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTreeIndex.java index f1d23bfd..b12ac059 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTreeIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mktrees/mktab/MkTabTreeIndex.java @@ -28,24 +28,25 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNUtil; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.KNNIndex; import de.lmu.ifi.dbs.elki.index.RKNNIndex; import de.lmu.ifi.dbs.elki.index.RangeIndex; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTreeUnified; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexKNNQuery; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexRangeQuery; +import 
de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeQueryUtil; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MkTreeRKNNQuery; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; @@ -109,7 +110,7 @@ public class MkTabTreeIndex<O, D extends Distance<D>> extends MkTabTree<O, D> im } @Override - public void insert(DBID id) { + public void insert(DBIDRef id) { throw new UnsupportedOperationException("Insertion of single objects is not supported!"); } @@ -117,7 +118,7 @@ public class MkTabTreeIndex<O, D extends Distance<D>> extends MkTabTree<O, D> im public void insertAll(DBIDs ids) { List<MkTabEntry<D>> objs = new ArrayList<MkTabEntry<D>>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); + DBID id = DBIDUtil.deref(iter); final O object = relation.get(id); objs.add(createNewLeafEntry(id, object, getDistanceFactory().undefinedDistance())); } @@ -132,7 +133,7 @@ public class MkTabTreeIndex<O, D extends Distance<D>> extends MkTabTree<O, D> im * implemented yet. 
*/ @Override - public final boolean delete(DBID id) { + public final boolean delete(DBIDRef id) { throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); } @@ -170,7 +171,7 @@ public class MkTabTreeIndex<O, D extends Distance<D>> extends MkTabTree<O, D> im } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexKNNQuery<O, S>(idx, dq); + return MTreeQueryUtil.getKNNQuery(idx, dq, hints); } @SuppressWarnings("unchecked") @@ -195,7 +196,7 @@ public class MkTabTreeIndex<O, D extends Distance<D>> extends MkTabTree<O, D> im } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexRangeQuery<O, S>(idx, dq); + return MTreeQueryUtil.getRangeQuery(idx, dq); } @SuppressWarnings("unchecked") diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTree.java index 80955b8a..4276329c 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTree.java @@ -55,7 +55,7 @@ public class MTree<O, D extends Distance<D>> extends AbstractMTree<O, D, MTreeNo /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(MTree.class); + private static final Logging LOG = Logging.getLogger(MTree.class); /** * Constructor. @@ -81,7 +81,7 @@ public class MTree<O, D extends Distance<D>> extends AbstractMTree<O, D, MTreeNo int distanceSize = exampleLeaf.getParentDistance().externalizableSize(); // FIXME: simulate a proper feature size! 
- int featuresize = 0; // DatabaseUtil.dimensionality(relation); + int featuresize = 0; // RelationUtil.dimensionality(relation); // overhead = index(4), numEntries(4), id(4), isLeaf(0.125) double overhead = 12.125; @@ -103,7 +103,7 @@ public class MTree<O, D extends Distance<D>> extends AbstractMTree<O, D, MTreeNo } if(dirCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); + LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a directory node = " + (dirCapacity - 1)); } // leafCapacity = (pageSize - overhead) / (objectID + parentDistance) + // 1 @@ -118,11 +118,11 @@ public class MTree<O, D extends Distance<D>> extends AbstractMTree<O, D, MTreeNo } if(leafCapacity < 10) { - logger.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); + LOG.warning("Page size is choosen too small! Maximum number of entries " + "in a leaf node = " + (leafCapacity - 1)); } - if(logger.isVerbose()) { - logger.verbose("Directory Capacity: " + (dirCapacity - 1) + "\nLeaf Capacity: " + (leafCapacity - 1)); + if(LOG.isVerbose()) { + LOG.verbose("Directory Capacity: " + (dirCapacity - 1) + "\nLeaf Capacity: " + (leafCapacity - 1)); } } @@ -161,6 +161,6 @@ public class MTree<O, D extends Distance<D>> extends AbstractMTree<O, D, MTreeNo @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeFactory.java index 2567f81f..5c5aac04 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeFactory.java @@ -30,7 +30,6 @@ import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeFactor import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; /** * Factory for a M-Tree @@ -75,11 +74,6 @@ public class MTreeFactory<O, D extends Distance<D>> extends AbstractMTreeFactory */ public static class Parameterizer<O, D extends Distance<D>> extends AbstractMTreeFactory.Parameterizer<O, D> { @Override - protected void makeOptions(Parameterization config) { - super.makeOptions(config); - } - - @Override protected MTreeFactory<O, D> makeInstance() { return new MTreeFactory<O, D>(fileName, pageSize, cacheSize, distanceFunction); } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeIndex.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeIndex.java index fe60c04d..8afe1f2a 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/mtree/MTreeIndex.java @@ -28,6 +28,8 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import 
de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; @@ -41,8 +43,7 @@ import de.lmu.ifi.dbs.elki.index.RangeIndex; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeLeafEntry; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexKNNQuery; -import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MetricalIndexRangeQuery; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query.MTreeQueryUtil; import de.lmu.ifi.dbs.elki.persistent.PageFile; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; @@ -82,15 +83,15 @@ public class MTreeIndex<O, D extends Distance<D>> extends MTree<O, D> implements } @Override - public void insert(DBID id) { - insert(createNewLeafEntry(id, relation.get(id), getDistanceFactory().undefinedDistance()), false); + public void insert(DBIDRef id) { + insert(createNewLeafEntry(DBIDUtil.deref(id), relation.get(id), getDistanceFactory().undefinedDistance()), false); } @Override public void insertAll(DBIDs ids) { List<MTreeEntry<D>> objs = new ArrayList<MTreeEntry<D>>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); + DBID id = DBIDUtil.deref(iter); final O object = relation.get(id); objs.add(createNewLeafEntry(id, object, getDistanceFactory().undefinedDistance())); } @@ -105,7 +106,7 @@ public class MTreeIndex<O, D extends Distance<D>> extends MTree<O, D> implements * implemented yet. 
*/ @Override - public final boolean delete(DBID id) { + public final boolean delete(DBIDRef id) { throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); } @@ -143,7 +144,7 @@ public class MTreeIndex<O, D extends Distance<D>> extends MTree<O, D> implements } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexKNNQuery<O, S>(idx, dq); + return MTreeQueryUtil.getKNNQuery(idx, dq, hints); } @SuppressWarnings("unchecked") @@ -168,7 +169,7 @@ public class MTreeIndex<O, D extends Distance<D>> extends MTree<O, D> implements } AbstractMTree<O, S, ?, ?> idx = (AbstractMTree<O, S, ?, ?>) this; DistanceQuery<O, S> dq = distanceFunction.instantiate(relation); - return new MetricalIndexRangeQuery<O, S>(idx, dq); + return MTreeQueryUtil.getRangeQuery(idx, dq); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java new file mode 100644 index 00000000..52f777ba --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexKNNQuery.java @@ -0,0 +1,155 @@ +package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.List; + +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.AbstractDistanceKNNQuery; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeNode; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry; +import de.lmu.ifi.dbs.elki.index.tree.query.DoubleMTreeDistanceSearchCandidate; +import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; +import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; + +/** + * Instance of a KNN query for a particular metrical index. + * + * @author Erich Schubert + * + * @apiviz.uses AbstractMTree + * + * @param <O> Object type + */ +public class DoubleDistanceMetricalIndexKNNQuery<O> extends AbstractDistanceKNNQuery<O, DoubleDistance> { + /** + * The index to use + */ + protected final AbstractMTree<O, DoubleDistance, ?, ?> index; + + /** + * Distance function + */ + protected PrimitiveDoubleDistanceFunction<? 
super O> distf; + + /** + * Constructor. + * + * @param index Index to use + * @param distanceQuery Distance query used + * @param distf Distance function + */ + public DoubleDistanceMetricalIndexKNNQuery(AbstractMTree<O, DoubleDistance, ?, ?> index, DistanceQuery<O, DoubleDistance> distanceQuery, PrimitiveDoubleDistanceFunction<? super O> distf) { + super(distanceQuery); + this.index = index; + this.distf = distf; + } + + @Override + public KNNResult<DoubleDistance> getKNNForObject(O q, int k) { + if (k < 1) { + throw new IllegalArgumentException("At least one object has to be requested!"); + } + + DoubleDistanceKNNHeap knnList = new DoubleDistanceKNNHeap(k); + double d_k = Double.POSITIVE_INFINITY; + + final Heap<DoubleMTreeDistanceSearchCandidate> pq = new Heap<DoubleMTreeDistanceSearchCandidate>(); + + // Push the root node + pq.add(new DoubleMTreeDistanceSearchCandidate(0, index.getRootID(), null, 0)); + + // search in tree + while (!pq.isEmpty()) { + DoubleMTreeDistanceSearchCandidate pqNode = pq.poll(); + DBID id_p = pqNode.routingObjectID; + double d1 = pqNode.routingDistance; + + if (knnList.size() >= k && pqNode.mindist > d_k) { + break; + } + + AbstractMTreeNode<?, DoubleDistance, ?, ?> node = index.getNode(pqNode.nodeID); + + // directory node + if (!node.isLeaf()) { + for (int i = 0; i < node.getNumEntries(); i++) { + final MTreeEntry<DoubleDistance> entry = node.getEntry(i); + final DBID id_i = entry.getRoutingObjectID(); + double or_i = entry.getCoveringRadius().doubleValue(); + double d2 = id_p != null ? 
entry.getParentDistance().doubleValue() : 0; + double diff = Math.abs(d1 - d2); + + if (diff <= d_k + or_i) { + final O ob_i = relation.get(id_i); + double d3 = distf.doubleDistance(ob_i, q); + double d_min = Math.max(d3 - or_i, 0); + if (d_min <= d_k) { + pq.add(new DoubleMTreeDistanceSearchCandidate(d_min, ((DirectoryEntry) entry).getPageID(), id_i, d3)); + } + } + } + } + // data node + else { + for (int i = 0; i < node.getNumEntries(); i++) { + final MTreeEntry<DoubleDistance> entry = node.getEntry(i); + final DBID id_i = entry.getRoutingObjectID(); + double d2 = id_p != null ? entry.getParentDistance().doubleValue() : 0; + double diff = Math.abs(d1 - d2); + + if (diff <= d_k) { + final O o_i = relation.get(id_i); + double d3 = distf.doubleDistance(o_i, q); + if (d3 <= d_k) { + knnList.add(d3, id_i); + d_k = knnList.doubleKNNDistance(); + } + } + } + } + } + return knnList.toKNNList(); + } + + @Override + public KNNResult<DoubleDistance> getKNNForDBID(DBIDRef id, int k) { + return getKNNForObject(relation.get(id), k); + } + + @Override + public List<KNNResult<DoubleDistance>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { + // TODO: implement + throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexRangeQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexRangeQuery.java new file mode 100644 index 00000000..c3680111 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/DoubleDistanceMetricalIndexRangeQuery.java @@ -0,0 +1,135 @@ +package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This 
program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeNode; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry; + +/** + * Instance of a range query for a particular metrical index. + * + * @author Erich Schubert + * + * @apiviz.uses AbstractMTree + */ +public class DoubleDistanceMetricalIndexRangeQuery<O> extends AbstractDistanceRangeQuery<O, DoubleDistance> { + /** + * The index to use + */ + protected final AbstractMTree<O, DoubleDistance, ?, ?> index; + + /** + * Distance function + */ + protected PrimitiveDoubleDistanceFunction<? super O> distf; + + /** + * Constructor. 
+ * + * @param index Index to use + * @param distanceQuery Distance query used + * @param distf Distance function + */ + public DoubleDistanceMetricalIndexRangeQuery(AbstractMTree<O, DoubleDistance, ?, ?> index, DistanceQuery<O, DoubleDistance> distanceQuery, PrimitiveDoubleDistanceFunction<? super O> distf) { + super(distanceQuery); + this.index = index; + this.distf = distf; + } + + /** + * Performs a range query on the specified subtree. It recursively traverses + * all paths from the specified node, which cannot be excluded from leading to + * qualifying objects. + * + * @param id_p the routing object of the specified node + * @param node the root of the subtree to be traversed + * @param q the query object + * @param r_q the query range + * @param result the list holding the query results + */ + private void doRangeQuery(DBID id_p, AbstractMTreeNode<O, DoubleDistance, ?, ?> node, O q, double r_q, DoubleDistanceDBIDList result) { + final O o_p = id_p != null ? relation.get(id_p) : null; + double d1 = id_p != null ? distf.doubleDistance(o_p, q) : 0; + if (!node.isLeaf()) { + for (int i = 0; i < node.getNumEntries(); i++) { + MTreeEntry<DoubleDistance> entry = node.getEntry(i); + + double r_or = entry.getCoveringRadius().doubleValue(); + double d2 = id_p != null ? entry.getParentDistance().doubleValue() : 0; + double diff = Math.abs(d1 - d2); + + double sum = r_q + r_or; + + if (diff <= sum) { + DBID id_r = entry.getRoutingObjectID(); + double d3 = distf.doubleDistance(relation.get(id_r), q); + if (d3 <= sum) { + AbstractMTreeNode<O, DoubleDistance, ?, ?> child = index.getNode(((DirectoryEntry) entry).getPageID()); + doRangeQuery(id_r, child, q, r_q, result); + } + } + } + } else { + for (int i = 0; i < node.getNumEntries(); i++) { + MTreeEntry<DoubleDistance> entry = node.getEntry(i); + + double d2 = id_p != null ? 
entry.getParentDistance().doubleValue() : 0; + double diff = Math.abs(d1 - d2); + + if (diff <= r_q) { + DBID id_j = entry.getRoutingObjectID(); + O o_j = relation.get(id_j); + double d3 = distf.doubleDistance(o_j, q); + if (d3 <= r_q) { + result.add(d3, id_j); + } + } + } + } + } + + @Override + public DistanceDBIDResult<DoubleDistance> getRangeForObject(O obj, DoubleDistance range) { + final DoubleDistanceDBIDList result = new DoubleDistanceDBIDList(); + + doRangeQuery(null, index.getRoot(), obj, range.doubleValue(), result); + result.sort(); + return result; + } + + @Override + public DistanceDBIDResult<DoubleDistance> getRangeForDBID(DBIDRef id, DoubleDistance range) { + return getRangeForObject(relation.get(id), range); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MTreeQueryUtil.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MTreeQueryUtil.java new file mode 100644 index 00000000..34980542 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MTreeQueryUtil.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query; + +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: 
you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Query utility classes for MTrees. + * + * @author Erich Schubert + */ +public final class MTreeQueryUtil { + /** + * Get an MTree knn query, using an optimized double implementation when + * possible. + * + * @param <O> Object type + * @param <D> Distance type + * @param tree Tree to query + * @param distanceQuery distance query + * @param hints Optimizer hints + * @return Query object + */ + @SuppressWarnings({ "cast", "unchecked" }) + public static <O, D extends Distance<D>> KNNQuery<O, D> getKNNQuery(AbstractMTree<O, D, ?, ?> tree, DistanceQuery<O, D> distanceQuery, Object... hints) { + DistanceFunction<? super O, D> df = distanceQuery.getDistanceFunction(); + // Can we use an optimized query? + if(df instanceof PrimitiveDoubleDistanceFunction) { + PrimitiveDoubleDistanceFunction<? super O> dfc = (PrimitiveDoubleDistanceFunction<? super O>) df; + AbstractMTree<O, DoubleDistance, ?, ?> treec = (AbstractMTree<O, DoubleDistance, ?, ?>) tree; + DistanceQuery<O, DoubleDistance> dqc = (DistanceQuery<O, DoubleDistance>) distanceQuery; + KNNQuery<O, ?> q = new DoubleDistanceMetricalIndexKNNQuery<O>(treec, dqc, dfc); + return (KNNQuery<O, D>) q; + } + return new MetricalIndexKNNQuery<O, D>(tree, distanceQuery); + } + + /** + * Get an MTree range query, using an optimized double implementation when + * possible. 
+ * + * @param <O> Object type + * @param <D> Distance type + * @param tree Tree to query + * @param distanceQuery distance query + * @param hints Optimizer hints + * @return Query object + */ + @SuppressWarnings({ "cast", "unchecked" }) + public static <O, D extends Distance<D>> RangeQuery<O, D> getRangeQuery(AbstractMTree<O, D, ?, ?> tree, DistanceQuery<O, D> distanceQuery, Object... hints) { + DistanceFunction<? super O, D> df = distanceQuery.getDistanceFunction(); + // Can we use an optimized query? + if(df instanceof PrimitiveDoubleDistanceFunction) { + PrimitiveDoubleDistanceFunction<? super O> dfc = (PrimitiveDoubleDistanceFunction<? super O>) df; + AbstractMTree<O, DoubleDistance, ?, ?> treec = (AbstractMTree<O, DoubleDistance, ?, ?>) tree; + DistanceQuery<O, DoubleDistance> dqc = (DistanceQuery<O, DoubleDistance>) distanceQuery; + RangeQuery<O, ?> q = new DoubleDistanceMetricalIndexRangeQuery<O>(treec, dqc, dfc); + return (RangeQuery<O, D>) q; + } + return new MetricalIndexRangeQuery<O, D>(tree, distanceQuery); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java index c2450988..c2fafdae 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexKNNQuery.java @@ -24,15 +24,16 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query; */ import java.util.List; -import java.util.Map; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.AbstractDistanceKNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.distance.DistanceUtil; 
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; @@ -40,8 +41,6 @@ import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTreeNode; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.MTreeEntry; import de.lmu.ifi.dbs.elki.index.tree.query.GenericMTreeDistanceSearchCandidate; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.UpdatableHeap; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; /** @@ -62,7 +61,7 @@ public class MetricalIndexKNNQuery<O, D extends Distance<D>> extends AbstractDis /** * Constructor. - * + * * @param index Index to use * @param distanceQuery Distance query used */ @@ -71,70 +70,67 @@ public class MetricalIndexKNNQuery<O, D extends Distance<D>> extends AbstractDis this.index = index; } - /** - * Performs a k-nearest neighbor query for the given FeatureVector with the - * given parameter k and the according distance function. The query result is - * in ascending order to the distance to the query object. 
- * - * @param q the id of the query object - * @param knnList the query result list - */ - protected final void doKNNQuery(O q, KNNHeap<D> knnList) { - final Heap<GenericMTreeDistanceSearchCandidate<D>> pq = new UpdatableHeap<GenericMTreeDistanceSearchCandidate<D>>(); + @Override + public KNNResult<D> getKNNForObject(O q, int k) { + if (k < 1) { + throw new IllegalArgumentException("At least one object has to be requested!"); + } - // push root - pq.add(new GenericMTreeDistanceSearchCandidate<D>(distanceQuery.nullDistance(), index.getRootID(), null)); + final D nullDistance = index.getDistanceFactory().nullDistance(); + KNNHeap<D> knnList = KNNUtil.newHeap(distanceQuery.getDistanceFactory(), k); D d_k = knnList.getKNNDistance(); + final Heap<GenericMTreeDistanceSearchCandidate<D>> pq = new Heap<GenericMTreeDistanceSearchCandidate<D>>(); + + // push root + pq.add(new GenericMTreeDistanceSearchCandidate<D>(nullDistance, index.getRootID(), null, nullDistance)); + // search in tree - while(!pq.isEmpty()) { + while (!pq.isEmpty()) { GenericMTreeDistanceSearchCandidate<D> pqNode = pq.poll(); - if(pqNode.mindist.compareTo(d_k) > 0) { - return; + if (knnList.size() >= k && pqNode.mindist.compareTo(d_k) > 0) { + break; } - AbstractMTreeNode<O, D, ?, ?> node = index.getNode(pqNode.nodeID); - DBID o_p = pqNode.routingObjectID; + AbstractMTreeNode<?, D, ?, ?> node = index.getNode(pqNode.nodeID); + DBID id_p = pqNode.routingObjectID; + D d1 = pqNode.routingDistance; // directory node - if(!node.isLeaf()) { - for(int i = 0; i < node.getNumEntries(); i++) { + if (!node.isLeaf()) { + for (int i = 0; i < node.getNumEntries(); i++) { MTreeEntry<D> entry = node.getEntry(i); DBID o_r = entry.getRoutingObjectID(); D r_or = entry.getCoveringRadius(); - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : distanceQuery.nullDistance(); - D d2 = o_p != null ? distanceQuery.distance(o_r, o_p) : distanceQuery.nullDistance(); + D d2 = id_p != null ? 
entry.getParentDistance() : nullDistance; D diff = d1.compareTo(d2) > 0 ? d1.minus(d2) : d2.minus(d1); D sum = d_k.plus(r_or); - if(diff.compareTo(sum) <= 0) { + if (diff.compareTo(sum) <= 0) { D d3 = distanceQuery.distance(o_r, q); - D d_min = DistanceUtil.max(d3.minus(r_or), distanceQuery.nullDistance()); - if(d_min.compareTo(d_k) <= 0) { - pq.add(new GenericMTreeDistanceSearchCandidate<D>(d_min, ((DirectoryEntry)entry).getPageID(), o_r)); + D d_min = DistanceUtil.max(d3.minus(r_or), index.getDistanceFactory().nullDistance()); + if (d_min.compareTo(d_k) <= 0) { + pq.add(new GenericMTreeDistanceSearchCandidate<D>(d_min, ((DirectoryEntry) entry).getPageID(), o_r, d3)); } } } - } - // data node else { - for(int i = 0; i < node.getNumEntries(); i++) { + for (int i = 0; i < node.getNumEntries(); i++) { MTreeEntry<D> entry = node.getEntry(i); DBID o_j = entry.getRoutingObjectID(); - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : distanceQuery.nullDistance(); - D d2 = o_p != null ? distanceQuery.distance(o_j, o_p) : distanceQuery.nullDistance(); + D d2 = id_p != null ? entry.getParentDistance() : nullDistance; - D diff = d1.compareTo(d2) > 0 ? d1.minus(d2) : d2.minus(d1); + D diff = (d1.compareTo(d2) > 0) ? 
d1.minus(d2) : d2.minus(d1); - if(diff.compareTo(d_k) <= 0) { + if (diff.compareTo(d_k) <= 0) { D d3 = distanceQuery.distance(o_j, q); - if(d3.compareTo(d_k) <= 0) { + if (d3.compareTo(d_k) <= 0) { knnList.add(d3, o_j); d_k = knnList.getKNNDistance(); } @@ -142,16 +138,6 @@ public class MetricalIndexKNNQuery<O, D extends Distance<D>> extends AbstractDis } } } - } - - @Override - public KNNResult<D> getKNNForObject(O obj, int k) { - if(k < 1) { - throw new IllegalArgumentException("At least one object has to be requested!"); - } - - final KNNHeap<D> knnList = new KNNHeap<D>(k, distanceQuery.getDistanceFactory().infiniteDistance()); - doKNNQuery(obj, knnList); return knnList.toKNNList(); } @@ -165,10 +151,4 @@ public class MetricalIndexKNNQuery<O, D extends Distance<D>> extends AbstractDis // TODO: implement throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); } - - @Override - public void getKNNForBulkHeaps(Map<DBID, KNNHeap<D>> heaps) { - // TODO: implement - throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); - } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexRangeQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexRangeQuery.java index e2df2dc7..2ca19877 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexRangeQuery.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MetricalIndexRangeQuery.java @@ -23,17 +23,12 @@ package de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.query; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collections; -import java.util.List; - import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceDBIDList; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.AbstractMTree; @@ -75,109 +70,47 @@ public class MetricalIndexRangeQuery<O, D extends Distance<D>> extends AbstractD * @param r_q the query range * @param result the list holding the query results */ - private void doRangeQuery(DBID o_p, AbstractMTreeNode<O, D, ?, ?> node, DBID q, D r_q, List<DistanceResultPair<D>> result) { - if(!node.isLeaf()) { - for(int i = 0; i < node.getNumEntries(); i++) { + private void doRangeQuery(DBID o_p, AbstractMTreeNode<O, D, ?, ?> node, O q, D r_q, 
GenericDistanceDBIDList<D> result) { + final D nullDistance = distanceQuery.nullDistance(); + D d1 = o_p != null ? distanceQuery.distance(o_p, q) : nullDistance; + if (!node.isLeaf()) { + for (int i = 0; i < node.getNumEntries(); i++) { MTreeEntry<D> entry = node.getEntry(i); DBID o_r = entry.getRoutingObjectID(); D r_or = entry.getCoveringRadius(); - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : distanceQuery.nullDistance(); - D d2 = o_p != null ? entry.getParentDistance() : distanceQuery.nullDistance(); - // o_p != null ? distanceFunction.distance(o_r, o_p) :/ distanceFunction.nullDistance(); - + D d2 = o_p != null ? entry.getParentDistance() : nullDistance; D diff = d1.compareTo(d2) > 0 ? d1.minus(d2) : d2.minus(d1); D sum = r_q.plus(r_or); - if(diff.compareTo(sum) <= 0) { + if (diff.compareTo(sum) <= 0) { D d3 = distanceQuery.distance(o_r, q); - if(d3.compareTo(sum) <= 0) { - AbstractMTreeNode<O, D, ?, ?> child = index.getNode(((DirectoryEntry)entry).getPageID()); + if (d3.compareTo(sum) <= 0) { + AbstractMTreeNode<O, D, ?, ?> child = index.getNode(((DirectoryEntry) entry).getPageID()); doRangeQuery(o_r, child, q, r_q, result); } } - } - } - - else { - for(int i = 0; i < node.getNumEntries(); i++) { + } else { + for (int i = 0; i < node.getNumEntries(); i++) { MTreeEntry<D> entry = node.getEntry(i); DBID o_j = entry.getRoutingObjectID(); - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : distanceQuery.nullDistance(); - D d2 = o_p != null ? distanceQuery.distance(o_j, o_p) : distanceQuery.nullDistance(); + D d2 = o_p != null ? entry.getParentDistance() : nullDistance; D diff = d1.compareTo(d2) > 0 ? 
d1.minus(d2) : d2.minus(d1); - if(diff.compareTo(r_q) <= 0) { + if (diff.compareTo(r_q) <= 0) { D d3 = distanceQuery.distance(o_j, q); - if(d3.compareTo(r_q) <= 0) { - DistanceResultPair<D> queryResult = new GenericDistanceResultPair<D>(d3, o_j); - result.add(queryResult); + if (d3.compareTo(r_q) <= 0) { + result.add(d3, o_j); } } } } } - /** - * Performs a range query on the specified subtree. It recursively traverses - * all paths from the specified node, which cannot be excluded from leading to - * qualifying objects. - * - * @param o_p the routing object of the specified node - * @param node the root of the subtree to be traversed - * @param q the id of the query object - * @param r_q the query range - * @param result the list holding the query results - */ - private void doRangeQuery(DBID o_p, AbstractMTreeNode<O, D, ?, ?> node, O q, D r_q, List<DistanceResultPair<D>> result) { - if(!node.isLeaf()) { - for(int i = 0; i < node.getNumEntries(); i++) { - MTreeEntry<D> entry = node.getEntry(i); - DBID o_r = entry.getRoutingObjectID(); - - D r_or = entry.getCoveringRadius(); - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : distanceQuery.nullDistance(); - D d2 = o_p != null ? entry.getParentDistance() : distanceQuery.nullDistance(); - // o_p != null ? distanceFunction.distance(o_r, o_p) : distanceFunction.nullDistance(); - - D diff = d1.compareTo(d2) > 0 ? d1.minus(d2) : d2.minus(d1); - - D sum = r_q.plus(r_or); - - if(diff.compareTo(sum) <= 0) { - D d3 = distanceQuery.distance(o_r, q); - if(d3.compareTo(sum) <= 0) { - AbstractMTreeNode<O, D, ?, ?> child = index.getNode(((DirectoryEntry)entry).getPageID()); - doRangeQuery(o_r, child, q, r_q, result); - } - } - } - } - else { - for(int i = 0; i < node.getNumEntries(); i++) { - MTreeEntry<D> entry = node.getEntry(i); - DBID o_j = entry.getRoutingObjectID(); - - D d1 = o_p != null ? distanceQuery.distance(o_p, q) : distanceQuery.nullDistance(); - D d2 = o_p != null ? 
distanceQuery.distance(o_j, o_p) : distanceQuery.nullDistance(); - - D diff = d1.compareTo(d2) > 0 ? d1.minus(d2) : d2.minus(d1); - - if(diff.compareTo(r_q) <= 0) { - D d3 = distanceQuery.distance(o_j, q); - if(d3.compareTo(r_q) <= 0) { - DistanceResultPair<D> queryResult = new GenericDistanceResultPair<D>(d3, o_j); - result.add(queryResult); - } - } - } - } - } - @Override public DistanceDBIDResult<D> getRangeForObject(O obj, D range) { final GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<D>(); @@ -185,7 +118,7 @@ public class MetricalIndexRangeQuery<O, D extends Distance<D>> extends AbstractD doRangeQuery(null, index.getRoot(), obj, range, result); // sort the result according to the distances - Collections.sort(result); + result.sort(); return result; } @@ -193,4 +126,4 @@ public class MetricalIndexRangeQuery<O, D extends Distance<D>> extends AbstractD public DistanceDBIDResult<D> getRangeForDBID(DBIDRef id, D range) { return getRangeForObject(relation.get(id), range); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MkTreeRKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MkTreeRKNNQuery.java index 067c3abe..e3f35c50 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MkTreeRKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/metrical/mtreevariants/query/MkTreeRKNNQuery.java @@ -27,9 +27,9 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.AbstractRKNNQuery; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.metrical.mtreevariants.mktrees.AbstractMkTree; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; @@ -60,17 +60,17 @@ public class MkTreeRKNNQuery<O, D extends Distance<D>> extends AbstractRKNNQuery } @Override - public List<DistanceResultPair<D>> getRKNNForObject(O obj, int k) { + public DistanceDBIDResult<D> getRKNNForObject(O obj, int k) { throw new AbortException("Preprocessor KNN query only supports ID queries."); } @Override - public List<DistanceResultPair<D>> getRKNNForDBID(DBIDRef id, int k) { + public DistanceDBIDResult<D> getRKNNForDBID(DBIDRef id, int k) { return index.reverseKNNQuery(id, k); } @Override - public List<List<DistanceResultPair<D>>> getRKNNForBulkDBIDs(ArrayDBIDs ids, int k) { + public List<? 
extends DistanceDBIDResult<D>> getRKNNForBulkDBIDs(ArrayDBIDs ids, int k) { // TODO: implement throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/query/DoubleDistanceSearchCandidate.java b/src/de/lmu/ifi/dbs/elki/index/tree/query/DoubleDistanceSearchCandidate.java index 699c8290..1a3fc948 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/query/DoubleDistanceSearchCandidate.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/query/DoubleDistanceSearchCandidate.java @@ -23,10 +23,9 @@ package de.lmu.ifi.dbs.elki.index.tree.query; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - - /** - * Candidate for expansion in a distance search (generic implementation). + * Candidate for expansion in a distance search (double optimized + * implementation). * * @author Erich Schubert */ diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/query/DoubleMTreeDistanceSearchCandidate.java b/src/de/lmu/ifi/dbs/elki/index/tree/query/DoubleMTreeDistanceSearchCandidate.java new file mode 100644 index 00000000..c26ac801 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/tree/query/DoubleMTreeDistanceSearchCandidate.java @@ -0,0 +1,62 @@ +package de.lmu.ifi.dbs.elki.index.tree.query; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.database.ids.DBID; + +/** + * Encapsulates the attributes for an object that can be stored in a heap. The + * object to be stored represents a node in an M-Tree and some additional + * information. In addition to the regular expansion candidate, this object + * holds the id of the routing object of the underlying M-Tree node and the + * distance from the query object to that routing object. + * + * @author Elke Achtert + */ +public class DoubleMTreeDistanceSearchCandidate extends DoubleDistanceSearchCandidate { + /** + * The id of the routing object. + */ + public DBID routingObjectID; + + /** + * The distance from the query object to the routing object + */ + public double routingDistance; + + /** + * Creates a new heap node with the specified parameters. + * + * @param mindist the minimum distance of the node + * @param nodeID the id of the node + * @param routingObjectID the id of the routing object of the node + * @param routingDistance the distance from the query object to the routing + * object + */ + public DoubleMTreeDistanceSearchCandidate(final double mindist, final Integer nodeID, final DBID routingObjectID, double routingDistance) { + super(mindist, nodeID); + this.routingObjectID = routingObjectID; + this.routingDistance = routingDistance; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/query/GenericMTreeDistanceSearchCandidate.java b/src/de/lmu/ifi/dbs/elki/index/tree/query/GenericMTreeDistanceSearchCandidate.java index e7fc0b19..94335860 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/query/GenericMTreeDistanceSearchCandidate.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/query/GenericMTreeDistanceSearchCandidate.java @@ -42,6 +42,11 @@ public class GenericMTreeDistanceSearchCandidate<D extends Distance<D>> extends * The id of the routing 
object. */ public DBID routingObjectID; + + /** + * The distance from the query to the routing object. + */ + public D routingDistance; /** * Creates a new heap node with the specified parameters. @@ -49,9 +54,11 @@ public class GenericMTreeDistanceSearchCandidate<D extends Distance<D>> extends * @param mindist the minimum distance of the node * @param nodeID the id of the node * @param routingObjectID the id of the routing object of the node + * @param routingDistance the distance from query to routing object */ - public GenericMTreeDistanceSearchCandidate(final D mindist, final Integer nodeID, final DBID routingObjectID) { + public GenericMTreeDistanceSearchCandidate(final D mindist, final Integer nodeID, final DBID routingObjectID, final D routingDistance) { super(mindist, nodeID); this.routingObjectID = routingObjectID; + this.routingDistance = routingDistance; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/SpatialPointLeafEntry.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/SpatialPointLeafEntry.java index 0911a9c0..60981677 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/SpatialPointLeafEntry.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/SpatialPointLeafEntry.java @@ -39,6 +39,9 @@ import de.lmu.ifi.dbs.elki.index.tree.AbstractLeafEntry; * @author Elke Achtert */ public class SpatialPointLeafEntry extends AbstractLeafEntry implements SpatialEntry { + /** + * Serial version. + */ private static final long serialVersionUID = 1; /** @@ -65,17 +68,17 @@ public class SpatialPointLeafEntry extends AbstractLeafEntry implements SpatialE } /** - * Constructor from number vector + * Constructor from number vector. * * @param id Object id * @param vector Number vector */ - public SpatialPointLeafEntry(DBID id, NumberVector<?, ?> vector) { + public SpatialPointLeafEntry(DBID id, NumberVector<?> vector) { super(id); int dim = vector.getDimensionality(); this.values = new double[dim]; for(int i = 0; i < dim; i++) { - values[i] = vector.doubleValue(i + 1); + values[i] = vector.doubleValue(i); } } @@ -84,20 +87,14 @@ public class SpatialPointLeafEntry extends AbstractLeafEntry implements SpatialE return values.length; } - /** - * @return the value at the specified dimension - */ @Override public double getMin(int dimension) { - return values[dimension - 1]; + return values[dimension]; } - /** - * @return the value at the specified dimension - */ @Override public double getMax(int dimension) { - return values[dimension - 1]; + return values[dimension]; } /** diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTree.java index 3bac751c..8ede52d7 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTree.java +++ 
b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTree.java @@ -80,7 +80,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E /** * Development flag: This will enable some extra integrity checks on the tree. */ - protected final static boolean extraIntegrityChecks = false; + protected static final boolean EXTRA_INTEGRITY_CHECKS = false; /** * The height of this R*-Tree. @@ -93,7 +93,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E public int distanceCalcs = 0; /** - * The last inserted entry + * The last inserted entry. */ E lastInsertedEntry = null; @@ -103,27 +103,27 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E protected BulkSplit bulkSplitter; /** - * The split strategy + * The split strategy. */ protected SplitStrategy nodeSplitter = TopologicalSplitter.STATIC; /** - * The insertion strategy to use + * The insertion strategy to use. */ protected InsertionStrategy insertionStrategy = LeastOverlapInsertionStrategy.STATIC; /** - * Overflow treatment + * Overflow treatment. */ protected OverflowTreatment overflowTreatment = LimitedReinsertOverflowTreatment.RSTAR_OVERFLOW; /** - * Relative minimum fill + * Relative minimum fill. */ protected double relativeMinFill = 0.4; /** - * Constructor + * Constructor. * * @param pagefile Page file */ @@ -132,7 +132,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E } /** - * Set the bulk loading strategy + * Set the bulk loading strategy. * * @param bulkSplitter Bulk loading strategy */ @@ -155,7 +155,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E } /** - * Set insertion strategy + * Set insertion strategy. 
* * @param insertionStrategy the insertion strategy to set */ @@ -200,7 +200,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E N node = getNode(subtree.getLastPathComponent().getEntry()); if(node.isLeaf()) { for(int i = 0; i < node.getNumEntries(); i++) { - if(((LeafEntry) node.getEntry(i)).getDBID().sameDBID(id)) { + if(DBIDUtil.equal(((LeafEntry) node.getEntry(i)).getDBID(), id)) { return subtree.pathByAddingChild(new TreeIndexPathComponent<E>(node.getEntry(i), i)); } } @@ -319,6 +319,8 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E /** * Initializes this R*-Tree from an existing persistent file. + * + * {@inheritDoc} */ @Override public void initializeFromFile(TreeIndexHeader header, PageFile<N> file) { @@ -327,7 +329,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E this.height = computeHeight(); if(getLogger().isDebugging()) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); msg.append(getClass()); msg.append("\n height = ").append(height); getLogger().debugFine(msg.toString()); @@ -454,8 +456,10 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E /** * Performs a bulk load on this RTree with the specified data. Is called by * the constructor. + * + * @param entries Entries to bulk load */ - abstract protected void bulkLoad(List<E> entrys); + protected abstract void bulkLoad(List<E> entries); /** * Returns the height of this R*-Tree. 
@@ -480,7 +484,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E * * @return the height of this RTree */ - abstract protected int computeHeight(); + protected abstract int computeHeight(); /** * Returns true if in the specified node an overflow occurred, false @@ -489,7 +493,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E * @param node the node to be tested for overflow * @return true if in the specified node an overflow occurred, false otherwise */ - abstract protected boolean hasOverflow(N node); + protected abstract boolean hasOverflow(N node); /** * Returns true if in the specified node an underflow occurred, false @@ -499,7 +503,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E * @return true if in the specified node an underflow occurred, false * otherwise */ - abstract protected boolean hasUnderflow(N node); + protected abstract boolean hasUnderflow(N node); /** * Creates a new directory entry representing the specified node. 
@@ -507,7 +511,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E * @param node the node to be represented by the new entry * @return the newly created directory entry */ - abstract protected E createNewDirectoryEntry(N node); + protected abstract E createNewDirectoryEntry(N node); /** * Creates a new root node that points to the two specified child nodes and @@ -852,7 +856,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E // node is root else { - if(hasUnderflow(node) & node.getNumEntries() == 1 && !node.isLeaf()) { + if(hasUnderflow(node) && node.getNumEntries() == 1 && !node.isLeaf()) { N child = getNode(node.getEntry(0)); N newRoot; if(child.isLeaf()) { @@ -889,7 +893,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E } /** - * Determines the entries pointing to the leaf nodes of the specified subtree + * Determines the entries pointing to the leaf nodes of the specified subtree. * * @param node the subtree * @param result the result to store the ids in @@ -914,7 +918,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E * Perform additional integrity checks. 
*/ public void doExtraIntegrityChecks() { - if(extraIntegrityChecks) { + if(EXTRA_INTEGRITY_CHECKS) { getRoot().integrityCheck(this); } } @@ -926,7 +930,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E */ @Override public String toString() { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); int dirNodes = 0; int leafNodes = 0; int objects = 0; @@ -964,7 +968,7 @@ public abstract class AbstractRStarTree<N extends AbstractRStarTreeNode<N, E>, E result.append(getClass().getName()).append(" has ").append((levels + 1)).append(" levels.\n"); result.append(dirNodes).append(" Directory Knoten (max = ").append(dirCapacity - 1).append(", min = ").append(dirMinimum).append(")\n"); result.append(leafNodes).append(" Daten Knoten (max = ").append(leafCapacity - 1).append(", min = ").append(leafMinimum).append(")\n"); - result.append(objects).append(" ").append(dim).append("-dim. Punkte im Baum \n"); + result.append(objects).append(' ').append(dim).append("-dim. Punkte im Baum \n"); PageFileUtil.appendPageFileStatistics(result, getPageFileStatistics()); } else { diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java index e2bb3abb..ace5ad41 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeFactory.java @@ -37,8 +37,8 @@ import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.overflow. 
import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split.SplitStrategy; import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.strategies.split.TopologicalSplitter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @@ -56,31 +56,31 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @param <E> Entry type * @param <I> Index type */ -public abstract class AbstractRStarTreeFactory<O extends NumberVector<O, ?>, N extends AbstractRStarTreeNode<N, E>, E extends SpatialEntry, I extends AbstractRStarTree<N, E> & Index> extends TreeIndexFactory<O, I> { +public abstract class AbstractRStarTreeFactory<O extends NumberVector<?>, N extends AbstractRStarTreeNode<N, E>, E extends SpatialEntry, I extends AbstractRStarTree<N, E> & Index> extends TreeIndexFactory<O, I> { /** * Fast-insertion parameter. Optional. */ - public static OptionID INSERTION_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.insertionstrategy", "The strategy to use for object insertion."); + public static OptionID INSERTION_STRATEGY_ID = new OptionID("rtree.insertionstrategy", "The strategy to use for object insertion."); /** * Split strategy parameter. Optional. 
*/ - public static OptionID SPLIT_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.splitstrategy", "The strategy to use for node splitting."); + public static OptionID SPLIT_STRATEGY_ID = new OptionID("rtree.splitstrategy", "The strategy to use for node splitting."); /** * Parameter for bulk strategy */ - public static final OptionID BULK_SPLIT_ID = OptionID.getOrCreateOptionID("spatial.bulkstrategy", "The class to perform the bulk split with."); + public static final OptionID BULK_SPLIT_ID = new OptionID("spatial.bulkstrategy", "The class to perform the bulk split with."); /** * Parameter for the relative minimum fill. */ - public static final OptionID MINIMUM_FILL_ID = OptionID.getOrCreateOptionID("rtree.minimum-fill", "Minimum relative fill required for data pages."); + public static final OptionID MINIMUM_FILL_ID = new OptionID("rtree.minimum-fill", "Minimum relative fill required for data pages."); /** * Overflow treatment. */ - public static OptionID OVERFLOW_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.overflowtreatment", "The strategy to use for handling overflows."); + public static OptionID OVERFLOW_STRATEGY_ID = new OptionID("rtree.overflowtreatment", "The strategy to use for handling overflows."); /** * Strategy to find the insertion node with. 
@@ -140,7 +140,7 @@ public abstract class AbstractRStarTreeFactory<O extends NumberVector<O, ?>, N e * * @apiviz.exclude */ - public static abstract class Parameterizer<O extends NumberVector<O, ?>> extends TreeIndexFactory.Parameterizer<O> { + public abstract static class Parameterizer<O extends NumberVector<?>> extends TreeIndexFactory.Parameterizer<O> { /** * Insertion strategy */ @@ -170,19 +170,21 @@ public abstract class AbstractRStarTreeFactory<O extends NumberVector<O, ?>, N e protected void makeOptions(Parameterization config) { super.makeOptions(config); ObjectParameter<InsertionStrategy> insertionStrategyP = new ObjectParameter<InsertionStrategy>(INSERTION_STRATEGY_ID, InsertionStrategy.class, CombinedInsertionStrategy.class); - if(config.grab(insertionStrategyP)) { + if (config.grab(insertionStrategyP)) { insertionStrategy = insertionStrategyP.instantiateClass(config); } ObjectParameter<SplitStrategy> splitStrategyP = new ObjectParameter<SplitStrategy>(SPLIT_STRATEGY_ID, SplitStrategy.class, TopologicalSplitter.class); - if(config.grab(splitStrategyP)) { + if (config.grab(splitStrategyP)) { nodeSplitter = splitStrategyP.instantiateClass(config); } - DoubleParameter minimumFillP = new DoubleParameter(MINIMUM_FILL_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 0.5, IntervalBoundary.OPEN), 0.4); + DoubleParameter minimumFillP = new DoubleParameter(MINIMUM_FILL_ID, 0.4); + minimumFillP.addConstraint(new GreaterConstraint(0.0)); + minimumFillP.addConstraint(new LessConstraint(0.5)); if (config.grab(minimumFillP)) { minimumFill = minimumFillP.getValue(); } ObjectParameter<OverflowTreatment> overflowP = new ObjectParameter<OverflowTreatment>(OVERFLOW_STRATEGY_ID, OverflowTreatment.class, LimitedReinsertOverflowTreatment.class); - if(config.grab(overflowP)) { + if (config.grab(overflowP)) { overflowTreatment = overflowP.instantiateClass(config); } configBulkLoad(config); @@ -195,7 +197,7 @@ public abstract class AbstractRStarTreeFactory<O extends 
NumberVector<O, ?>, N e */ protected void configBulkLoad(Parameterization config) { ObjectParameter<BulkSplit> bulkSplitP = new ObjectParameter<BulkSplit>(BULK_SPLIT_ID, BulkSplit.class, true); - if(config.grab(bulkSplitP)) { + if (config.grab(bulkSplitP)) { bulkSplitter = bulkSplitP.instantiateClass(config); } } @@ -203,4 +205,4 @@ public abstract class AbstractRStarTreeFactory<O extends NumberVector<O, ?>, N e @Override protected abstract AbstractRStarTreeFactory<O, ?, ?, ?> makeInstance(); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeNode.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeNode.java index 46e3003e..80666ebb 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeNode.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/AbstractRStarTreeNode.java @@ -96,7 +96,7 @@ public abstract class AbstractRStarTreeNode<N extends AbstractRStarTreeNode<N, E if(se.hasMBR()) { final int dim = se.getDimensionality(); // Test for changes - for(int i = 1; i <= dim; i++) { + for(int i = 0; i < dim; i++) { if(Math.abs(se.getMin(i) - mbr.getMin(i)) > Float.MIN_NORMAL) { changed = true; break; diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/NonFlatRStarTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/NonFlatRStarTree.java index 41882ce2..fd7d3d8b 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/NonFlatRStarTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/NonFlatRStarTree.java @@ -120,7 +120,7 @@ public abstract class NonFlatRStarTree<N extends AbstractRStarTreeNode<N, E>, E initialize(spatialObjects.get(0)); } - StringBuffer msg = getLogger().isDebuggingFine() ? new StringBuffer() : null; + StringBuilder msg = getLogger().isDebuggingFine() ? 
new StringBuilder() : null; // Tiny tree that fits into a single page if(spatialObjects.size() <= leafCapacity) { @@ -165,8 +165,8 @@ public abstract class NonFlatRStarTree<N extends AbstractRStarTreeNode<N, E>, E } if(msg != null) { msg.append("\n height = ").append(getHeight()); - msg.append("\n root " + getRoot()); - getLogger().debugFine(msg.toString() + "\n"); + msg.append("\n root ").append(getRoot()); + getLogger().debugFine(msg.toString()); } } @@ -228,9 +228,9 @@ public abstract class NonFlatRStarTree<N extends AbstractRStarTreeNode<N, E>, E // write to file writeNode(root); if(getLogger().isDebuggingFiner()) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); msg.append("pageNo ").append(root.getPageID()); - getLogger().debugFiner(msg.toString() + "\n"); + getLogger().debugFiner(msg.toString()); } return root; diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluLeafEntry.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluLeafEntry.java index 9848f121..bc701185 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluLeafEntry.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluLeafEntry.java @@ -29,23 +29,23 @@ import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialPointLeafEntry; /** * Defines the requirements for a leaf entry in an DeLiClu-Tree node. - * Additionally to a leaf entry in an R*-Tree two boolean flags that indicate whether this entry's node - * contains handled or unhandled data objects. - * + * Additionally to a leaf entry in an R*-Tree two boolean flags that indicate + * whether this entry's node contains handled or unhandled data objects. 
+ * * @author Elke Achtert */ public class DeLiCluLeafEntry extends SpatialPointLeafEntry implements DeLiCluEntry { private static final long serialVersionUID = 1; /** - * Indicates that the node (or its child nodes) which is represented by this entry - * contains handled data objects. + * Indicates that the node (or its child nodes) which is represented by this + * entry contains handled data objects. */ private boolean hasHandled; /** - * Indicates that the node (or its child nodes) which is represented by this entry - * contains unhandled data objects. + * Indicates that the node (or its child nodes) which is represented by this + * entry contains unhandled data objects. */ private boolean hasUnhandled; @@ -53,16 +53,16 @@ public class DeLiCluLeafEntry extends SpatialPointLeafEntry implements DeLiCluEn * Empty constructor for serialization purposes. */ public DeLiCluLeafEntry() { - // empty constructor + // empty constructor } /** * Constructs a new LeafEntry object with the given parameters. - * - * @param id the unique id of the underlying data object + * + * @param id the unique id of the underlying data object * @param vector the vector to store */ - public DeLiCluLeafEntry(DBID id, NumberVector<?,?> vector) { + public DeLiCluLeafEntry(DBID id, NumberVector<?> vector) { super(id, vector); this.hasHandled = false; this.hasUnhandled = true; @@ -90,7 +90,7 @@ public class DeLiCluLeafEntry extends SpatialPointLeafEntry implements DeLiCluEn /** * Returns the id as a string representation of this entry. 
- * + * * @return a string representation of this entry */ @Override diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTree.java index 6f4f782d..0cd74a14 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTree.java @@ -48,7 +48,7 @@ public class DeLiCluTree extends NonFlatRStarTree<DeLiCluNode, DeLiCluEntry> { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(DeLiCluTree.class); + private static final Logging LOG = Logging.getLogger(DeLiCluTree.class); /** * Holds the ids of the expanded nodes. @@ -168,6 +168,6 @@ public class DeLiCluTree extends NonFlatRStarTree<DeLiCluNode, DeLiCluEntry> { @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeFactory.java index 3f9627a9..b64192c1 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeFactory.java @@ -42,7 +42,7 @@ import de.lmu.ifi.dbs.elki.persistent.PageFile; * * @param <O> Object type */ -public class DeLiCluTreeFactory<O extends NumberVector<O, ?>> extends AbstractRStarTreeFactory<O, DeLiCluNode, DeLiCluEntry, DeLiCluTreeIndex<O>> { +public class DeLiCluTreeFactory<O extends NumberVector<?>> extends AbstractRStarTreeFactory<O, DeLiCluNode, DeLiCluEntry, DeLiCluTreeIndex<O>> { /** * Constructor. * @@ -82,7 +82,7 @@ public class DeLiCluTreeFactory<O extends NumberVector<O, ?>> extends AbstractRS * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractRStarTreeFactory.Parameterizer<O> { + public static class Parameterizer<O extends NumberVector<?>> extends AbstractRStarTreeFactory.Parameterizer<O> { @Override protected DeLiCluTreeFactory<O> makeInstance() { return new DeLiCluTreeFactory<O>(fileName, pageSize, cacheSize, bulkSplitter, insertionStrategy, nodeSplitter, overflowTreatment, minimumFill); diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeIndex.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeIndex.java index dd3b4d6b..b1216a51 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/deliclu/DeLiCluTreeIndex.java @@ -29,6 +29,8 @@ import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBID; import 
de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.distance.SpatialDistanceQuery; @@ -52,9 +54,9 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * * @param <O> Object type */ -public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree implements KNNIndex<O>, RangeIndex<O> { +public class DeLiCluTreeIndex<O extends NumberVector<?>> extends DeLiCluTree implements KNNIndex<O>, RangeIndex<O> { /** - * The relation we index + * The relation we index. */ private Relation<O> relation; @@ -73,7 +75,7 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree /** * The appropriate logger for this index. */ - private static final Logging logger = Logging.getLogger(DeLiCluTreeIndex.class); + private static final Logging LOG = Logging.getLogger(DeLiCluTreeIndex.class); /** * Creates a new leaf entry representing the specified data object. 
@@ -93,14 +95,14 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree * @return the path of node ids from the root to the objects's parent */ public synchronized List<TreeIndexPathComponent<DeLiCluEntry>> setHandled(DBID id, O obj) { - if(logger.isDebugging()) { - logger.debugFine("setHandled " + id + ", " + obj + "\n"); + if (LOG.isDebugging()) { + LOG.debugFine("setHandled " + id + ", " + obj + "\n"); } // find the leaf node containing o IndexTreePath<DeLiCluEntry> pathToObject = findPathToObject(getRootPath(), obj, id); - if(pathToObject == null) { + if (pathToObject == null) { throw new AbortException("Object not found in setHandled."); } @@ -109,12 +111,12 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree entry.setHasHandled(true); entry.setHasUnhandled(false); - for(IndexTreePath<DeLiCluEntry> path = pathToObject; path.getParentPath() != null; path = path.getParentPath()) { + for (IndexTreePath<DeLiCluEntry> path = pathToObject; path.getParentPath() != null; path = path.getParentPath()) { DeLiCluEntry parentEntry = path.getParentPath().getLastPathComponent().getEntry(); DeLiCluNode node = getNode(parentEntry); boolean hasHandled = false; boolean hasUnhandled = false; - for(int i = 0; i < node.getNumEntries(); i++) { + for (int i = 0; i < node.getNumEntries(); i++) { final DeLiCluEntry nodeEntry = node.getEntry(i); hasHandled = hasHandled || nodeEntry.hasHandled(); hasUnhandled = hasUnhandled || nodeEntry.hasUnhandled(); @@ -132,8 +134,8 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree * @param id the object id that was inserted */ @Override - public final void insert(DBID id) { - insertLeaf(createNewLeafEntry(id)); + public final void insert(DBIDRef id) { + insertLeaf(createNewLeafEntry(DBIDUtil.deref(id))); } /** @@ -144,21 +146,20 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree */ @Override public final void insertAll(DBIDs 
ids) { - if(ids.isEmpty() || (ids.size() == 1)) { + if (ids.isEmpty() || (ids.size() == 1)) { return; } // Make an example leaf - if(canBulkLoad()) { + if (canBulkLoad()) { List<DeLiCluEntry> leafs = new ArrayList<DeLiCluEntry>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - leafs.add(createNewLeafEntry(iter.getDBID())); + leafs.add(createNewLeafEntry(DBIDUtil.deref(iter))); } bulkLoad(leafs); - } - else { + } else { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - insert(iter.getDBID()); + insert(iter); } } @@ -172,11 +173,11 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree * false otherwise */ @Override - public final boolean delete(DBID id) { + public final boolean delete(DBIDRef id) { // find the leaf node containing o O obj = relation.get(id); IndexTreePath<DeLiCluEntry> deletionPath = findPathToObject(getRootPath(), obj, id); - if(deletionPath == null) { + if (deletionPath == null) { return false; } deletePath(deletionPath); @@ -186,18 +187,18 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree @Override public void deleteAll(DBIDs ids) { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - delete(iter.getDBID()); + delete(DBIDUtil.deref(iter)); } } @Override public <D extends Distance<D>> RangeQuery<O, D> getRangeQuery(DistanceQuery<O, D> distanceQuery, Object... hints) { // Query on the relation we index - if(distanceQuery.getRelation() != relation) { + if (distanceQuery.getRelation() != relation) { return null; } // Can we support this distance function - spatial distances only! 
- if(!(distanceQuery instanceof SpatialDistanceQuery)) { + if (!(distanceQuery instanceof SpatialDistanceQuery)) { return null; } SpatialDistanceQuery<O, D> dq = (SpatialDistanceQuery<O, D>) distanceQuery; @@ -207,11 +208,11 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree @Override public <D extends Distance<D>> KNNQuery<O, D> getKNNQuery(DistanceQuery<O, D> distanceQuery, Object... hints) { // Query on the relation we index - if(distanceQuery.getRelation() != relation) { + if (distanceQuery.getRelation() != relation) { return null; } // Can we support this distance function - spatial distances only! - if(!(distanceQuery instanceof SpatialDistanceQuery)) { + if (!(distanceQuery instanceof SpatialDistanceQuery)) { return null; } SpatialDistanceQuery<O, D> dq = (SpatialDistanceQuery<O, D>) distanceQuery; @@ -230,6 +231,6 @@ public class DeLiCluTreeIndex<O extends NumberVector<?, ?>> extends DeLiCluTree @Override protected Logging getLogger() { - return logger; + return LOG; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java index f0acc233..6d539f25 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeKNNQuery.java @@ -38,11 +38,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.AbstractDistanceKNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; @@ -51,7 +51,6 @@ import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTree; import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeNode; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -102,8 +101,8 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend * @param object the query object * @param knnList the knn list 
containing the result */ - protected void doKNNQuery(O object, KNNHeap<DoubleDistance> knnList) { - final Heap<DoubleDistanceSearchCandidate> pq = new Heap<DoubleDistanceSearchCandidate>(Math.min(knnList.getK() * 2, 20)); + protected void doKNNQuery(O object, DoubleDistanceKNNHeap knnList) { + final Heap<DoubleDistanceSearchCandidate> pq = new Heap<DoubleDistanceSearchCandidate>(Math.min(knnList.getK() << 1, 20)); // push root pq.add(new DoubleDistanceSearchCandidate(0.0, tree.getRootID())); @@ -120,7 +119,7 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend } } - private double expandNode(O object, KNNHeap<DoubleDistance> knnList, final Heap<DoubleDistanceSearchCandidate> pq, double maxDist, final Integer nodeID) { + private double expandNode(O object, DoubleDistanceKNNHeap knnList, final Heap<DoubleDistanceSearchCandidate> pq, double maxDist, final int nodeID) { AbstractRStarTreeNode<?, ?> node = tree.getNode(nodeID); // data node if(node.isLeaf()) { @@ -129,8 +128,8 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend double distance = distanceFunction.doubleMinDist(entry, object); tree.distanceCalcs++; if(distance <= maxDist) { - knnList.add(new DoubleDistanceResultPair(distance, ((LeafEntry) entry).getDBID())); - maxDist = knnList.getKNNDistance().doubleValue(); + knnList.add(distance, ((LeafEntry) entry).getDBID()); + maxDist = knnList.doubleKNNDistance(); } } } @@ -160,20 +159,20 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend * @param node the node for which the query should be performed * @param knnLists a map containing the knn lists for each query objects */ - protected void batchNN(AbstractRStarTreeNode<?, ?> node, Map<DBID, KNNHeap<DoubleDistance>> knnLists) { + protected void batchNN(AbstractRStarTreeNode<?, ?> node, Map<DBID, DoubleDistanceKNNHeap> knnLists) { if(node.isLeaf()) { for(int i = 0; i < node.getNumEntries(); i++) { SpatialEntry p = 
node.getEntry(i); - for(Entry<DBID, KNNHeap<DoubleDistance>> ent : knnLists.entrySet()) { + for(Entry<DBID, DoubleDistanceKNNHeap> ent : knnLists.entrySet()) { final DBID q = ent.getKey(); - final KNNHeap<DoubleDistance> knns_q = ent.getValue(); - DoubleDistance knn_q_maxDist = knns_q.getKNNDistance(); + final DoubleDistanceKNNHeap knns_q = ent.getValue(); + double knn_q_maxDist = knns_q.doubleKNNDistance(); DBID pid = ((LeafEntry) p).getDBID(); // FIXME: objects are NOT accessible by DBID in a plain rtree context! - DoubleDistance dist_pq = distanceFunction.distance(relation.get(pid), relation.get(q)); + double dist_pq = distanceFunction.doubleDistance(relation.get(pid), relation.get(q)); tree.distanceCalcs++; - if(dist_pq.compareTo(knn_q_maxDist) <= 0) { + if(dist_pq <= knn_q_maxDist) { knns_q.add(dist_pq, pid); } } @@ -187,13 +186,13 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend List<DoubleDistanceEntry> entries = getSortedEntries(node, ids); for(DoubleDistanceEntry distEntry : entries) { double minDist = distEntry.distance; - for(Entry<DBID, KNNHeap<DoubleDistance>> ent : knnLists.entrySet()) { - final KNNHeap<DoubleDistance> knns_q = ent.getValue(); - double knn_q_maxDist = knns_q.getKNNDistance().doubleValue(); + for(Entry<DBID, DoubleDistanceKNNHeap> ent : knnLists.entrySet()) { + final DoubleDistanceKNNHeap knns_q = ent.getValue(); + double knn_q_maxDist = knns_q.doubleKNNDistance(); if(minDist <= knn_q_maxDist) { SpatialEntry entry = distEntry.entry; - AbstractRStarTreeNode<?, ?> child = tree.getNode(((DirectoryEntry) entry).getPageID()); + AbstractRStarTreeNode<?, ?> child = tree.getNode(((DirectoryEntry) entry).getPageID().intValue()); batchNN(child, knnLists); break; } @@ -264,47 +263,41 @@ public class DoubleDistanceRStarTreeKNNQuery<O extends SpatialComparable> extend } @Override - public KNNResult<DoubleDistance> getKNNForObject(O obj, int k) { + public DoubleDistanceKNNList getKNNForObject(O obj, int k) { if(k < 
1) { throw new IllegalArgumentException("At least one enumeration has to be requested!"); } - final KNNHeap<DoubleDistance> knnList = new KNNHeap<DoubleDistance>(k, distanceFunction.getDistanceFactory().infiniteDistance()); + final DoubleDistanceKNNHeap knnList = new DoubleDistanceKNNHeap(k); doKNNQuery(obj, knnList); return knnList.toKNNList(); } @Override - public KNNResult<DoubleDistance> getKNNForDBID(DBIDRef id, int k) { + public DoubleDistanceKNNList getKNNForDBID(DBIDRef id, int k) { return getKNNForObject(relation.get(id), k); } @Override - public List<KNNResult<DoubleDistance>> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { + public List<DoubleDistanceKNNList> getKNNForBulkDBIDs(ArrayDBIDs ids, int k) { if(k < 1) { throw new IllegalArgumentException("At least one enumeration has to be requested!"); } // While this works, it seems to be slow at least for large sets! - final Map<DBID, KNNHeap<DoubleDistance>> knnLists = new HashMap<DBID, KNNHeap<DoubleDistance>>(ids.size()); + final Map<DBID, DoubleDistanceKNNHeap> knnLists = new HashMap<DBID, DoubleDistanceKNNHeap>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); - knnLists.put(id, new KNNHeap<DoubleDistance>(k, distanceFunction.getDistanceFactory().infiniteDistance())); + DBID id = DBIDUtil.deref(iter); + knnLists.put(id, new DoubleDistanceKNNHeap(k)); } batchNN(tree.getRoot(), knnLists); - List<KNNResult<DoubleDistance>> result = new ArrayList<KNNResult<DoubleDistance>>(); + List<DoubleDistanceKNNList> result = new ArrayList<DoubleDistanceKNNList>(); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); + DBID id = DBIDUtil.deref(iter); result.add(knnLists.get(id).toKNNList()); } return result; } - - @Override - public void getKNNForBulkHeaps(Map<DBID, KNNHeap<DoubleDistance>> heaps) { - AbstractRStarTreeNode<?, ?> root = tree.getRoot(); - batchNN(root, heaps); - } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java index 1a861c3e..715b9552 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/DoubleDistanceRStarTreeRangeQuery.java @@ -23,16 +23,13 @@ package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.query; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collections; - import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceDBIDList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery; import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; @@ -90,8 +87,8 @@ public class DoubleDistanceRStarTreeRangeQuery<O extends SpatialComparable> exte * @param epsilon Query range * @return Objects contained in query range. 
*/ - protected GenericDistanceDBIDList<DoubleDistance> doRangeQuery(O object, double epsilon) { - final GenericDistanceDBIDList<DoubleDistance> result = new GenericDistanceDBIDList<DoubleDistance>(); + protected DoubleDistanceDBIDList doRangeQuery(O object, double epsilon) { + final DoubleDistanceDBIDList result = new DoubleDistanceDBIDList(); final Heap<DoubleDistanceSearchCandidate> pq = new Heap<DoubleDistanceSearchCandidate>(); // push root @@ -104,7 +101,7 @@ public class DoubleDistanceRStarTreeRangeQuery<O extends SpatialComparable> exte break; } - AbstractRStarTreeNode<?, ?> node = tree.getNode(pqNode.nodeID); + AbstractRStarTreeNode<?, ?> node = tree.getNode(pqNode.nodeID.intValue()); final int numEntries = node.getNumEntries(); for(int i = 0; i < numEntries; i++) { @@ -113,7 +110,7 @@ public class DoubleDistanceRStarTreeRangeQuery<O extends SpatialComparable> exte if(distance <= epsilon) { if(node.isLeaf()) { LeafEntry entry = (LeafEntry) node.getEntry(i); - result.add(new DoubleDistanceResultPair(distance, entry.getDBID())); + result.add(distance, entry.getDBID()); } else { DirectoryEntry entry = (DirectoryEntry) node.getEntry(i); @@ -124,7 +121,7 @@ public class DoubleDistanceRStarTreeRangeQuery<O extends SpatialComparable> exte } // sort the result according to the distances - Collections.sort(result); + result.sort(); return result; } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java index 09ebb61a..ed7f5949 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeKNNQuery.java @@ -40,9 +40,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.SpatialDistanceQuery; 
import de.lmu.ifi.dbs.elki.database.query.knn.AbstractDistanceKNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.distance.DistanceUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; import de.lmu.ifi.dbs.elki.index.tree.DistanceEntry; @@ -52,7 +54,6 @@ import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry; import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTree; import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.AbstractRStarTreeNode; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -103,7 +104,7 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis * @param knnList the knn list containing the result */ protected void doKNNQuery(O object, KNNHeap<D> knnList) { - final Heap<GenericDistanceSearchCandidate<D>> pq = new Heap<GenericDistanceSearchCandidate<D>>(Math.min(knnList.getK() * 2, 20)); + final Heap<GenericDistanceSearchCandidate<D>> pq = new Heap<GenericDistanceSearchCandidate<D>>(Math.min(knnList.getK() << 1, 20)); // push root pq.add(new GenericDistanceSearchCandidate<D>(distanceFunction.getDistanceFactory().nullDistance(), tree.getRootID())); @@ -120,7 +121,7 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis } } - private D expandNode(O object, KNNHeap<D> knnList, final Heap<GenericDistanceSearchCandidate<D>> pq, D maxDist, final Integer nodeID) { + private D expandNode(O object, KNNHeap<D> knnList, final 
Heap<GenericDistanceSearchCandidate<D>> pq, D maxDist, final int nodeID) { AbstractRStarTreeNode<?, ?> node = tree.getNode(nodeID); // data node if(node.isLeaf()) { @@ -192,7 +193,7 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis if(minDist.compareTo(knn_q_maxDist) <= 0) { SpatialEntry entry = distEntry.getEntry(); - AbstractRStarTreeNode<?, ?> child = tree.getNode(((DirectoryEntry) entry).getPageID()); + AbstractRStarTreeNode<?, ?> child = tree.getNode(((DirectoryEntry) entry).getPageID().intValue()); batchNN(child, knnLists); break; } @@ -201,12 +202,6 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis } } - @Override - public void getKNNForBulkHeaps(Map<DBID, KNNHeap<D>> heaps) { - AbstractRStarTreeNode<?, ?> root = tree.getRoot(); - batchNN(root, heaps); - } - /** * Sorts the entries of the specified node according to their minimum distance * to the specified objects. @@ -238,7 +233,7 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis throw new IllegalArgumentException("At least one enumeration has to be requested!"); } - final KNNHeap<D> knnList = new KNNHeap<D>(k, distanceFunction.getDistanceFactory().infiniteDistance()); + final KNNHeap<D> knnList = KNNUtil.newHeap(distanceFunction, k); doKNNQuery(obj, knnList); return knnList.toKNNList(); } @@ -256,14 +251,14 @@ public class GenericRStarTreeKNNQuery<O extends SpatialComparable, D extends Dis // While this works, it seems to be slow at least for large sets! 
final Map<DBID, KNNHeap<D>> knnLists = new HashMap<DBID, KNNHeap<D>>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - knnLists.put(iter.getDBID(), new KNNHeap<D>(k, distanceFunction.getDistanceFactory().infiniteDistance())); + knnLists.put(DBIDUtil.deref(iter), KNNUtil.newHeap(distanceFunction, k)); } batchNN(tree.getRoot(), knnLists); List<KNNResult<D>> result = new ArrayList<KNNResult<D>>(); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - result.add(knnLists.get(iter.getDBID()).toKNNList()); + result.add(knnLists.get(DBIDUtil.deref(iter)).toKNNList()); } return result; } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeRangeQuery.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeRangeQuery.java index 3b312ed7..a5232b30 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeRangeQuery.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/query/GenericRStarTreeRangeQuery.java @@ -23,16 +23,13 @@ package de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.query; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.Collections; - import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceDBIDList; -import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.SpatialDistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.AbstractDistanceRangeQuery; import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList; import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.index.tree.DirectoryEntry; import de.lmu.ifi.dbs.elki.index.tree.LeafEntry; @@ -89,7 +86,7 @@ public class GenericRStarTreeRangeQuery<O extends SpatialComparable, D extends D * @param epsilon Query range * @return Objects contained in query range. 
*/ - protected GenericDistanceDBIDList<D> doRangeQuery(O object, D epsilon) { + protected DistanceDBIDResult<D> doRangeQuery(O object, D epsilon) { final GenericDistanceDBIDList<D> result = new GenericDistanceDBIDList<D>(); final Heap<GenericDistanceSearchCandidate<D>> pq = new Heap<GenericDistanceSearchCandidate<D>>(); @@ -103,7 +100,7 @@ public class GenericRStarTreeRangeQuery<O extends SpatialComparable, D extends D break; } - AbstractRStarTreeNode<?, ?> node = tree.getNode(pqNode.nodeID); + AbstractRStarTreeNode<?, ?> node = tree.getNode(pqNode.nodeID.intValue()); final int numEntries = node.getNumEntries(); for(int i = 0; i < numEntries; i++) { @@ -111,7 +108,7 @@ public class GenericRStarTreeRangeQuery<O extends SpatialComparable, D extends D if(distance.compareTo(epsilon) <= 0) { if(node.isLeaf()) { LeafEntry entry = (LeafEntry) node.getEntry(i); - result.add(new GenericDistanceResultPair<D>(distance, entry.getDBID())); + result.add(distance, entry.getDBID()); } else { DirectoryEntry entry = (DirectoryEntry) node.getEntry(i); @@ -122,7 +119,7 @@ public class GenericRStarTreeRangeQuery<O extends SpatialComparable, D extends D } // sort the result according to the distances - Collections.sort(result); + result.sort(); return result; } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTree.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTree.java index 53b32c6b..d63d77cb 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTree.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTree.java @@ -48,7 +48,7 @@ public class RStarTree extends NonFlatRStarTree<RStarTreeNode, SpatialEntry> { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(RStarTree.class); + private static final Logging LOG = Logging.getLogger(RStarTree.class); /** * Constructor. 
@@ -91,6 +91,6 @@ public class RStarTree extends NonFlatRStarTree<RStarTreeNode, SpatialEntry> { @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeFactory.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeFactory.java index 79aac0bd..da7c03fd 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeFactory.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeFactory.java @@ -43,7 +43,7 @@ import de.lmu.ifi.dbs.elki.persistent.PageFile; * * @param <O> Object type */ -public class RStarTreeFactory<O extends NumberVector<O, ?>> extends AbstractRStarTreeFactory<O, RStarTreeNode, SpatialEntry, RStarTreeIndex<O>> { +public class RStarTreeFactory<O extends NumberVector<?>> extends AbstractRStarTreeFactory<O, RStarTreeNode, SpatialEntry, RStarTreeIndex<O>> { /** * Constructor. * @@ -83,7 +83,7 @@ public class RStarTreeFactory<O extends NumberVector<O, ?>> extends AbstractRSta * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractRStarTreeFactory.Parameterizer<O> { + public static class Parameterizer<O extends NumberVector<?>> extends AbstractRStarTreeFactory.Parameterizer<O> { @Override protected RStarTreeFactory<O> makeInstance() { return new RStarTreeFactory<O>(fileName, pageSize, cacheSize, bulkSplitter, insertionStrategy, nodeSplitter, overflowTreatment, minimumFill); diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeIndex.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeIndex.java index ab136926..1946293f 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeIndex.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/rstar/RStarTreeIndex.java @@ -27,8 +27,9 @@ import java.util.ArrayList; import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import 
de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.distance.SpatialDistanceQuery; @@ -52,11 +53,11 @@ import de.lmu.ifi.dbs.elki.persistent.PageFile; * * @param <O> Object type */ -public class RStarTreeIndex<O extends NumberVector<?, ?>> extends RStarTree implements RangeIndex<O>, KNNIndex<O> { +public class RStarTreeIndex<O extends NumberVector<?>> extends RStarTree implements RangeIndex<O>, KNNIndex<O> { /** * The appropriate logger for this index. */ - private static final Logging logger = Logging.getLogger(RStarTreeIndex.class); + private static final Logging LOG = Logging.getLogger(RStarTreeIndex.class); /** * Relation @@ -81,8 +82,8 @@ public class RStarTreeIndex<O extends NumberVector<?, ?>> extends RStarTree impl * @param id Object id * @return Spatial leaf entry */ - protected SpatialPointLeafEntry createNewLeafEntry(DBID id) { - return new SpatialPointLeafEntry(id, relation.get(id)); + protected SpatialPointLeafEntry createNewLeafEntry(DBIDRef id) { + return new SpatialPointLeafEntry(DBIDUtil.deref(id), relation.get(id)); } /** @@ -91,7 +92,7 @@ public class RStarTreeIndex<O extends NumberVector<?, ?>> extends RStarTree impl * @param id the object id that was inserted */ @Override - public void insert(DBID id) { + public void insert(DBIDRef id) { insertLeaf(createNewLeafEntry(id)); } @@ -111,13 +112,13 @@ public class RStarTreeIndex<O extends NumberVector<?, ?>> extends RStarTree impl if(canBulkLoad()) { List<SpatialEntry> leafs = new ArrayList<SpatialEntry>(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - leafs.add(createNewLeafEntry(iter.getDBID())); + leafs.add(createNewLeafEntry(iter)); } bulkLoad(leafs); } else { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - insert(iter.getDBID()); + 
insert(DBIDUtil.deref(iter)); } } @@ -131,7 +132,7 @@ public class RStarTreeIndex<O extends NumberVector<?, ?>> extends RStarTree impl * false otherwise */ @Override - public boolean delete(DBID id) { + public boolean delete(DBIDRef id) { // find the leaf node containing o O obj = relation.get(id); IndexTreePath<SpatialEntry> deletionPath = findPathToObject(getRootPath(), obj, id); @@ -145,7 +146,7 @@ public class RStarTreeIndex<O extends NumberVector<?, ?>> extends RStarTree impl @Override public void deleteAll(DBIDs ids) { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - delete(iter.getDBID()); + delete(iter); } } @@ -189,6 +190,6 @@ public class RStarTreeIndex<O extends NumberVector<?, ?>> extends RStarTree impl @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/MaxExtensionBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/MaxExtensionBulkSplit.java index 90a8b622..2fd69531 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/MaxExtensionBulkSplit.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/MaxExtensionBulkSplit.java @@ -45,7 +45,7 @@ public class MaxExtensionBulkSplit extends AbstractBulkSplit { /** * Logger. */ - private static final Logging logger = Logging.getLogger(MaxExtensionBulkSplit.class); + private static final Logging LOG = Logging.getLogger(MaxExtensionBulkSplit.class); /** * Static instance @@ -74,12 +74,12 @@ public class MaxExtensionBulkSplit extends AbstractBulkSplit { List<N> objects = new ArrayList<N>(spatialObjects); while(objects.size() > 0) { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); // get the split axis and split point int splitAxis = chooseMaximalExtendedSplitAxis(objects); int splitPoint = chooseBulkSplitPoint(objects.size(), minEntries, maxEntries); - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\nsplitAxis ").append(splitAxis); msg.append("\nsplitPoint ").append(splitPoint); } @@ -96,15 +96,15 @@ public class MaxExtensionBulkSplit extends AbstractBulkSplit { partitions.add(partition1); // copy array - if(logger.isDebugging()) { - msg.append("\ncurrent partition " + partition1); + if(LOG.isDebugging()) { + msg.append("\ncurrent partition ").append(partition1); msg.append("\nremaining objects # ").append(objects.size()); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } } - if(logger.isDebugging()) { - logger.debugFine("partitions " + partitions); + if(LOG.isDebugging()) { + LOG.debugFine("partitions " + partitions); } return partitions; } @@ -125,17 +125,17 @@ public class 
MaxExtensionBulkSplit extends AbstractBulkSplit { // compute min and max value in each dimension for(SpatialComparable object : objects) { - for(int d = 1; d <= dimension; d++) { + for(int d = 0; d < dimension; d++) { double min, max; min = object.getMin(d); max = object.getMax(d); - if(maxExtension[d - 1] < max) { - maxExtension[d - 1] = max; + if(maxExtension[d] < max) { + maxExtension[d] = max; } - if(minExtension[d - 1] > min) { - minExtension[d - 1] = min; + if(minExtension[d] > min) { + minExtension[d] = min; } } } @@ -143,8 +143,8 @@ public class MaxExtensionBulkSplit extends AbstractBulkSplit { // set split axis to dim with maximal extension int splitAxis = -1; double max = 0; - for(int d = 1; d <= dimension; d++) { - double currentExtension = maxExtension[d - 1] - minExtension[d - 1]; + for(int d = 0; d < dimension; d++) { + double currentExtension = maxExtension[d] - minExtension[d]; if(max < currentExtension) { max = currentExtension; splitAxis = d; diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/OneDimSortBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/OneDimSortBulkSplit.java index b99ae01e..2209ddef 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/OneDimSortBulkSplit.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/OneDimSortBulkSplit.java @@ -45,7 +45,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @Reference(authors = "Roussopoulos, N. and Leifker, D.", title = "Direct spatial search on pictorial databases using packed R-trees", booktitle = "ACM SIGMOD Record 14-4", url = "http://dx.doi.org/10.1145/971699.318900") public class OneDimSortBulkSplit extends AbstractBulkSplit { /** - * Static instance + * Static instance. 
*/ public static final AbstractBulkSplit STATIC = new OneDimSortBulkSplit(); @@ -62,8 +62,8 @@ public class OneDimSortBulkSplit extends AbstractBulkSplit { Collections.sort(spatialObjects, new Comparator<SpatialComparable>() { @Override public int compare(SpatialComparable o1, SpatialComparable o2) { - double min1 = (o1.getMax(1) + o1.getMin(1)) / 2; - double min2 = (o2.getMax(1) + o2.getMin(1)) / 2; + double min1 = (o1.getMax(0) + o1.getMin(0)) * .5; + double min2 = (o2.getMax(0) + o2.getMin(0)) * .5; return Double.compare(min1, min2); } }); diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SortTileRecursiveBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SortTileRecursiveBulkSplit.java index 28e96da6..e24ef3ce 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SortTileRecursiveBulkSplit.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SortTileRecursiveBulkSplit.java @@ -59,15 +59,15 @@ public class SortTileRecursiveBulkSplit extends AbstractBulkSplit { * * @param objs Object list * @param start Subinterval start - * @param end Subinteval end + * @param end Subinterval end * @param depth Iteration depth (must be less than dimensionality!) 
* @param dims Total number of dimensions * @param maxEntries Maximum page size * @param c Comparison helper * @param ret Output list + * @param <T> data type */ protected <T extends SpatialComparable> void strPartition(List<T> objs, int start, int end, int depth, int dims, int maxEntries, Compare<T> c, List<List<T>> ret) { - c.dim = depth + 1; final int p = (int) Math.ceil((end - start) / (double) maxEntries); final int s = (int) Math.ceil(Math.pow(p, 1.0 / (dims - depth))); @@ -78,6 +78,7 @@ public class SortTileRecursiveBulkSplit extends AbstractBulkSplit { int e2 = start + (int) (((i + 1) * len) / s); // LoggingUtil.warning("STR " + dim + " s2:" + s2 + " e2:" + e2); if(e2 < end) { + c.dim = depth; QuickSelect.quickSelect(objs, c, s2, end, e2); } if(depth + 1 == dims) { @@ -101,9 +102,9 @@ public class SortTileRecursiveBulkSplit extends AbstractBulkSplit { */ private static class Compare<T extends SpatialComparable> implements Comparator<T> { /** - * Current dimension + * Current dimension. 
*/ - public int dim; + int dim; @Override public int compare(T o1, T o2) { diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SpatialSortBulkSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SpatialSortBulkSplit.java index 9c3a41a1..beb6e657 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SpatialSortBulkSplit.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/bulk/SpatialSortBulkSplit.java @@ -82,7 +82,7 @@ public class SpatialSortBulkSplit extends AbstractBulkSplit { /** * Option ID for spatial sorting */ - public static final OptionID SORTER_ID = OptionID.getOrCreateOptionID("rtree.bulk.spatial-sort", "Strategy for spatial sorting in bulk loading."); + public static final OptionID SORTER_ID = new OptionID("rtree.bulk.spatial-sort", "Strategy for spatial sorting in bulk loading."); /** * Sorting class diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java index a1dfbdb0..c39bd914 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/ApproximativeLeastOverlapInsertionStrategy.java @@ -144,7 +144,7 @@ public class ApproximativeLeastOverlapInsertionStrategy extends LeastOverlapInse /** * Fast-insertion parameter. Optional. 
*/ - public static OptionID INSERTION_CANDIDATES_ID = OptionID.getOrCreateOptionID("rtree.insertion-candidates", "defines how many children are tested for finding the child generating the least overlap when inserting an object."); + public static OptionID INSERTION_CANDIDATES_ID = new OptionID("rtree.insertion-candidates", "defines how many children are tested for finding the child generating the least overlap when inserting an object."); /** * The number of candidates to use @@ -154,7 +154,8 @@ public class ApproximativeLeastOverlapInsertionStrategy extends LeastOverlapInse @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - IntParameter insertionCandidatesP = new IntParameter(INSERTION_CANDIDATES_ID, new GreaterConstraint(0), numCandidates); + IntParameter insertionCandidatesP = new IntParameter(INSERTION_CANDIDATES_ID, numCandidates); + insertionCandidatesP.addConstraint(new GreaterConstraint(0)); if(config.grab(insertionCandidatesP)) { numCandidates = insertionCandidatesP.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/CombinedInsertionStrategy.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/CombinedInsertionStrategy.java index c90d99b2..63bbe9dc 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/CombinedInsertionStrategy.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/insert/CombinedInsertionStrategy.java @@ -88,12 +88,12 @@ public class CombinedInsertionStrategy implements InsertionStrategy { /** * Insertion strategy for directory nodes. */ - public static final OptionID DIR_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.insert-directory", "Insertion strategy for directory nodes."); + public static final OptionID DIR_STRATEGY_ID = new OptionID("rtree.insert-directory", "Insertion strategy for directory nodes."); /** * Insertion strategy for leaf nodes. 
*/ - public static final OptionID LEAF_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.insert-leaf", "Insertion strategy for leaf nodes."); + public static final OptionID LEAF_STRATEGY_ID = new OptionID("rtree.insert-leaf", "Insertion strategy for leaf nodes."); /** * Strategy when inserting into directory nodes diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/LimitedReinsertOverflowTreatment.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/LimitedReinsertOverflowTreatment.java index 2532f351..3c25cbbc 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/LimitedReinsertOverflowTreatment.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/overflow/LimitedReinsertOverflowTreatment.java @@ -111,7 +111,7 @@ public class LimitedReinsertOverflowTreatment implements OverflowTreatment { /** * Fast-insertion parameter. Optional. */ - public static OptionID REINSERT_STRATEGY_ID = OptionID.getOrCreateOptionID("rtree.reinsertion-strategy", "The strategy to select candidates for reinsertion."); + public static OptionID REINSERT_STRATEGY_ID = new OptionID("rtree.reinsertion-strategy", "The strategy to select candidates for reinsertion."); /** * The actual reinsertion strategy diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java index e0277606..bb222dfe 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/reinsert/AbstractPartialReinsert.java @@ -27,7 +27,8 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.SpatialPrimitiveDoubleDista import de.lmu.ifi.dbs.elki.distance.distancefunction.SquaredEuclideanDistanceFunction; import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @@ -68,16 +69,16 @@ public abstract class AbstractPartialReinsert implements ReinsertStrategy { * * @apiviz.exclude */ - public static abstract class Parameterizer extends AbstractParameterizer { + public abstract static class Parameterizer extends AbstractParameterizer { /** * Reinsertion share */ - public static OptionID REINSERT_AMOUNT_ID = OptionID.getOrCreateOptionID("rtree.reinsertion-amount", "The amount of entries to reinsert."); + public static OptionID REINSERT_AMOUNT_ID = new OptionID("rtree.reinsertion-amount", "The amount of entries to reinsert."); /** * Reinsertion share */ - public static OptionID REINSERT_DISTANCE_ID = OptionID.getOrCreateOptionID("rtree.reinsertion-distancce", "The distance function to compute reinsertion candidates by."); + public static OptionID REINSERT_DISTANCE_ID = new OptionID("rtree.reinsertion-distancce", "The distance function to compute reinsertion candidates by."); /** * The actual reinsertion strategy @@ -92,12 +93,14 @@ public abstract class AbstractPartialReinsert implements ReinsertStrategy { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter reinsertAmountP = new DoubleParameter(REINSERT_AMOUNT_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 0.5, IntervalConstraint.IntervalBoundary.OPEN), 0.3); - 
if(config.grab(reinsertAmountP)) { + DoubleParameter reinsertAmountP = new DoubleParameter(REINSERT_AMOUNT_ID, 0.3); + reinsertAmountP.addConstraint(new GreaterConstraint(0.0)); + reinsertAmountP.addConstraint(new LessConstraint(0.5)); + if (config.grab(reinsertAmountP)) { reinsertAmount = reinsertAmountP.getValue(); } ObjectParameter<SpatialPrimitiveDoubleDistanceFunction<?>> distanceP = new ObjectParameter<SpatialPrimitiveDoubleDistanceFunction<?>>(REINSERT_DISTANCE_ID, SpatialPrimitiveDoubleDistanceFunction.class, SquaredEuclideanDistanceFunction.class); - if(config.grab(distanceP)) { + if (config.grab(distanceP)) { distanceFunction = distanceP.instantiateClass(config); } } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/AngTanLinearSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/AngTanLinearSplit.java index e59fe10e..3e3d599c 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/AngTanLinearSplit.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/AngTanLinearSplit.java @@ -56,7 +56,7 @@ public class AngTanLinearSplit implements SplitStrategy { /** * Logger class */ - private static final Logging logger = Logging.getLogger(AngTanLinearSplit.class); + private static final Logging LOG = Logging.getLogger(AngTanLinearSplit.class); /** * Static instance. 
@@ -82,11 +82,11 @@ public class AngTanLinearSplit implements SplitStrategy { } for(int i = 0; i < num; i++) { E e = getter.get(entries, i); - for(int d = 1; d <= dim; d++) { + for(int d = 0; d < dim; d++) { double low = e.getMin(d) - total.getMin(d); double hig = total.getMax(d) - e.getMax(d); if(low >= hig) { - closer[d - 1].set(i); + closer[d].set(i); } } } @@ -105,7 +105,7 @@ public class AngTanLinearSplit implements SplitStrategy { continue; } if(card < bestcard) { - axis = d + 1; + axis = d; bestcard = card; bestset = cand; bestover = Double.NaN; @@ -117,16 +117,16 @@ public class AngTanLinearSplit implements SplitStrategy { } double overlap = computeOverlap(entries, getter, cand); if(overlap < bestover) { - axis = d + 1; + axis = d; bestcard = card; bestset = cand; bestover = overlap; } else if(overlap == bestover) { double bestlen = total.getMax(axis) - total.getMin(axis); - double candlen = total.getMax(d + 1) - total.getMin(d + 1); + double candlen = total.getMax(d) - total.getMin(d); if(candlen < bestlen) { - axis = d + 1; + axis = d; bestcard = card; bestset = cand; bestover = overlap; @@ -135,8 +135,8 @@ public class AngTanLinearSplit implements SplitStrategy { } } if(bestset == null) { - logger.warning("No Ang-Tan-Split found. Probably all points are the same? Returning random split."); - return Util.randomBitSet(num / 2, num, new Random()); + LOG.warning("No Ang-Tan-Split found. Probably all points are the same? 
Returning random split."); + return Util.randomBitSet(num >> 1, num, new Random()); } return bestset; } diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/GreeneSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/GreeneSplit.java index 7401fbe5..99c15fd6 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/GreeneSplit.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/GreeneSplit.java @@ -70,72 +70,83 @@ public class GreeneSplit implements SplitStrategy { // Compute individual areas double[] areas = new double[num]; - for(int e1 = 0; e1 < num - 1; e1++) { + for (int e1 = 0; e1 < num - 1; e1++) { final E e1i = getter.get(entries, e1); areas[e1] = SpatialUtil.volume(e1i); } // Compute area increase - for(int e1 = 0; e1 < num - 1; e1++) { + for (int e1 = 0; e1 < num - 1; e1++) { final E e1i = getter.get(entries, e1); - for(int e2 = e1 + 1; e2 < num; e2++) { + for (int e2 = e1 + 1; e2 < num; e2++) { final E e2i = getter.get(entries, e2); final double areaJ = SpatialUtil.volumeUnion(e1i, e2i); final double d = areaJ - areas[e1] - areas[e2]; - if(d > worst) { + if (d > worst) { worst = d; w1 = e1; w2 = e2; } } } - // Data to keep - // Initial mbrs and areas - E m1 = getter.get(entries, w1); - E m2 = getter.get(entries, w2); + if (worst > 0) { + // Data to keep + // Initial mbrs and areas + E m1 = getter.get(entries, w1); + E m2 = getter.get(entries, w2); - double bestsep = Double.NEGATIVE_INFINITY; - double bestsep2 = Double.NEGATIVE_INFINITY; - for(int d = 1; d <= m1.getDimensionality(); d++) { - final double s1 = m1.getMin(d) - m2.getMax(d); - final double s2 = m2.getMin(d) - m1.getMax(d); - final double sm = Math.max(s1, s2); - final double no = Math.max(m1.getMax(d), m2.getMax(d)) - Math.min(m1.getMin(d), m2.getMin(d)); - final double sep = sm / no; - if(sep > bestsep || (sep == bestsep && sm > bestsep2)) { - bestsep = sep; - 
bestsep2 = sm; - axis = d; + double bestsep = Double.NEGATIVE_INFINITY; + double bestsep2 = Double.NEGATIVE_INFINITY; + for (int d = 0; d < m1.getDimensionality(); d++) { + final double s1 = m1.getMin(d) - m2.getMax(d); + final double s2 = m2.getMin(d) - m1.getMax(d); + final double sm = Math.max(s1, s2); + final double no = Math.max(m1.getMax(d), m2.getMax(d)) - Math.min(m1.getMin(d), m2.getMin(d)); + final double sep = sm / no; + if (sep > bestsep || (sep == bestsep && sm > bestsep2)) { + bestsep = sep; + bestsep2 = sm; + axis = d; + } + } + } else { + // All objects are identical! + final BitSet assignment = new BitSet(num); + final int half = (num + 1) >> 1; + // Put the first half into second node + for (int i = 0; i < half; i++) { + assignment.set(i); } + return assignment; } } // Sort by minimum value DoubleIntPair[] data = new DoubleIntPair[num]; - for(int i = 0; i < num; i++) { + for (int i = 0; i < num; i++) { data[i] = new DoubleIntPair(getter.get(entries, i).getMin(axis), i); } Arrays.sort(data); // Object assignment final BitSet assignment = new BitSet(num); - final int half = (num + 1) / 2; + final int half = (num + 1) >> 1; // Put the first half into second node - for(int i = 0; i < half; i++) { + for (int i = 0; i < half; i++) { assignment.set(data[i].second); } // Tie handling - if(num % 2 == 0) { + if (num % 2 == 0) { // We need to compute the bounding boxes ModifiableHyperBoundingBox mbr1 = new ModifiableHyperBoundingBox(getter.get(entries, data[0].second)); - for(int i = 1; i < half; i++) { + for (int i = 1; i < half; i++) { mbr1.extend(getter.get(entries, data[i].second)); } ModifiableHyperBoundingBox mbr2 = new ModifiableHyperBoundingBox(getter.get(entries, data[num - 1].second)); - for(int i = half + 1; i < num - 1; i++) { + for (int i = half + 1; i < num - 1; i++) { mbr2.extend(getter.get(entries, data[i].second)); } E e = getter.get(entries, data[half].second); double inc1 = SpatialUtil.volumeUnion(mbr1, e) - SpatialUtil.volume(mbr1); 
double inc2 = SpatialUtil.volumeUnion(mbr2, e) - SpatialUtil.volume(mbr2); - if(inc1 < inc2) { + if (inc1 < inc2) { assignment.set(data[half].second); } } @@ -155,4 +166,4 @@ public class GreeneSplit implements SplitStrategy { return GreeneSplit.STATIC; } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeLinearSplit.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeLinearSplit.java index 296f6b3b..b4ff2364 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeLinearSplit.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/RTreeLinearSplit.java @@ -67,7 +67,7 @@ public class RTreeLinearSplit implements SplitStrategy { double bestsep = Double.NEGATIVE_INFINITY; int w1 = -1, w2 = -1; // LPS1: find extreme rectangles - for(int d = 1; d <= dim; d++) { + for(int d = 0; d < dim; d++) { // We need to find two candidates each, in case of el==eh! double minlow = Double.POSITIVE_INFINITY; double maxlow = Double.NEGATIVE_INFINITY, maxlow2 = Double.NEGATIVE_INFINITY; diff --git a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/TopologicalSplitter.java b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/TopologicalSplitter.java index 1789ab22..ab32ba19 100644 --- a/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/TopologicalSplitter.java +++ b/src/de/lmu/ifi/dbs/elki/index/tree/spatial/rstarvariants/strategies/split/TopologicalSplitter.java @@ -105,22 +105,25 @@ public class TopologicalSplitter implements SplitStrategy { private A entries; /** - * The getter class for the entries + * The getter class for the entries. */ private ArrayAdapter<E, A> getter; /** - * List size + * List size. */ private int size; /** - * Dimensionality + * Dimensionality. */ private int dimensionality; /** * Constructor. 
+ * + * @param entries Entires to split + * @param getter Array adapter for entries */ public Split(A entries, ArrayAdapter<E, A> getter) { this.entries = entries; @@ -140,7 +143,7 @@ public class TopologicalSplitter implements SplitStrategy { double minSurface = Double.MAX_VALUE; int splitAxis = -1; - for(int d = 1; d <= dimensionality; d++) { + for(int d = 0; d < dimensionality; d++) { double sumOfAllMargins = 0; fillAndSort(d); @@ -180,7 +183,7 @@ public class TopologicalSplitter implements SplitStrategy { } /** - * Init the arrays we use + * Init the arrays we use. */ protected void initMinMaxArrays() { maxSorting = new DoubleIntPair[size]; @@ -227,7 +230,7 @@ public class TopologicalSplitter implements SplitStrategy { // is best for the split axis bestSorting = null; - assert (size - 2 * minEntries > 0) : "Cannot split underfull nodes."; + assert (size - 2 * minEntries >= 0) : "Cannot split nodes (" + size + " < 2*" + minEntries + ")"; // test the sorting with respect to the minimal values { ModifiableHyperBoundingBox mbr1 = mbr(minSorting, 0, minEntries - 1); @@ -267,10 +270,22 @@ public class TopologicalSplitter implements SplitStrategy { assert (splitPoint < size) : "No split found? Volume outside of double precision?"; } + /** + * Get an entry. + * + * @param off Offset + * @return Entry + */ private E get(int off) { return getter.get(entries, off); } + /** + * Get an entry. + * + * @param pair Entry pair + * @return Entry + */ private E get(DoubleIntPair pair) { return getter.get(entries, pair.second); } @@ -306,4 +321,4 @@ public class TopologicalSplitter implements SplitStrategy { return TopologicalSplitter.STATIC; } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java index f99f5918..089397dd 100644 --- a/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java +++ b/src/de/lmu/ifi/dbs/elki/index/vafile/DAFile.java @@ -26,9 +26,9 @@ package de.lmu.ifi.dbs.elki.index.vafile; import java.util.Arrays;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
@@ -50,21 +50,23 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; @Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Matthias Schubert, Ziyue Zhu", title = "Efficient Query Processing in Arbitrary Subspaces Using Vector Approximations", booktitle = "Proc. 18th Int. Conf. on Scientific and Statistical Database Management (SSDBM 06), Wien, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.23")
public class DAFile {
/**
- * Dimension of this approximation file
+ * Dimension of this approximation file.
*/
- final private int dimension;
+ private final int dimension;
/**
- * Splitting grid
+ * Splitting grid.
*/
- final private double[] splitPositions;
+ private final double[] splitPositions;
/**
* Constructor.
*
+ * @param relation Relation to index
* @param dimension Dimension of this file
+ * @param partitions Number of partitions
*/
- public DAFile(Relation<? extends NumberVector<?, ?>> relation, int dimension, int partitions) {
+ public DAFile(Relation<? extends NumberVector<?>> relation, int dimension, int partitions) {
final int size = relation.size();
this.dimension = dimension;
this.splitPositions = new double[partitions + 1];
@@ -72,8 +74,7 @@ public class DAFile { double[] tempdata = new double[size];
int j = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- tempdata[j] = relation.get(id).doubleValue(dimension + 1);
+ tempdata[j] = relation.get(iditer).doubleValue(dimension);
j += 1;
}
Arrays.sort(tempdata);
@@ -87,6 +88,8 @@ public class DAFile { }
/**
+ * Return the split positions.
+ *
* @return the split positions
*/
public double[] getSplitPositions() {
@@ -94,6 +97,8 @@ public class DAFile { }
/**
+ * Return the dimension we indexed.
+ *
* @return the dimension
*/
public int getDimension() {
@@ -106,6 +111,6 @@ public class DAFile { * @return IO costs
*/
public int getIOCosts() {
- return splitPositions.length * 8 + 4;
+ return splitPositions.length * ByteArrayUtil.SIZE_DOUBLE + 4;
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java index 848597d6..37fbe994 100644 --- a/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java +++ b/src/de/lmu/ifi/dbs/elki/index/vafile/PartialVAFile.java @@ -37,19 +37,20 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceLPNormDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.AbstractRefiningIndex;
@@ -60,11 +61,8 @@ import de.lmu.ifi.dbs.elki.index.tree.TreeIndexFactory; import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -94,36 +92,38 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; * @apiviz.uses PartialVACandidate
* @apiviz.has PartialVAFileRangeQuery
* @apiviz.has PartialVAFileKNNQuery
+ *
+ * @param <V> Vector type
*/
@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Matthias Schubert, Ziyue Zhu", title = "Efficient Query Processing in Arbitrary Subspaces Using Vector Approximations", booktitle = "Proc. 18th Int. Conf. on Scientific and Statistical Database Management (SSDBM 06), Wien, Austria, 2006", url = "http://dx.doi.org/10.1109/SSDBM.2006.23")
-public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
+public class PartialVAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
/**
- * Class logger
+ * Class logger.
*/
- private static final Logging logger = Logging.getLogger(PartialVAFile.class);
+ private static final Logging LOG = Logging.getLogger(PartialVAFile.class);
/**
- * Partial VA files
+ * Partial VA files.
*/
List<DAFile> daFiles;
/**
- * Number of partitions
+ * Number of partitions.
*/
private final int partitions;
/**
- * Page size
+ * Page size.
*/
private final int pageSize;
/**
- * Splitting grid
+ * Splitting grid.
*/
private double[][] splitPartitions;
/**
- * Statistics
+ * Statistics.
*/
protected Statistics stats;
@@ -156,7 +156,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin throw new IllegalArgumentException("Number of partitions must be a power of 2!");
}
- final int dimensions = DatabaseUtil.dimensionality(relation);
+ final int dimensions = RelationUtil.dimensionality(relation);
splitPartitions = new double[dimensions][];
daFiles = new ArrayList<DAFile>(dimensions);
@@ -168,7 +168,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin vectorApprox = new ArrayList<VectorApproximation>();
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
+ DBID id = DBIDUtil.deref(iter);
V dv = relation.get(id);
VectorApproximation va = calculateFullApproximation(id, dv);
vectorApprox.add(va);
@@ -191,8 +191,8 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin * @param relation Relation with full dimensionality
* @return Bit set with all bits set.
*/
- protected static BitSet fakeSubspace(Relation<? extends NumberVector<?, ?>> relation) {
- int dim = DatabaseUtil.dimensionality(relation);
+ protected static BitSet fakeSubspace(Relation<? extends NumberVector<?>> relation) {
+ int dim = RelationUtil.dimensionality(relation);
BitSet bits = new BitSet();
for(int i = 0; i < dim; i++) {
bits.set(i);
@@ -211,20 +211,20 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin int approximation[] = new int[dv.getDimensionality()];
for(int d = 0; d < splitPartitions.length; d++) {
double[] split = daFiles.get(d).getSplitPositions();
- final double val = dv.doubleValue(d + 1);
+ final double val = dv.doubleValue(d);
final int lastBorderIndex = split.length - 1;
// Value is below data grid
if(val < split[0]) {
approximation[d] = 0;
if(id != null) {
- logger.warning("Vector outside of VAFile grid!");
+ LOG.warning("Vector outside of VAFile grid!");
}
} // Value is above data grid
else if(val > split[lastBorderIndex]) {
approximation[d] = lastBorderIndex - 1;
if(id != null) {
- logger.warning("Vector outside of VAFile grid!");
+ LOG.warning("Vector outside of VAFile grid!");
}
} // normal case
else {
@@ -294,7 +294,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin * @param query Query vector
* @param epsilon Epsilon radius
*/
- protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector<?, ?> query, double epsilon) {
+ protected static void calculateSelectivityCoeffs(List<DoubleObjPair<DAFile>> daFiles, NumberVector<?> query, double epsilon) {
final int dimensions = query.getDimensionality();
double[] lowerVals = new double[dimensions];
double[] upperVals = new double[dimensions];
@@ -302,8 +302,9 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin VectorApproximation queryApprox = calculatePartialApproximation(null, query, daFiles);
for(int i = 0; i < dimensions; i++) {
- lowerVals[i] = query.doubleValue(i + 1) - epsilon;
- upperVals[i] = query.doubleValue(i + 1) + epsilon;
+ final double val = query.doubleValue(i);
+ lowerVals[i] = val - epsilon;
+ upperVals[i] = val + epsilon;
}
Vector lowerEpsilon = new Vector(lowerVals);
@@ -319,17 +320,17 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin }
/**
- * Calculate partial vector approximation
+ * Calculate partial vector approximation.
*
* @param id Object ID
* @param dv Object vector
* @param daFiles List of approximations to use
* @return Vector approximation
*/
- protected static VectorApproximation calculatePartialApproximation(DBID id, NumberVector<?, ?> dv, List<DoubleObjPair<DAFile>> daFiles) {
+ protected static VectorApproximation calculatePartialApproximation(DBID id, NumberVector<?> dv, List<DoubleObjPair<DAFile>> daFiles) {
int[] approximation = new int[dv.getDimensionality()];
for(int i = 0; i < daFiles.size(); i++) {
- double val = dv.doubleValue(i + 1);
+ double val = dv.doubleValue(i);
double[] borders = daFiles.get(i).second.getSplitPositions();
assert borders != null : "borders are null";
int lastBorderIndex = borders.length - 1;
@@ -353,7 +354,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin }
/**
- * Class for tracking Partial VA file statistics
+ * Class for tracking Partial VA file statistics.
*
* TODO: refactor into a common statistics API
*
@@ -387,7 +388,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin protected double minDistP = 0.0;
/**
- * The actual approximation
+ * The actual approximation.
*/
final private VectorApproximation approx;
@@ -429,12 +430,12 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin */
public class PartialVAFileRangeQuery extends AbstractRefiningIndex<V>.AbstractRangeQuery<DoubleDistance> {
/**
- * Lp-Norm p
+ * Lp-Norm p.
*/
private double p;
/**
- * Subspace
+ * Subspace.
*/
private BitSet subspace;
@@ -452,7 +453,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin }
@Override
- public DistanceDBIDResult<DoubleDistance> getRangeForObject(V query, DoubleDistance range) {
+ public DoubleDistanceDBIDList getRangeForObject(V query, DoubleDistance range) {
stats.issuedQueries++;
long t = System.nanoTime();
@@ -480,7 +481,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin // create candidate list (all objects) and prune candidates w.r.t.
// mindist (i.e. remove them from the list)
// important: this structure contains the maxDist values for refinement!
- DistanceDBIDResult<DoubleDistance> result = new GenericDistanceDBIDList<DoubleDistance>();
+ DoubleDistanceDBIDList result = new DoubleDistanceDBIDList();
int candidates = 0;
for(VectorApproximation va : vectorApprox) {
DBID id = va.getId();
@@ -503,26 +504,26 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin // candidate cannot be dropped
// TODO: actually: no refinement needed - need API that allows
// reporting maxdists only.
- result.add(new DoubleDistanceResultPair(refine(id, query).doubleValue(), id));
+ result.add(refine(id, query).doubleValue(), id);
}
else { // refine candidate - true refinement
DoubleDistance dis = refine(id, query);
stats.refinements += 1;
if(dis.doubleValue() <= range.doubleValue()) {
- result.add(new DoubleDistanceResultPair(dis.doubleValue(), id));
+ result.add(dis.doubleValue(), id);
}
}
}
}
- Collections.sort(result);
+ result.sort();
stats.scannedBytes += relation.size() * VectorApproximation.byteOnDisk(subspace.cardinality(), partitions);
stats.queryTime += System.nanoTime() - t;
- if(logger.isDebuggingFine()) {
- logger.fine("query = " + query);
- logger.fine("database: " + relation.size() + ", candidates: " + candidates + ", results: " + result.size());
+ if(LOG.isDebuggingFine()) {
+ LOG.fine("query = " + query);
+ LOG.fine("database: " + relation.size() + ", candidates: " + candidates + ", results: " + result.size());
}
return result;
@@ -537,12 +538,12 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin */
public class PartialVAFileKNNQuery extends AbstractRefiningIndex<V>.AbstractKNNQuery<DoubleDistance> {
/**
- * Lp-Norm p
+ * Lp-Norm p.
*/
private double p;
/**
- * Subspace
+ * Subspace.
*/
private BitSet subspace;
@@ -560,7 +561,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin }
@Override
- public KNNResult<DoubleDistance> getKNNForObject(V query, int k) {
+ public DoubleDistanceKNNList getKNNForObject(V query, int k) {
stats.issuedQueries++;
long t = System.nanoTime();
@@ -574,14 +575,14 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin final int currentSubspaceDims = subspace.cardinality();
int reducedDims = (2 * currentSubspaceDims) / 3;
reducedDims = Math.max(1, reducedDims);
- if(logger.isDebuggingFine()) {
- logger.fine("subspaceDims=" + currentSubspaceDims + ", reducedDims=" + reducedDims);
+ if(LOG.isDebuggingFine()) {
+ LOG.fine("subspaceDims=" + currentSubspaceDims + ", reducedDims=" + reducedDims);
}
// filter 1
LinkedList<PartialVACandidate> candidates1 = filter1(k, reducedDims, daFiles, queryApprox, currentSubspaceDims, dist);
- if(logger.isDebuggingFine()) {
- logger.fine("candidate set after filter 1: " + candidates1.size());
+ if(LOG.isDebuggingFine()) {
+ LOG.fine("candidate set after filter 1: " + candidates1.size());
}
// filters 2+
@@ -596,15 +597,15 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin // continue filtering until I/O costs of refining candidates < I/O
// costs of loading new DA files
while(candidates2 == null || (getIOCosts(candidates2.size(), currentSubspaceDims) >= getIOCosts(daFiles.get(0), currentSubspaceDims - addition)) && addition < currentSubspaceDims) {
- if(candidates2 != null && logger.isDebuggingFine()) {
- logger.fine("filter " + filterStep + ": refining costs " + getIOCosts(candidates2.size(), currentSubspaceDims) + " (" + candidates2.size() + "/" + currentSubspaceDims + "), DA file costs " + getIOCosts(daFiles.get(0), currentSubspaceDims - addition) + " (dim " + (addition + 1) + " of " + currentSubspaceDims + ")");
+ if(candidates2 != null && LOG.isDebuggingFine()) {
+ LOG.fine("filter " + filterStep + ": refining costs " + getIOCosts(candidates2.size(), currentSubspaceDims) + " (" + candidates2.size() + "/" + currentSubspaceDims + "), DA file costs " + getIOCosts(daFiles.get(0), currentSubspaceDims - addition) + " (dim " + (addition + 1) + " of " + currentSubspaceDims + ")");
}
if(candidates2 != null) {
candidates1 = candidates2;
}
candidates2 = new LinkedList<PartialVACandidate>();
- Heap<Double> kMinMaxDists = new TopBoundedHeap<Double>(k, Collections.reverseOrder());
+ DoubleMaxHeap kMinMaxDists = new DoubleMaxHeap(k+1);
for(PartialVACandidate va : candidates1) {
int dimension = daFiles.get(addition).getDimension();
int objectCell = va.getApproximation(dimension);
@@ -614,12 +615,12 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin if(kMinMaxDists.size() < k || va.minDistP <= kMinMaxDists.peek()) {
candidates2.add(va);
- kMinMaxDists.add(va.maxDistP);
+ kMinMaxDists.add(va.maxDistP, k);
}
}
- if(logger.isDebuggingFine()) {
- logger.fine("candidate set after filter " + filterStep + ": " + candidates2.size());
+ if(LOG.isDebuggingFine()) {
+ LOG.fine("candidate set after filter " + filterStep + ": " + candidates2.size());
}
addition++;
@@ -633,7 +634,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin ArrayList<PartialVACandidate> sortedCandidates = new ArrayList<PartialVACandidate>(candidates2);
// sort candidates by lower bound (minDist)
Collections.sort(sortedCandidates);
- KNNList<DoubleDistance> result = retrieveAccurateDistances(sortedCandidates, k, subspace, query);
+ DoubleDistanceKNNList result = retrieveAccurateDistances(sortedCandidates, k, subspace, query);
stats.queryTime += System.nanoTime() - t;
return result;
@@ -641,7 +642,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin private LinkedList<PartialVACandidate> filter1(int k, int reducedDims, List<DAFile> daFiles, VectorApproximation queryApprox, int subspaceDims, VALPNormDistance dist) {
LinkedList<PartialVACandidate> candidates1 = new LinkedList<PartialVACandidate>();
- Heap<Double> minmaxdist = new TopBoundedHeap<Double>(k, Collections.reverseOrder());
+ DoubleMaxHeap minmaxdist = new DoubleMaxHeap(k+1);
for(VectorApproximation va : vectorApprox) {
PartialVACandidate pva = new PartialVACandidate(va);
@@ -656,7 +657,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin }
if(minmaxdist.size() < k || pva.minDistP <= minmaxdist.peek()) {
candidates1.add(pva);
- minmaxdist.add(pva.maxDistP);
+ minmaxdist.add(pva.maxDistP, k);
}
}
// Drop candidates that don't satisfy the latest minmaxdist
@@ -680,7 +681,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin * @return the cost value (in bytes)
*/
private int getIOCosts(int size, int subspaceDims) {
- return size * (subspaceDims * 8 + 4);
+ return size * (subspaceDims * ByteArrayUtil.SIZE_DOUBLE + 4);
}
/**
@@ -711,16 +712,16 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin return result;
}
- protected KNNList<DoubleDistance> retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, BitSet subspace, V query) {
- KNNHeap<DoubleDistance> result = new KNNHeap<DoubleDistance>(k, DoubleDistance.FACTORY.infiniteDistance());
+ protected DoubleDistanceKNNList retrieveAccurateDistances(List<PartialVACandidate> sortedCandidates, int k, BitSet subspace, V query) {
+ DoubleDistanceKNNHeap result = new DoubleDistanceKNNHeap(k);
for(PartialVACandidate va : sortedCandidates) {
- double stopdist = result.getKNNDistance().doubleValue();
+ double stopdist = result.doubleKNNDistance();
DBID currentID = va.getId();
if(result.size() < k || va.minDistP < stopdist) {
DoubleDistance dist = refine(currentID, query);
stats.refinements += 1;
if(dist.doubleValue() < stopdist) {
- result.add(new DoubleDistanceResultPair(dist.doubleValue(), currentID));
+ result.add(dist.doubleValue(), currentID);
}
}
}
@@ -747,16 +748,16 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin }
/**
- * Index factory class
+ * Index factory class.
*
* @author Erich Schubert
*
- * @apiviz.stereotype «factory»
+ * @apiviz.stereotype factory
* @apiviz.has PartialVAFile
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?, ?>> implements IndexFactory<V, PartialVAFile<V>> {
+ public static class Factory<V extends NumberVector<?>> implements IndexFactory<V, PartialVAFile<V>> {
/**
* Number of partitions to use in each dimension.
*
@@ -764,15 +765,15 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin * -vafile.partitions 8
* </pre>
*/
- public static final OptionID PARTITIONS_ID = OptionID.getOrCreateOptionID("vafile.partitions", "Number of partitions to use in each dimension.");
+ public static final OptionID PARTITIONS_ID = new OptionID("vafile.partitions", "Number of partitions to use in each dimension.");
/**
- * Page size
+ * Page size.
*/
int pagesize = 1;
/**
- * Number of partitions
+ * Number of partitions.
*/
int numpart = 2;
@@ -799,7 +800,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin }
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Erich Schubert
*
@@ -807,23 +808,25 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin */
public static class Parameterizer extends AbstractParameterizer {
/**
- * Page size
+ * Page size.
*/
int pagesize = 1;
/**
- * Number of partitions
+ * Number of partitions.
*/
int numpart = 2;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter pagesizeP = new IntParameter(TreeIndexFactory.PAGE_SIZE_ID, new GreaterConstraint(0), 1024);
+ IntParameter pagesizeP = new IntParameter(TreeIndexFactory.PAGE_SIZE_ID, 1024);
+ pagesizeP.addConstraint(new GreaterConstraint(0));
if(config.grab(pagesizeP)) {
pagesize = pagesizeP.getValue();
}
- IntParameter partitionsP = new IntParameter(Factory.PARTITIONS_ID, new GreaterConstraint(2));
+ IntParameter partitionsP = new IntParameter(Factory.PARTITIONS_ID);
+ partitionsP.addConstraint(new GreaterConstraint(2));
if(config.grab(partitionsP)) {
numpart = partitionsP.getValue();
}
@@ -831,7 +834,7 @@ public class PartialVAFile<V extends NumberVector<?, ?>> extends AbstractRefinin @Override
protected Factory<?> makeInstance() {
- return new Factory<NumberVector<?, ?>>(pagesize, numpart);
+ return new Factory<NumberVector<?>>(pagesize, numpart);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java b/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java index 1d4f8f6d..09b91be7 100644 --- a/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java +++ b/src/de/lmu/ifi/dbs/elki/index/vafile/VAFile.java @@ -33,18 +33,19 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNHeap;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.index.AbstractRefiningIndex;
@@ -53,10 +54,7 @@ import de.lmu.ifi.dbs.elki.index.KNNIndex; import de.lmu.ifi.dbs.elki.index.RangeIndex;
import de.lmu.ifi.dbs.elki.index.tree.TreeIndexFactory;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -85,17 +83,19 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; * @apiviz.has VAFileRangeQuery
* @apiviz.has VAFileKNNQuery
* @apiviz.uses VALPNormDistance
+ *
+ * @param <V> Vector type
*/
@Title("An approximation based data structure for similarity search")
@Reference(authors = "Weber, R. and Blott, S.", title = "An approximation based data structure for similarity search", booktitle = "Report TR1997b, ETH Zentrum, Zurich, Switzerland", url = "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.40.480&rep=rep1&type=pdf")
-public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
+public class VAFile<V extends NumberVector<?>> extends AbstractRefiningIndex<V> implements KNNIndex<V>, RangeIndex<V> {
/**
- * Logging class
+ * Logging class.
*/
- private static final Logging log = Logging.getLogger(VAFile.class);
+ private static final Logging LOG = Logging.getLogger(VAFile.class);
/**
- * Approximation index
+ * Approximation index.
*/
private List<VectorApproximation> vectorApprox;
@@ -105,12 +105,12 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< private int partitions;
/**
- * Quantile grid we use
+ * Quantile grid we use.
*/
private double[][] splitPositions;
/**
- * Page size, for estimating the VA file size
+ * Page size, for estimating the VA file size.
*/
int pageSize;
@@ -138,7 +138,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< protected void initialize(Relation<V> relation, DBIDs ids) {
setPartitions(relation);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
+ DBID id = DBIDUtil.deref(iter);
vectorApprox.add(calculateApproximation(id, relation.get(id)));
}
}
@@ -154,7 +154,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< throw new IllegalArgumentException("Number of partitions must be a power of 2!");
}
- final int dimensions = DatabaseUtil.dimensionality(relation);
+ final int dimensions = RelationUtil.dimensionality(relation);
final int size = relation.size();
splitPositions = new double[dimensions][partitions + 1];
@@ -162,8 +162,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< double[] tempdata = new double[size];
int j = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- tempdata[j] = relation.get(id).doubleValue(d + 1);
+ tempdata[j] = relation.get(iditer).doubleValue(d);
j += 1;
}
Arrays.sort(tempdata);
@@ -187,20 +186,20 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< public VectorApproximation calculateApproximation(DBID id, V dv) {
int approximation[] = new int[dv.getDimensionality()];
for(int d = 0; d < splitPositions.length; d++) {
- final double val = dv.doubleValue(d + 1);
+ final double val = dv.doubleValue(d);
final int lastBorderIndex = splitPositions[d].length - 1;
// Value is below data grid
if(val < splitPositions[d][0]) {
approximation[d] = 0;
if(id != null) {
- log.warning("Vector outside of VAFile grid!");
+ LOG.warning("Vector outside of VAFile grid!");
}
} // Value is above data grid
else if(val > splitPositions[d][lastBorderIndex]) {
approximation[d] = lastBorderIndex - 1;
if(id != null) {
- log.warning("Vector outside of VAFile grid!");
+ LOG.warning("Vector outside of VAFile grid!");
}
} // normal case
else {
@@ -318,7 +317,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< }
@Override
- public DistanceDBIDResult<DoubleDistance> getRangeForObject(V query, DoubleDistance range) {
+ public DoubleDistanceDBIDList getRangeForObject(V query, DoubleDistance range) {
final double eps = range.doubleValue();
// generate query approximation and lookup table
VectorApproximation queryApprox = calculateApproximation(null, query);
@@ -329,7 +328,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< // Count a VA file scan
scans += 1;
- GenericDistanceDBIDList<DoubleDistance> result = new GenericDistanceDBIDList<DoubleDistance>();
+ DoubleDistanceDBIDList result = new DoubleDistanceDBIDList();
// Approximation step
for(int i = 0; i < vectorApprox.size(); i++) {
VectorApproximation va = vectorApprox.get(i);
@@ -345,10 +344,10 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< // refine the next element
final double dist = refine(va.id, query).doubleValue();
if(dist <= eps) {
- result.add(new DoubleDistanceResultPair(dist, va.id));
+ result.add(dist, va.id);
}
}
- Collections.sort(result);
+ result.sort();
return result;
}
}
@@ -376,15 +375,15 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< }
@Override
- public KNNResult<DoubleDistance> getKNNForObject(V query, int k) {
+ public DoubleDistanceKNNList getKNNForObject(V query, int k) {
// generate query approximation and lookup table
VectorApproximation queryApprox = calculateApproximation(null, query);
// Approximative distance function
VALPNormDistance vadist = new VALPNormDistance(p, splitPositions, query, queryApprox);
- // Heap for the kth smallest maximum distance
- Heap<Double> minMaxHeap = new TopBoundedHeap<Double>(k, Collections.reverseOrder());
+ // Heap for the kth smallest maximum distance (yes, we need a max heap!)
+ DoubleMaxHeap minMaxHeap = new DoubleMaxHeap(k+1);
double minMaxDist = Double.POSITIVE_INFINITY;
// Candidates with minDist <= kth maxDist
ArrayList<DoubleObjPair<DBID>> candidates = new ArrayList<DoubleObjPair<DBID>>(vectorApprox.size());
@@ -405,7 +404,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< candidates.add(new DoubleObjPair<DBID>(minDist, va.id));
// Update candidate pruning heap
- minMaxHeap.add(maxDist);
+ minMaxHeap.add(maxDist, k);
if(minMaxHeap.size() >= k) {
minMaxDist = minMaxHeap.peek();
}
@@ -414,14 +413,14 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< Collections.sort(candidates);
// refinement step
- KNNHeap<DoubleDistance> result = new KNNHeap<DoubleDistance>(k);
+ DoubleDistanceKNNHeap result = new DoubleDistanceKNNHeap(k);
// log.fine("candidates size " + candidates.size());
// retrieve accurate distances
for(DoubleObjPair<DBID> va : candidates) {
// Stop when we are sure to have all elements
if(result.size() >= k) {
- double kDist = result.getKNNDistance().doubleValue();
+ double kDist = result.doubleKNNDistance();
if(va.first > kDist) {
break;
}
@@ -429,11 +428,11 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< // refine the next element
final double dist = refine(va.second, query).doubleValue();
- result.add(new DoubleDistanceResultPair(dist, va.second));
+ result.add(dist, va.second);
}
- if(log.isDebuggingFinest()) {
- log.finest("query = (" + query + ")");
- log.finest("database: " + vectorApprox.size() + ", candidates: " + candidates.size() + ", results: " + result.size());
+ if(LOG.isDebuggingFinest()) {
+ LOG.finest("query = (" + query + ")");
+ LOG.finest("database: " + vectorApprox.size() + ", candidates: " + candidates.size() + ", results: " + result.size());
}
return result.toKNNList();
@@ -441,16 +440,16 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< }
/**
- * Index factory class
+ * Index factory class.
*
* @author Erich Schubert
*
- * @apiviz.stereotype «factory»
+ * @apiviz.stereotype factory
* @apiviz.has VAFile
*
* @param <V> Vector type
*/
- public static class Factory<V extends NumberVector<?, ?>> implements IndexFactory<V, VAFile<V>> {
+ public static class Factory<V extends NumberVector<?>> implements IndexFactory<V, VAFile<V>> {
/**
* Number of partitions to use in each dimension.
*
@@ -458,15 +457,15 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< * -vafile.partitions 8
* </pre>
*/
- public static final OptionID PARTITIONS_ID = OptionID.getOrCreateOptionID("vafile.partitions", "Number of partitions to use in each dimension.");
+ public static final OptionID PARTITIONS_ID = new OptionID("vafile.partitions", "Number of partitions to use in each dimension.");
/**
- * Page size
+ * Page size.
*/
int pagesize = 1;
/**
- * Number of partitions
+ * Number of partitions.
*/
int numpart = 2;
@@ -493,7 +492,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< }
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Erich Schubert
*
@@ -501,23 +500,25 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< */
public static class Parameterizer extends AbstractParameterizer {
/**
- * Page size
+ * Page size.
*/
int pagesize = 1;
/**
- * Number of partitions
+ * Number of partitions.
*/
int numpart = 2;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter pagesizeP = new IntParameter(TreeIndexFactory.PAGE_SIZE_ID, new GreaterConstraint(0), 1024);
+ IntParameter pagesizeP = new IntParameter(TreeIndexFactory.PAGE_SIZE_ID, 1024);
+ pagesizeP.addConstraint(new GreaterConstraint(0));
if(config.grab(pagesizeP)) {
pagesize = pagesizeP.getValue();
}
- IntParameter partitionsP = new IntParameter(Factory.PARTITIONS_ID, new GreaterConstraint(2));
+ IntParameter partitionsP = new IntParameter(Factory.PARTITIONS_ID);
+ partitionsP.addConstraint(new GreaterConstraint(2));
if(config.grab(partitionsP)) {
numpart = partitionsP.getValue();
}
@@ -525,7 +526,7 @@ public class VAFile<V extends NumberVector<?, ?>> extends AbstractRefiningIndex< @Override
protected Factory<?> makeInstance() {
- return new Factory<NumberVector<?, ?>>(pagesize, numpart);
+ return new Factory<NumberVector<?>>(pagesize, numpart);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java b/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java
index 77815c97..2d9469e7 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/VALPNormDistance.java
@@ -26,23 +26,23 @@ package de.lmu.ifi.dbs.elki.index.vafile;
import de.lmu.ifi.dbs.elki.data.NumberVector;
/**
- * Lp-Norm distance function for partially computed objects
+ * Lp-Norm distance function for partially computed objects.
*
* @author Erich Schubert
*/
public class VALPNormDistance {
/**
- * Value of 1/p for lP norm
+ * Value of 1/p for lP norm.
*/
private final double onebyp;
/**
- * Lookup table for grid cells
+ * Lookup table for grid cells.
*/
private double[][] lookup;
/**
- * Approximation of the query vector
+ * Approximation of the query vector.
*/
private VectorApproximation queryApprox;
@@ -52,9 +52,9 @@ public class VALPNormDistance {
* @param p Value of p
* @param splitPositions Split positions
* @param query Query vector
- * @param queryApprox
+ * @param queryApprox Query approximation
*/
- public VALPNormDistance(double p, double[][] splitPositions, NumberVector<?, ?> query, VectorApproximation queryApprox) {
+ public VALPNormDistance(double p, double[][] splitPositions, NumberVector<?> query, VectorApproximation queryApprox) {
super();
this.onebyp = 1.0 / p;
this.queryApprox = queryApprox;
@@ -62,7 +62,7 @@ public class VALPNormDistance {
}
/**
- * Get the minimum distance contribution of a single dimension
+ * Get the minimum distance contribution of a single dimension.
*
* @param dimension Dimension
* @param vp Vector position
@@ -82,7 +82,7 @@ public class VALPNormDistance {
}
/**
- * Get the minimum distance to approximated vector vec
+ * Get the minimum distance to approximated vector vec.
*
* @param vec Vector approximation
* @return Minimum distance
@@ -98,7 +98,7 @@ public class VALPNormDistance {
}
/**
- * Get the maximum distance contribution of a single dimension
+ * Get the maximum distance contribution of a single dimension.
*
* @param dimension Dimension
* @param vp Vector position
@@ -149,18 +149,18 @@ public class VALPNormDistance {
}
/**
- * Initialize the lookup table
+ * Initialize the lookup table.
*
* @param splitPositions Split positions
* @param query Query vector
* @param p p
*/
- private void initializeLookupTable(double[][] splitPositions, NumberVector<?, ?> query, double p) {
+ private void initializeLookupTable(double[][] splitPositions, NumberVector<?> query, double p) {
final int dimensions = splitPositions.length;
final int bordercount = splitPositions[0].length;
lookup = new double[dimensions][bordercount];
for(int d = 0; d < dimensions; d++) {
- final double val = query.doubleValue(d + 1);
+ final double val = query.doubleValue(d);
for(int i = 0; i < bordercount; i++) {
lookup[d][i] = Math.pow(splitPositions[d][i] - val, p);
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java b/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java
index 4b170eb8..f679cf16 100644
--- a/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java
+++ b/src/de/lmu/ifi/dbs/elki/index/vafile/VectorApproximation.java
@@ -26,6 +26,7 @@ package de.lmu.ifi.dbs.elki.index.vafile;
import java.util.Arrays;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
/**
* Object in a VA approximation.
@@ -46,7 +47,7 @@ public class VectorApproximation { /**
* Constructor.
- *
+ *
* @param id Object represented (may be <code>null</code> for query objects)
* @param approximation Approximation
*/
@@ -95,9 +96,10 @@ public class VectorApproximation { * @param numberOfPartitions the number of relevant partitions
* @return the cost values (in bytes)
*/
- //nicht gleich in bytes umwandeln, sonst rundungsfehler erst nachdem *anzahl objekte
+ // Do not convert to bytes right away, otherwise rounding errors occur;
+ // convert only after multiplying by the number of objects.
public static int byteOnDisk(int numberOfDimensions, int numberOfPartitions) {
- //(partition*dimension+id) alles in Bit 32bit für 4 byte id
- return (int) (Math.ceil(numberOfDimensions * ((Math.log(numberOfPartitions) / Math.log(2)))+32) /8);
+ // (partition*dimension+id), everything in bits; 32 bits for the 4-byte id
+ return (int) (Math.ceil(numberOfDimensions * ((Math.log(numberOfPartitions) / Math.log(2))) + 32) / ByteArrayUtil.SIZE_DOUBLE);
}
-}
\ No newline at end of file +}
|