summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/preprocessed/knn')
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java68
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java128
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java87
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java237
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java2
11 files changed, 493 insertions, 109 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java
index 0579262b..b9b72d87 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/AbstractMaterializeKNNPreprocessor.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,14 +23,15 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.List;
-
-import javax.swing.event.EventListenerList;
-
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -54,7 +55,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @param <O> Object type
* @param <D> Distance type
*/
-public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractPreprocessorIndex<O, List<DistanceResultPair<D>>> implements KNNIndex<O> {
+public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D>, T extends KNNResult<D>> extends AbstractPreprocessorIndex<O, T> implements KNNIndex<O> {
/**
* The query k value.
*/
@@ -71,11 +72,6 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
protected final DistanceQuery<O, D> distanceQuery;
/**
- * Holds the listener.
- */
- protected final EventListenerList listenerList = new EventListenerList();
-
- /**
* Constructor.
*
* @param relation Relation
@@ -121,6 +117,42 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
*/
protected abstract void preprocess();
+ /**
+ * Get the k nearest neighbors.
+ *
+ * @param objid Object ID
+ * @return Neighbors
+ */
+ public KNNResult<D> get(DBID objid) {
+ if(storage == null) {
+ if(getLogger().isDebugging()) {
+ getLogger().debug("Running kNN preprocessor: " + this.getClass());
+ }
+ preprocess();
+ }
+ return storage.get(objid);
+ }
+
+ /**
+ * Create the default storage.
+ */
+ void createStorage() {
+ WritableDataStore<T> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, KNNResult.class);
+ storage = s;
+ }
+
+ @Override
+ public void insertAll(DBIDs ids) {
+ if(storage == null) {
+ if(ids.size() > 0) {
+ preprocess();
+ }
+ }
+ else {
+ throw new UnsupportedOperationException("Preprocessor already ran.");
+ }
+ }
+
@SuppressWarnings("unchecked")
@Override
public <S extends Distance<S>> KNNQuery<O, S> getKNNQuery(DistanceQuery<O, S> distanceQuery, Object... hints) {
@@ -136,7 +168,9 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
break;
}
}
- return new PreprocessorKNNQuery<O, S>(relation, (MaterializeKNNPreprocessor<O, S>) this);
+ // To make compilers happy:
+ AbstractMaterializeKNNPreprocessor<?, ?, ?> tmp = this;
+ return new PreprocessorKNNQuery<O, S, KNNResult<S>>(relation, (AbstractMaterializeKNNPreprocessor<O, S, KNNResult<S>>) tmp);
}
/**
@@ -151,7 +185,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
* @param <O> The object type
* @param <D> The distance type
*/
- public static abstract class Factory<O, D extends Distance<D>> implements IndexFactory<O, KNNIndex<O>> {
+ public static abstract class Factory<O, D extends Distance<D>, T extends KNNResult<D>> implements IndexFactory<O, KNNIndex<O>> {
/**
* Parameter to specify the number of nearest neighbors of an object to be
* materialized. must be an integer greater than 1.
@@ -197,7 +231,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
}
@Override
- abstract public AbstractMaterializeKNNPreprocessor<O, D> instantiate(Relation<O> relation);
+ abstract public AbstractMaterializeKNNPreprocessor<O, D, T> instantiate(Relation<O> relation);
/**
* Get the distance function.
@@ -217,7 +251,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
public D getDistanceFactory() {
return distanceFunction.getDistanceFactory();
}
-
+
@Override
public TypeInformation getInputTypeRestriction() {
return distanceFunction.getInputTypeRestriction();
@@ -258,7 +292,7 @@ public abstract class AbstractMaterializeKNNPreprocessor<O, D extends Distance<D
}
@Override
- abstract protected Factory<O, D> makeInstance();
+ abstract protected Factory<O, D, ?> makeInstance();
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java
index ef26d62b..6c677fda 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNChangeEvent.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java
new file mode 100644
index 00000000..b7c3f0ac
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNJoinMaterializeKNNPreprocessor.java
@@ -0,0 +1,128 @@
+package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
+
+import de.lmu.ifi.dbs.elki.algorithm.KNNJoin;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.SpatialEntry;
+import de.lmu.ifi.dbs.elki.index.tree.spatial.rstarvariants.rstar.RStarTreeNode;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Class to materialize the kNN using a spatial join on an R-tree.
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> vector type
+ * @param <D> distance type
+ */
+public class KNNJoinMaterializeKNNPreprocessor<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<V, D, KNNList<D>> {
+ /**
+ * Logging class.
+ */
+ private static final Logging logger = Logging.getLogger(KNNJoinMaterializeKNNPreprocessor.class);
+
+ /**
+ * Constructor.
+ *
+ * @param relation Relation to index
+ * @param distanceFunction Distance function
+ * @param k k
+ */
+ public KNNJoinMaterializeKNNPreprocessor(Relation<V> relation, DistanceFunction<? super V, D> distanceFunction, int k) {
+ super(relation, distanceFunction, k);
+ }
+
+ @Override
+ protected void preprocess() {
+ // Run KNNJoin
+ KNNJoin<V, D, ?, ?> knnjoin = new KNNJoin<V, D, RStarTreeNode, SpatialEntry>(distanceFunction, k);
+ storage = knnjoin.run(relation.getDatabase(), relation);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ @Override
+ public String getLongName() {
+ return "knn-join materialized neighbors";
+ }
+
+ @Override
+ public String getShortName() {
+ return "knn-join";
+ }
+
+ /**
+ * The parameterizable factory.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.stereotype factory
+ * @apiviz.uses AbstractMaterializeKNNPreprocessor oneway - - «create»
+ *
+ * @param <O> The object type
+ * @param <D> The distance type
+ */
+ public static class Factory<O extends NumberVector<O, ?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNList<D>> {
+ /**
+ * Constructor.
+ *
+ * @param k K
+ * @param distanceFunction distance function
+ */
+ public Factory(int k, DistanceFunction<? super O, D> distanceFunction) {
+ super(k, distanceFunction);
+ }
+
+ @Override
+ public KNNJoinMaterializeKNNPreprocessor<O, D> instantiate(Relation<O> relation) {
+ return new KNNJoinMaterializeKNNPreprocessor<O, D>(relation, distanceFunction, k);
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O extends NumberVector<O, ?>, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> {
+ @Override
+ protected KNNJoinMaterializeKNNPreprocessor.Factory<O, D> makeInstance() {
+ return new KNNJoinMaterializeKNNPreprocessor.Factory<O, D>(k, distanceFunction);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java
index c4456f83..4a726ad0 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/KNNListener.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java
index 9abd4a74..1436efe2 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNAndRKNNPreprocessor.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,13 +34,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.TreeSetDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.rknn.PreprocessorRKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -51,6 +53,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNList;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -88,7 +91,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
@Override
protected void preprocess() {
- storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, List.class);
+ createStorage();
materialized_RkNN = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Set.class);
FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors and reverse k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null;
materializeKNNAndRKNNs(DBIDUtil.ensureArray(relation.getDBIDs()), progress);
@@ -100,7 +103,6 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
* @param ids the IDs of the objects
*/
private void materializeKNNAndRKNNs(ArrayDBIDs ids, FiniteProgress progress) {
-
// add an empty list to each rknn
for(DBID id : ids) {
if(materialized_RkNN.get(id) == null) {
@@ -109,10 +111,10 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
}
// knn query
- List<List<DistanceResultPair<D>>> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k);
+ List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(ids, k);
for(int i = 0; i < ids.size(); i++) {
DBID id = ids.get(i);
- List<DistanceResultPair<D>> kNNs = kNNList.get(i);
+ KNNResult<D> kNNs = kNNList.get(i);
storage.put(id, kNNs);
for(DistanceResultPair<D> kNN : kNNs) {
Set<DistanceResultPair<D>> rknns = materialized_RkNN.get(kNN.getDBID());
@@ -165,13 +167,12 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
* updated
*/
private ArrayDBIDs updateKNNsAndRkNNs(DBIDs ids) {
- ArrayDBIDs rkNN_ids = DBIDUtil.newArray();
+ ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray();
DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids);
for(DBID id1 : oldids) {
- List<DistanceResultPair<D>> kNNs = storage.get(id1);
- D knnDist = kNNs.get(kNNs.size() - 1).getDistance();
+ KNNResult<D> kNNs = storage.get(id1);
+ D knnDist = kNNs.getKNNDistance();
// look for new kNNs
- List<DistanceResultPair<D>> newKNNs = new ArrayList<DistanceResultPair<D>>();
KNNHeap<D> heap = null;
for(DBID id2 : ids) {
D dist = distanceQuery.distance(id1, id2);
@@ -184,7 +185,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
}
}
if(heap != null) {
- newKNNs = heap.toSortedArrayList();
+ KNNList<D> newKNNs = heap.toKNNList();
storage.put(id1, newKNNs);
// get the difference
@@ -234,7 +235,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
if(stepprog != null) {
stepprog.beginStep(1, "New deletions ocurred, remove their materialized kNNs and RkNNs.", getLogger());
}
- List<List<DistanceResultPair<D>>> kNNs = new ArrayList<List<DistanceResultPair<D>>>(ids.size());
+ List<KNNResult<D>> kNNs = new ArrayList<KNNResult<D>>(ids.size());
List<List<DistanceResultPair<D>>> rkNNs = new ArrayList<List<DistanceResultPair<D>>>(ids.size());
for(DBID id : aids) {
kNNs.add(storage.get(id));
@@ -250,7 +251,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
stepprog.beginStep(2, "New deletions ocurred, update the affected kNNs and RkNNs.", getLogger());
}
// update the kNNs of the RkNNs
- List<List<DistanceResultPair<D>>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
+ List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
for(int i = 0; i < rkNN_ids.size(); i++) {
DBID id = rkNN_ids.get(i);
storage.put(id, kNNList.get(i));
@@ -260,7 +261,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
}
}
// update the RkNNs of the kNNs
- TreeSetDBIDs idsSet = DBIDUtil.newTreeSet(ids);
+ SetDBIDs idsSet = DBIDUtil.ensureSet(ids);
for(int i = 0; i < kNN_ids.size(); i++) {
DBID id = kNN_ids.get(i);
SortedSet<DistanceResultPair<D>> rkNN = materialized_RkNN.get(id);
@@ -289,7 +290,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
* @param id the query id
* @return the kNNs
*/
- public List<DistanceResultPair<D>> getKNN(DBID id) {
+ public KNNResult<D> getKNN(DBID id) {
return storage.get(id);
}
@@ -333,7 +334,7 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
public String getShortName() {
return "knn and rknn preprocessor";
}
-
+
@Override
protected Logging getLogger() {
return logger;
@@ -373,8 +374,8 @@ public class MaterializeKNNAndRKNNPreprocessor<O, D extends Distance<D>> extends
*/
public static class Parameterizer<O, D extends Distance<D>> extends MaterializeKNNPreprocessor.Factory.Parameterizer<O, D> {
@Override
- protected Factory<O,D> makeInstance() {
- return new Factory<O,D>(k, distanceFunction);
+ protected Factory<O, D> makeInstance() {
+ return new Factory<O, D>(k, distanceFunction);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java
index 9d55e916..fcd6fad1 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MaterializeKNNPreprocessor.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,19 +23,22 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import javax.swing.event.EventListenerList;
+
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.TreeSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -64,7 +67,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
*/
@Title("Materialize kNN Neighborhood preprocessor")
@Description("Materializes the k nearest neighbors of objects of a database.")
-public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D> {
+public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> {
/**
* Logger to use.
*/
@@ -83,30 +86,20 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
protected final KNNQuery<O, D> knnQuery;
/**
- * Constructor with preprocessing step.
- *
- * @param relation Relation to preprocess
- * @param distanceFunction the distance function to use
- * @param k query k
+ * Holds the listener.
*/
- public MaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) {
- this(relation, distanceFunction, k, true);
- }
+ protected final EventListenerList listenerList = new EventListenerList();
/**
- * Constructor.
+ * Constructor with preprocessing step.
*
* @param relation Relation to preprocess
* @param distanceFunction the distance function to use
* @param k query k
*/
- protected MaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, boolean preprocess) {
+ public MaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) {
super(relation, distanceFunction, k);
this.knnQuery = relation.getDatabase().getKNNQuery(distanceQuery, k, DatabaseQuery.HINT_BULK, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_NO_CACHE);
-
- if(preprocess) {
- preprocess();
- }
}
/**
@@ -114,13 +107,13 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
*/
@Override
protected void preprocess() {
- storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, List.class);
+ createStorage();
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing k nearest neighbors (k=" + k + ")", ids.size(), getLogger()) : null;
// Try bulk
- List<List<DistanceResultPair<D>>> kNNList = null;
+ List<KNNResult<D>> kNNList = null;
if(usebulk) {
kNNList = knnQuery.getKNNForBulkDBIDs(ids, k);
if(kNNList != null) {
@@ -135,7 +128,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
}
else {
for(DBID id : ids) {
- List<DistanceResultPair<D>> knn = knnQuery.getKNNForDBID(id, k);
+ KNNResult<D> knn = knnQuery.getKNNForDBID(id, k);
storage.put(id, knn);
if(progress != null) {
progress.incrementProcessed(getLogger());
@@ -148,16 +141,6 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
}
}
- /**
- * Get the k nearest neighbors.
- *
- * @param objid Object ID
- * @return Neighbors
- */
- public List<DistanceResultPair<D>> get(DBID objid) {
- return storage.get(objid);
- }
-
@Override
public final void insert(DBID id) {
objectsInserted(id);
@@ -165,6 +148,9 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
@Override
public void insertAll(DBIDs ids) {
+ if(storage == null && ids.size() > 0) {
+ preprocess();
+ }
objectsInserted(ids);
}
@@ -193,7 +179,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
if(stepprog != null) {
stepprog.beginStep(1, "New insertions ocurred, materialize their new kNNs.", getLogger());
}
- List<List<DistanceResultPair<D>>> kNNList = knnQuery.getKNNForBulkDBIDs(aids, k);
+ List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(aids, k);
for(int i = 0; i < aids.size(); i++) {
DBID id = aids.get(i);
storage.put(id, kNNList.get(i));
@@ -225,13 +211,12 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
* updated
*/
private ArrayDBIDs updateKNNsAfterInsertion(DBIDs ids) {
- ArrayDBIDs rkNN_ids = DBIDUtil.newArray();
+ ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray();
DBIDs oldids = DBIDUtil.difference(relation.getDBIDs(), ids);
for(DBID id1 : oldids) {
- List<DistanceResultPair<D>> kNNs = storage.get(id1);
+ KNNResult<D> kNNs = storage.get(id1);
D knnDist = kNNs.get(kNNs.size() - 1).getDistance();
// look for new kNNs
- List<DistanceResultPair<D>> newKNNs = new ArrayList<DistanceResultPair<D>>();
KNNHeap<D> heap = null;
for(DBID id2 : ids) {
D dist = distanceQuery.distance(id1, id2);
@@ -244,8 +229,8 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
}
}
if(heap != null) {
- newKNNs = heap.toSortedArrayList();
- storage.put(id1, newKNNs);
+ kNNs = heap.toKNNList();
+ storage.put(id1, kNNs);
rkNN_ids.add(id1);
}
}
@@ -260,10 +245,10 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
* updated
*/
private ArrayDBIDs updateKNNsAfterDeletion(DBIDs ids) {
- TreeSetModifiableDBIDs idsSet = DBIDUtil.newTreeSet(ids);
- ArrayDBIDs rkNN_ids = DBIDUtil.newArray();
+ SetDBIDs idsSet = DBIDUtil.ensureSet(ids);
+ ArrayModifiableDBIDs rkNN_ids = DBIDUtil.newArray();
for(DBID id1 : relation.iterDBIDs()) {
- List<DistanceResultPair<D>> kNNs = storage.get(id1);
+ KNNResult<D> kNNs = storage.get(id1);
for(DistanceResultPair<D> kNN : kNNs) {
if(idsSet.contains(kNN.getDBID())) {
rkNN_ids.add(id1);
@@ -273,7 +258,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
}
// update the kNNs of the RkNNs
- List<List<DistanceResultPair<D>>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
+ List<KNNResult<D>> kNNList = knnQuery.getKNNForBulkDBIDs(rkNN_ids, k);
for(int i = 0; i < rkNN_ids.size(); i++) {
DBID id = rkNN_ids.get(i);
storage.put(id, kNNList.get(i));
@@ -360,16 +345,18 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
* @param remove the ids to remove
* @return the DBIDs in the given collection
*/
- protected ArrayDBIDs extractAndRemoveIDs(List<List<DistanceResultPair<D>>> extraxt, ArrayDBIDs remove) {
- TreeSetModifiableDBIDs ids = DBIDUtil.newTreeSet();
- for(List<DistanceResultPair<D>> drps : extraxt) {
+ protected ArrayDBIDs extractAndRemoveIDs(List<? extends Collection<DistanceResultPair<D>>> extraxt, ArrayDBIDs remove) {
+ HashSetModifiableDBIDs ids = DBIDUtil.newHashSet();
+ for(Collection<DistanceResultPair<D>> drps : extraxt) {
for(DistanceResultPair<D> drp : drps) {
ids.add(drp.getDBID());
}
}
- ids.removeAll(remove);
- return DBIDUtil.ensureArray(ids);
-
+ for(DBID id : remove) {
+ ids.remove(id);
+ }
+ // Convert back to array
+ return DBIDUtil.newArray(ids);
}
/**
@@ -423,7 +410,7 @@ public class MaterializeKNNPreprocessor<O, D extends Distance<D>> extends Abstra
* @param <O> The object type
* @param <D> The distance type
*/
- public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D> {
+ public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> {
/**
* Index factory.
*
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java
index f20e6e39..5ac7d2d2 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/MetricalIndexApproximationMaterializeKNNPreprocessor.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,12 +28,11 @@ import java.util.HashMap;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -69,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
*/
@Title("Spatial Approximation Materialize kNN Preprocessor")
@Description("Caterializes the (approximate) k nearest neighbors of objects of a database using a spatial approximation.")
-public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends NumberVector<? super O, ?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor<O, D> {
+public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends NumberVector<? super O, ?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> {
/**
* Logger to use
*/
@@ -92,7 +91,7 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb
MetricalIndexTree<O, D, N, E> index = getMetricalIndex(relation);
- storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, List.class);
+ createStorage();
MeanVariance pagesize = new MeanVariance();
MeanVariance ksize = new MeanVariance();
if(getLogger().isVerbose()) {
@@ -133,7 +132,7 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb
}
}
ksize.put(kNN.size());
- storage.put(id, kNN.toSortedArrayList());
+ storage.put(id, kNN.toKNNList());
}
if(getLogger().isDebugging()) {
if(cache.size() > 0) {
@@ -203,7 +202,7 @@ public class MetricalIndexApproximationMaterializeKNNPreprocessor<O extends Numb
* @param <N> the type of spatial nodes in the spatial index
* @param <E> the type of spatial entries in the spatial index
*/
- public static class Factory<O extends NumberVector<? super O, ?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D> {
+ public static class Factory<O extends NumberVector<? super O, ?>, D extends Distance<D>, N extends Node<E>, E extends MTreeEntry<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> {
/**
* Constructor.
*
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java
index 3e7d1b8f..90813b92 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/PartitionApproximationMaterializeKNNPreprocessor.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
*/
import java.util.HashMap;
-import java.util.List;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -34,6 +33,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -61,7 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("Partitioning Approximate kNN Preprocessor")
@Description("Caterializes the (approximate) k nearest neighbors of objects of a database by partitioning and only computing kNN within each partition.")
-public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D> {
+public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> {
// TODO: randomize/shuffle?
/**
@@ -85,14 +85,12 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista
public PartitionApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, int partitions) {
super(relation, distanceFunction, k);
this.partitions = partitions;
- // preprocess now
- preprocess();
}
@Override
protected void preprocess() {
DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction);
- storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, List.class);
+ storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNResult.class);
MeanVariance ksize = new MeanVariance();
if(logger.isVerbose()) {
logger.verbose("Approximating nearest neighbor lists to database objects");
@@ -130,7 +128,7 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista
}
}
ksize.put(kNN.size());
- storage.put(id, kNN.toSortedArrayList());
+ storage.put(id, kNN.toKNNList());
}
if(logger.isDebugging()) {
if(cache.size() > 0) {
@@ -176,7 +174,7 @@ public class PartitionApproximationMaterializeKNNPreprocessor<O, D extends Dista
* @param <O> The object type
* @param <D> The distance type
*/
- public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D> {
+ public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> {
/**
* Parameter to specify the number of partitions to use for materializing
* the kNN. Must be an integer greater than 1.
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java
new file mode 100644
index 00000000..924c7c3c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/RandomSampleKNNPreprocessor.java
@@ -0,0 +1,237 @@
+package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+
+/**
+ * Class that computed the kNN only on a random sample.
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+public class RandomSampleKNNPreprocessor<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> {
+ /**
+ * Logger
+ */
+ private static final Logging logger = Logging.getLogger(RandomSampleKNNPreprocessor.class);
+
+ /**
+ * Relative share of objects to get
+ */
+ private final double share;
+
+ /**
+ * Random seed
+ */
+ private final Long seed;
+
+ /**
+ * Constructor.
+ *
+ * @param relation Relation to index
+ * @param distanceFunction distance function
+ * @param k k
+ * @param share Relative share
+ * @param seed Random seed (may be null)
+ */
+ public RandomSampleKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k, double share, Long seed) {
+ super(relation, distanceFunction, k);
+ this.share = share;
+ this.seed = seed;
+ }
+
+ @Override
+ protected void preprocess() {
+ DistanceQuery<O, D> distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction);
+ storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNResult.class);
+ FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null;
+
+ final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+ final int samplesize = (int) (ids.size() * share);
+ final long iseed = (seed != null) ? seed : (new Random()).nextLong();
+
+ int i = 0;
+ for(DBID id : ids) {
+ KNNHeap<D> kNN = new KNNHeap<D>(k, distanceQuery.infiniteDistance());
+
+ long rseed = i * 0x7FFFFFFFFFFFFFE7L + iseed;
+ DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, rseed);
+ for(DBID oid : rsamp) {
+ D dist = distanceQuery.distance(id, oid);
+ kNN.add(new GenericDistanceResultPair<D>(dist, oid));
+ }
+
+ storage.put(id, kNN.toKNNList());
+ if(progress != null) {
+ progress.incrementProcessed(getLogger());
+ }
+ }
+
+ if(progress != null) {
+ progress.ensureCompleted(getLogger());
+ }
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ @Override
+ public String getLongName() {
+ return "random sample kNN";
+ }
+
+ @Override
+ public String getShortName() {
+ return "random-sample-knn";
+ }
+
+ /**
+ * The parameterizable factory.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.stereotype factory
+ * @apiviz.uses AbstractMaterializeKNNPreprocessor oneway - - «create»
+ *
+ * @param <O> The object type
+ * @param <D> The distance type
+ */
+ public static class Factory<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory<O, D, KNNResult<D>> {
+ /**
+ * Relative share of objects to get
+ */
+ private final double share;
+
+ /**
+ * Random seed
+ */
+ private final Long seed;
+
+ /**
+ * Constructor.
+ *
+ * @param k K
+ * @param distanceFunction distance function
+ * @param share Sample size (relative)
+ * @param seed Random seed
+ */
+ public Factory(int k, DistanceFunction<? super O, D> distanceFunction, double share, Long seed) {
+ super(k, distanceFunction);
+ this.share = share;
+ this.seed = seed;
+ }
+
+ @Override
+ public RandomSampleKNNPreprocessor<O, D> instantiate(Relation<O> relation) {
+ return new RandomSampleKNNPreprocessor<O, D>(relation, distanceFunction, k, share, seed);
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+ public static class Parameterizer<O, D extends Distance<D>> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer<O, D> {
+ /**
+ * Parameter to specify how many objects to consider for computing the
+ * kNN.
+ *
+ * <p>
+ * Key: {@code -randomknn.share}
+ * </p>
+ */
+ public static final OptionID SHARE_ID = OptionID.getOrCreateOptionID("randomknn.share", "The relative amount of objects to consider for kNN computations.");
+
+ /**
+ * Random number generator seed.
+ *
+ * <p>
+ * Key: {@code -randomknn.seed}
+ * </p>
+ */
+ public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("randomknn.seed", "The random number seed.");
+
+ /**
+ * Relative share of objects to get
+ */
+ private double share = 0.0;
+
+ /**
+ * Random seed
+ */
+ private Long seed = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ DoubleParameter shareP = new DoubleParameter(SHARE_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 1.0, IntervalBoundary.OPEN));
+ if(config.grab(shareP)) {
+ share = shareP.getValue();
+ }
+ LongParameter seedP = new LongParameter(SEED_ID, true);
+ if(config.grab(seedP)) {
+ seed = seedP.getValue();
+ }
+ }
+
+ @Override
+ protected RandomSampleKNNPreprocessor.Factory<O, D> makeInstance() {
+ return new RandomSampleKNNPreprocessor.Factory<O, D>(k, distanceFunction, share, seed);
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java
index 320a3636..e78f5e89 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/SpatialApproximationMaterializeKNNPreprocessor.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
@@ -68,7 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
*/
@Title("Spatial Approximation Materialize kNN Preprocessor")
@Description("Caterializes the (approximate) k nearest neighbors of objects of a database using a spatial approximation.")
-public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVector<?, ?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor<O, D> {
+public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVector<?, ?>, D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor<O, D, KNNResult<D>> {
/**
* Logger to use
*/
@@ -83,7 +84,6 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect
*/
public SpatialApproximationMaterializeKNNPreprocessor(Relation<O> relation, DistanceFunction<? super O, D> distanceFunction, int k) {
super(relation, distanceFunction, k);
- preprocess();
}
@Override
@@ -96,7 +96,7 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect
}
SpatialIndexTree<N, E> index = indexes.iterator().next();
- storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, List.class);
+ storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNResult.class);
MeanVariance pagesize = new MeanVariance();
MeanVariance ksize = new MeanVariance();
if(getLogger().isVerbose()) {
@@ -137,7 +137,7 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect
}
}
ksize.put(kNN.size());
- storage.put(id, kNN.toSortedArrayList());
+ storage.put(id, kNN.toKNNList());
}
if(getLogger().isDebugging()) {
if(cache.size() > 0) {
@@ -185,7 +185,7 @@ public class SpatialApproximationMaterializeKNNPreprocessor<O extends NumberVect
* @param <N> the type of spatial nodes in the spatial index
* @param <E> the type of spatial entries in the spatial index
*/
- public static class Factory<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory<NumberVector<?, ?>, D> {
+ public static class Factory<D extends Distance<D>, N extends SpatialNode<N, E>, E extends SpatialEntry> extends AbstractMaterializeKNNPreprocessor.Factory<NumberVector<?, ?>, D, KNNResult<D>> {
/**
* Constructor.
*
diff --git a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java
index 57fe5fbd..1780cdc0 100644
--- a/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/index/preprocessed/knn/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team