summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/application/cache
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/application/cache')
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java92
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java272
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java277
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java77
-rw-r--r--src/de/lmu/ifi/dbs/elki/application/cache/package-info.java2
5 files changed, 618 insertions, 102 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java
index 07907313..6a28282c 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.cache;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,7 +28,6 @@ import java.io.IOException;
import de.lmu.ifi.dbs.elki.application.AbstractApplication;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -44,10 +43,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.workflow.InputStep;
/**
- * Wrapper to convert a traditional text-serialized result into a on-disk matrix
- * for random access.
+ * Precompute an on-disk distance matrix, using double precision.
*
* @author Erich Schubert
*
@@ -64,30 +63,14 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
private static final Logging LOG = Logging.getLogger(CacheDoubleDistanceInOnDiskMatrix.class);
/**
- * Parameter that specifies the name of the directory to be re-parsed.
- * <p>
- * Key: {@code -loader.diskcache}
- * </p>
- */
- public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create.");
-
- /**
- * Parameter that specifies the name of the directory to be re-parsed.
- * <p>
- * Key: {@code -loader.distance}
- * </p>
- */
- public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache.");
-
- /**
* Debug flag, to double-check all write operations.
*/
private static final boolean debugExtraCheckSymmetry = false;
/**
- * Holds the database connection to have the algorithm run with.
+ * Data source to process.
*/
- private Database database;
+ private InputStep input;
/**
* Distance function that is to be cached.
@@ -102,28 +85,28 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
/**
* Constructor.
*
- * @param database Database
+ * @param input Data source
* @param distance Distance function
* @param out Matrix output file
*/
- public CacheDoubleDistanceInOnDiskMatrix(Database database, DistanceFunction<O, D> distance, File out) {
+ public CacheDoubleDistanceInOnDiskMatrix(InputStep input, DistanceFunction<O, D> distance, File out) {
super();
- this.database = database;
+ this.input = input;
this.distance = distance;
this.out = out;
}
@Override
public void run() {
- database.initialize();
+ Database database = input.getDatabase();
Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance);
int matrixsize = 0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
int intid = DBIDUtil.asInteger(iditer);
matrixsize = Math.max(matrixsize, intid + 1);
- if(intid < 0) {
+ if (intid < 0) {
throw new AbortException("OnDiskMatrixCache does not allow negative DBIDs.");
}
}
@@ -131,25 +114,23 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
OnDiskUpperTriangleMatrix matrix;
try {
matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedDoubleDistanceFunction.DOUBLE_CACHE_MAGIC, 0, 8, matrixsize);
- }
- catch(IOException e) {
+ } catch (IOException e) {
throw new AbortException("Error creating output matrix.", e);
}
- for(DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) {
- for(DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) {
- if(DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) {
+ for (DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) {
+ for (DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) {
+ if (DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) {
double d = distanceQuery.distance(id1, id2).doubleValue();
- if(debugExtraCheckSymmetry) {
+ if (debugExtraCheckSymmetry) {
double d2 = distanceQuery.distance(id2, id1).doubleValue();
- if(Math.abs(d - d2) > 0.0000001) {
+ if (Math.abs(d - d2) > 0.0000001) {
LOG.warning("Distance function doesn't appear to be symmetric!");
}
}
try {
matrix.getRecordBuffer(DBIDUtil.asInteger(id1), DBIDUtil.asInteger(id2)).putDouble(d);
- }
- catch(IOException e) {
+ } catch (IOException e) {
throw new AbortException("Error writing distance record " + DBIDFactory.FACTORY.toString(id1) + "," + DBIDFactory.FACTORY.toString(id2) + " to matrix.", e);
}
}
@@ -166,9 +147,25 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractApplication.Parameterizer {
/**
- * Holds the database connection to have the algorithm run with.
+ * Parameter that specifies the name of the directory to be re-parsed.
+ * <p>
+ * Key: {@code -loader.diskcache}
+ * </p>
*/
- private Database database = null;
+ public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create.");
+
+ /**
+ * Parameter that specifies the name of the directory to be re-parsed.
+ * <p>
+ * Key: {@code -loader.distance}
+ * </p>
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache.");
+
+ /**
+ * Data source to process.
+ */
+ private InputStep input = null;
/**
* Distance function that is to be cached.
@@ -183,27 +180,22 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- // Database connection parameter
- final ObjectParameter<Database> dbpar = new ObjectParameter<Database>(OptionID.DATABASE_CONNECTION, Database.class, StaticArrayDatabase.class);
- if(config.grab(dbpar)) {
- database = dbpar.instantiateClass(config);
- }
+ input = config.tryInstantiate(InputStep.class);
// Distance function parameter
- final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<DistanceFunction<O, D>>(DISTANCE_ID, DistanceFunction.class);
- if(config.grab(dpar)) {
+ final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class);
+ if (config.grab(dpar)) {
distance = dpar.instantiateClass(config);
}
// Output file parameter
final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE);
- if(config.grab(cpar)) {
+ if (config.grab(cpar)) {
out = cpar.getValue();
}
-
}
@Override
protected CacheDoubleDistanceInOnDiskMatrix<O, D> makeInstance() {
- return new CacheDoubleDistanceInOnDiskMatrix<O, D>(database, distance, out);
+ return new CacheDoubleDistanceInOnDiskMatrix<>(input, distance, out);
}
}
@@ -215,4 +207,4 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
public static void main(String[] args) {
runCLIApplication(CacheDoubleDistanceInOnDiskMatrix.class, args);
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java
new file mode 100644
index 00000000..e9bd4480
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java
@@ -0,0 +1,272 @@
+package de.lmu.ifi.dbs.elki.application.cache;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileLock;
+
+import de.lmu.ifi.dbs.elki.application.AbstractApplication;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.workflow.InputStep;
+
+/**
+ * Precompute the k nearest neighbors in a disk cache.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has DistanceFunction
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+public class CacheDoubleDistanceKNNLists<O, D extends NumberDistance<D, ?>> extends AbstractApplication {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(CacheDoubleDistanceKNNLists.class);
+
+ /**
+ * Data source to process.
+ */
+ private InputStep input;
+
+ /**
+ * Distance function that is to be cached.
+ */
+ private DistanceFunction<O, D> distance;
+
+ /**
+ * Number of neighbors to precompute.
+ */
+ private int k;
+
+ /**
+ * Output file.
+ */
+ private File out;
+
+ /**
+ * Magic number to identify files.
+ *
+ * Note, when cloning this class, and performing any incompatible change to
+ * the file format, you should also change this magic ID!
+ */
+ public static final int KNN_CACHE_MAGIC = 0xCAC43D1C;
+
+ /**
+ * Constructor.
+ *
+ * @param input Data source
+ * @param distance Distance function
+ * @param k Number of nearest neighbors
+ * @param out Matrix output file
+ */
+ public CacheDoubleDistanceKNNLists(InputStep input, DistanceFunction<O, D> distance, int k, File out) {
+ super();
+ this.input = input;
+ this.distance = distance;
+ this.k = k;
+ this.out = out;
+ }
+
+ @Override
+ public void run() {
+ Database database = input.getDatabase();
+ Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
+ DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance);
+ KNNQuery<O, D> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE);
+
+ // open file.
+ try (RandomAccessFile file = new RandomAccessFile(out, "rw");
+ FileChannel channel = file.getChannel();
+ // and acquire a file write lock
+ FileLock lock = channel.lock()) {
+ // write magic header
+ file.writeInt(KNN_CACHE_MAGIC);
+
+ int bufsize = k * 12 * 2 + 10; // Initial size, enough for 2 kNN.
+ ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;
+
+ for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ final KNNList<D> nn = knnQ.getKNNForDBID(it, k);
+ final int nnsize = nn.size();
+
+ // Grow the buffer when needed:
+ if (nnsize * 12 + 10 > bufsize) {
+ while (nnsize * 12 + 10 > bufsize) {
+ bufsize <<= 1;
+ }
+ buffer = ByteBuffer.allocateDirect(bufsize);
+ }
+
+ buffer.clear();
+ ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
+ ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
+ int c = 0;
+ if (nn instanceof DoubleDistanceDBIDList) {
+ for (DoubleDistanceDBIDListIter ni = ((DoubleDistanceDBIDList) nn).iter(); ni.valid(); ni.advance(), c++) {
+ ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
+ buffer.putDouble(ni.doubleDistance());
+ }
+ } else {
+ for (DistanceDBIDListIter<D> ni = nn.iter(); ni.valid(); ni.advance(), c++) {
+ ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
+ buffer.putDouble(ni.getDistance().doubleValue());
+ }
+ }
+ if (c != nn.size()) {
+ throw new AbortException("Sizes did not agree. Cache is invalid.");
+ }
+
+ buffer.flip();
+ channel.write(buffer);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ lock.release();
+ } catch (IOException e) {
+ LOG.exception(e);
+ }
+ // FIXME: close!
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractApplication.Parameterizer {
+ /**
+ * Parameter that specifies the name of the directory to be re-parsed.
+ * <p>
+ * Key: {@code -loader.diskcache}
+ * </p>
+ */
+ public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create.");
+
+ /**
+ * Parameter that specifies the name of the directory to be re-parsed.
+ * <p>
+ * Key: {@code -loader.distance}
+ * </p>
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache.");
+
+ /**
+ * Parameter that specifies the number of neighbors to precompute.
+ * <p>
+ * Key: {@code -loader.k}
+ * </p>
+ */
+ public static final OptionID K_ID = new OptionID("loader.k", "Number of nearest neighbors to precompute.");
+
+ /**
+ * Data source to process.
+ */
+ private InputStep input = null;
+
+ /**
+ * Distance function that is to be cached.
+ */
+ private DistanceFunction<O, D> distance = null;
+
+ /**
+ * Number of neighbors to precompute.
+ */
+ private int k;
+
+ /**
+ * Output file.
+ */
+ private File out = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ input = config.tryInstantiate(InputStep.class);
+ // Distance function parameter
+ final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class);
+ if (config.grab(dpar)) {
+ distance = dpar.instantiateClass(config);
+ }
+ final IntParameter kpar = new IntParameter(K_ID);
+ kpar.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(kpar)) {
+ k = kpar.intValue();
+ }
+ // Output file parameter
+ final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE);
+ if (config.grab(cpar)) {
+ out = cpar.getValue();
+ }
+ }
+
+ @Override
+ protected CacheDoubleDistanceKNNLists<O, D> makeInstance() {
+ return new CacheDoubleDistanceKNNLists<>(input, distance, k, out);
+ }
+ }
+
+ /**
+ * Main method, delegate to super class.
+ *
+ * @param args Command line arguments
+ */
+ public static void main(String[] args) {
+ runCLIApplication(CacheDoubleDistanceKNNLists.class, args);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java
new file mode 100644
index 00000000..f9434820
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java
@@ -0,0 +1,277 @@
+package de.lmu.ifi.dbs.elki.application.cache;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileLock;
+
+import de.lmu.ifi.dbs.elki.application.AbstractApplication;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.workflow.InputStep;
+
+/**
+ * Precompute the k nearest neighbors in a disk cache.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has DistanceFunction
+ *
+ * @param <O> Object type
+ */
+public class CacheDoubleDistanceRangeQueries<O> extends AbstractApplication {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(CacheDoubleDistanceRangeQueries.class);
+
+ /**
+ * Data source to process.
+ */
+ private InputStep input;
+
+ /**
+ * Distance function that is to be cached.
+ */
+ private DistanceFunction<O, DoubleDistance> distance;
+
+ /**
+ * Query radius.
+ */
+ private double radius;
+
+ /**
+ * Output file.
+ */
+ private File out;
+
+ /**
+ * Magic number to identify files.
+ *
+ * Note, when cloning this class, and performing any incompatible change to
+ * the file format, you should also change this magic ID!
+ */
+ public static final int RANGE_CACHE_MAGIC = 0xCAC43333;
+
+ /**
+ * Constructor.
+ *
+ * @param input Data source
+ * @param distance Distance function
+ * @param radius Query radius
+ * @param out Matrix output file
+ */
+ public CacheDoubleDistanceRangeQueries(InputStep input, DistanceFunction<O, DoubleDistance> distance, double radius, File out) {
+ super();
+ this.input = input;
+ this.distance = distance;
+ this.radius = radius;
+ this.out = out;
+ }
+
+ @Override
+ public void run() {
+ Database database = input.getDatabase();
+ Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
+ DistanceQuery<O, DoubleDistance> distanceQuery = database.getDistanceQuery(relation, distance);
+ DoubleDistance rad = new DoubleDistance(radius);
+ RangeQuery<O, DoubleDistance> rangeQ = database.getRangeQuery(distanceQuery, rad, DatabaseQuery.HINT_HEAVY_USE);
+
+ LOG.verbose("Performing range queries with radius " + rad);
+
+ // open file.
+ try (RandomAccessFile file = new RandomAccessFile(out, "rw");
+ FileChannel channel = file.getChannel();
+ // and acquire a file write lock
+ FileLock lock = channel.lock()) {
+ // write magic header
+ file.writeInt(RANGE_CACHE_MAGIC);
+ // write the query radius.
+ file.writeDouble(radius);
+
+ int bufsize = 100 * 12 * 2 + 10; // Initial size, enough for 100.
+ ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing range queries", relation.size(), LOG) : null;
+
+ for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
+ final DistanceDBIDList<DoubleDistance> nn = rangeQ.getRangeForDBID(it, rad);
+ final int nnsize = nn.size();
+
+ // Grow the buffer when needed:
+ if (nnsize * 12 + 10 > bufsize) {
+ while (nnsize * 12 + 10 > bufsize) {
+ bufsize <<= 1;
+ }
+ LOG.verbose("Resizing buffer to "+bufsize+" to store "+nnsize+" results:");
+ buffer = ByteBuffer.allocateDirect(bufsize);
+ }
+
+ buffer.clear();
+ ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
+ ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
+ int c = 0;
+ if (nn instanceof DoubleDistanceDBIDList) {
+ for (DoubleDistanceDBIDListIter ni = ((DoubleDistanceDBIDList) nn).iter(); ni.valid(); ni.advance(), c++) {
+ ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
+ buffer.putDouble(ni.doubleDistance());
+ }
+ } else {
+ for (DistanceDBIDListIter<DoubleDistance> ni = nn.iter(); ni.valid(); ni.advance(), c++) {
+ ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
+ buffer.putDouble(ni.getDistance().doubleValue());
+ }
+ }
+ if (c != nn.size()) {
+ throw new AbortException("Sizes did not agree. Cache is invalid.");
+ }
+
+ buffer.flip();
+ channel.write(buffer);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ lock.release();
+ } catch (IOException e) {
+ LOG.exception(e);
+ }
+ // FIXME: close!
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O> extends AbstractApplication.Parameterizer {
+ /**
+ * Parameter that specifies the name of the directory to be re-parsed.
+ * <p>
+ * Key: {@code -loader.diskcache}
+ * </p>
+ */
+ public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create.");
+
+ /**
+ * Parameter that specifies the name of the directory to be re-parsed.
+ * <p>
+ * Key: {@code -loader.distance}
+ * </p>
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache.");
+
+ /**
+ * Parameter that specifies the query radius to precompute.
+ * <p>
+ * Key: {@code -loader.radius}
+ * </p>
+ */
+ public static final OptionID RADIUS_ID = new OptionID("loader.radius", "Query radius for precomputation.");
+
+ /**
+ * Data source to process.
+ */
+ private InputStep input = null;
+
+ /**
+ * Distance function that is to be cached.
+ */
+ private DistanceFunction<O, DoubleDistance> distance = null;
+
+ /**
+ * Number of neighbors to precompute.
+ */
+ private double radius;
+
+ /**
+ * Output file.
+ */
+ private File out = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ input = config.tryInstantiate(InputStep.class);
+ // Distance function parameter
+ final ObjectParameter<DistanceFunction<O, DoubleDistance>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class);
+ if (config.grab(dpar)) {
+ distance = dpar.instantiateClass(config);
+ }
+ final DoubleParameter kpar = new DoubleParameter(RADIUS_ID);
+ kpar.addConstraint(new GreaterEqualConstraint(0.));
+ if (config.grab(kpar)) {
+ radius = kpar.doubleValue();
+ }
+ // Output file parameter
+ final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE);
+ if (config.grab(cpar)) {
+ out = cpar.getValue();
+ }
+ }
+
+ @Override
+ protected CacheDoubleDistanceRangeQueries<O> makeInstance() {
+ return new CacheDoubleDistanceRangeQueries<>(input, distance, radius, out);
+ }
+ }
+
+ /**
+ * Main method, delegate to super class.
+ *
+ * @param args Command line arguments
+ */
+ public static void main(String[] args) {
+ runCLIApplication(CacheDoubleDistanceRangeQueries.class, args);
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java
index 144b4b70..5499415b 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.cache;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,7 +28,6 @@ import java.io.IOException;
import de.lmu.ifi.dbs.elki.application.AbstractApplication;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -39,14 +38,13 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.workflow.InputStep;
/**
- * Wrapper to convert a traditional text-serialized result into a on-disk matrix
- * for random access.
+ * Precompute an on-disk distance matrix, using double precision.
*
* @author Erich Schubert
*
@@ -63,22 +61,6 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
private static final Logging LOG = Logging.getLogger(CacheFloatDistanceInOnDiskMatrix.class);
/**
- * Parameter that specifies the name of the directory to be re-parsed.
- * <p>
- * Key: {@code -loader.diskcache}
- * </p>
- */
- public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create.");
-
- /**
- * Parameter that specifies the name of the directory to be re-parsed.
- * <p>
- * Key: {@code -loader.distance}
- * </p>
- */
- public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache.");
-
- /**
* Debug flag, to double-check all write operations.
*/
private static final boolean debugExtraCheckSymmetry = false;
@@ -89,9 +71,9 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
private static final int FLOAT_SIZE = 4;
/**
- * Holds the database connection to have the algorithm run with.
+ * Data source to process.
*/
- private Database database;
+ private InputStep input;
/**
* Distance function that is to be cached.
@@ -106,28 +88,28 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
/**
* Constructor.
*
- * @param database Database
+ * @param input Data source
* @param distance Distance function
* @param out Matrix output file
*/
- public CacheFloatDistanceInOnDiskMatrix(Database database, DistanceFunction<O, D> distance, File out) {
+ public CacheFloatDistanceInOnDiskMatrix(InputStep input, DistanceFunction<O, D> distance, File out) {
super();
- this.database = database;
+ this.input = input;
this.distance = distance;
this.out = out;
}
@Override
public void run() {
- database.initialize();
+ Database database = input.getDatabase();
Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance);
int matrixsize = 0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final int intid = DBIDUtil.asInteger(iditer);
matrixsize = Math.max(matrixsize, intid + 1);
- if(intid < 0) {
+ if (intid < 0) {
throw new AbortException("OnDiskMatrixCache does not allow negative DBIDs.");
}
}
@@ -135,25 +117,23 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
OnDiskUpperTriangleMatrix matrix;
try {
matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedFloatDistanceFunction.FLOAT_CACHE_MAGIC, 0, FLOAT_SIZE, matrixsize);
- }
- catch(IOException e) {
+ } catch (IOException e) {
throw new AbortException("Error creating output matrix.", e);
}
- for(DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) {
- for(DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) {
- if(DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) {
+ for (DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) {
+ for (DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) {
+ if (DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) {
float d = distanceQuery.distance(id1, id2).floatValue();
- if(debugExtraCheckSymmetry) {
+ if (debugExtraCheckSymmetry) {
float d2 = distanceQuery.distance(id2, id1).floatValue();
- if(Math.abs(d - d2) > 0.0000001) {
+ if (Math.abs(d - d2) > 0.0000001) {
LOG.warning("Distance function doesn't appear to be symmetric!");
}
}
try {
matrix.getRecordBuffer(DBIDUtil.asInteger(id1), DBIDUtil.asInteger(id2)).putFloat(d);
- }
- catch(IOException e) {
+ } catch (IOException e) {
throw new AbortException("Error writing distance record " + id1 + "," + id2 + " to matrix.", e);
}
}
@@ -170,9 +150,9 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractApplication.Parameterizer {
/**
- * Holds the database connection to have the algorithm run with.
+ * Data source to process.
*/
- private Database database = null;
+ private InputStep input = null;
/**
* Distance function that is to be cached.
@@ -187,27 +167,22 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- // Database connection parameter
- final ObjectParameter<Database> dbpar = new ObjectParameter<Database>(OptionID.DATABASE_CONNECTION, Database.class, StaticArrayDatabase.class);
- if(config.grab(dbpar)) {
- database = dbpar.instantiateClass(config);
- }
+ input = config.tryInstantiate(InputStep.class);
// Distance function parameter
- final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<DistanceFunction<O, D>>(DISTANCE_ID, DistanceFunction.class);
- if(config.grab(dpar)) {
+ final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<>(CacheDoubleDistanceInOnDiskMatrix.Parameterizer.DISTANCE_ID, DistanceFunction.class);
+ if (config.grab(dpar)) {
distance = dpar.instantiateClass(config);
}
// Output file parameter
- final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE);
- if(config.grab(cpar)) {
+ final FileParameter cpar = new FileParameter(CacheDoubleDistanceInOnDiskMatrix.Parameterizer.CACHE_ID, FileParameter.FileType.OUTPUT_FILE);
+ if (config.grab(cpar)) {
out = cpar.getValue();
}
-
}
@Override
protected CacheFloatDistanceInOnDiskMatrix<O, D> makeInstance() {
- return new CacheFloatDistanceInOnDiskMatrix<O, D>(database, distance, out);
+ return new CacheFloatDistanceInOnDiskMatrix<>(input, distance, out);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java b/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java
index 5ed7ec1c..78a93a07 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java
@@ -11,7 +11,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2012
+Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team