diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/application/cache')
5 files changed, 618 insertions, 102 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java index 07907313..6a28282c 100644 --- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java +++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.cache; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,7 +28,6 @@ import java.io.IOException; import de.lmu.ifi.dbs.elki.application.AbstractApplication; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; @@ -44,10 +43,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.workflow.InputStep; /** - * Wrapper to convert a traditional text-serialized result into a on-disk matrix - * for random access. + * Precompute an on-disk distance matrix, using double precision. * * @author Erich Schubert * @@ -64,30 +63,14 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?> private static final Logging LOG = Logging.getLogger(CacheDoubleDistanceInOnDiskMatrix.class); /** - * Parameter that specifies the name of the directory to be re-parsed. - * <p> - * Key: {@code -loader.diskcache} - * </p> - */ - public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create."); - - /** - * Parameter that specifies the name of the directory to be re-parsed. - * <p> - * Key: {@code -loader.distance} - * </p> - */ - public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache."); - - /** * Debug flag, to double-check all write operations. */ private static final boolean debugExtraCheckSymmetry = false; /** - * Holds the database connection to have the algorithm run with. + * Data source to process. */ - private Database database; + private InputStep input; /** * Distance function that is to be cached. @@ -102,28 +85,28 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?> /** * Constructor. * - * @param database Database + * @param input Data source * @param distance Distance function * @param out Matrix output file */ - public CacheDoubleDistanceInOnDiskMatrix(Database database, DistanceFunction<O, D> distance, File out) { + public CacheDoubleDistanceInOnDiskMatrix(InputStep input, DistanceFunction<O, D> distance, File out) { super(); - this.database = database; + this.input = input; this.distance = distance; this.out = out; } @Override public void run() { - database.initialize(); + Database database = input.getDatabase(); Relation<O> relation = database.getRelation(distance.getInputTypeRestriction()); DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance); int matrixsize = 0; - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { int intid = DBIDUtil.asInteger(iditer); matrixsize = Math.max(matrixsize, intid + 1); - if(intid < 0) { + if (intid < 0) { throw new AbortException("OnDiskMatrixCache does not allow negative DBIDs."); } } @@ -131,25 +114,23 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?> OnDiskUpperTriangleMatrix matrix; try { matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedDoubleDistanceFunction.DOUBLE_CACHE_MAGIC, 0, 8, matrixsize); - } - catch(IOException e) { + } catch (IOException e) { throw new AbortException("Error creating output matrix.", e); } - for(DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) { - for(DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) { - if(DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) { + for (DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) { + for (DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) { + if (DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) { double d = distanceQuery.distance(id1, id2).doubleValue(); - if(debugExtraCheckSymmetry) { + if (debugExtraCheckSymmetry) { double d2 = distanceQuery.distance(id2, id1).doubleValue(); - if(Math.abs(d - d2) > 0.0000001) { + if (Math.abs(d - d2) > 0.0000001) { LOG.warning("Distance function doesn't appear to be symmetric!"); } } try { matrix.getRecordBuffer(DBIDUtil.asInteger(id1), DBIDUtil.asInteger(id2)).putDouble(d); - } - catch(IOException e) { + } catch (IOException e) { throw new AbortException("Error writing distance record " + DBIDFactory.FACTORY.toString(id1) + "," + DBIDFactory.FACTORY.toString(id2) + " to matrix.", e); } } @@ -166,9 +147,25 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?> */ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractApplication.Parameterizer { /** - * Holds the database connection to have the algorithm run with. + * Parameter that specifies the name of the directory to be re-parsed. + * <p> + * Key: {@code -loader.diskcache} + * </p> */ - private Database database = null; + public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create."); + + /** + * Parameter that specifies the name of the directory to be re-parsed. + * <p> + * Key: {@code -loader.distance} + * </p> + */ + public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache."); + + /** + * Data source to process. + */ + private InputStep input = null; /** * Distance function that is to be cached. @@ -183,27 +180,22 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?> @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - // Database connection parameter - final ObjectParameter<Database> dbpar = new ObjectParameter<Database>(OptionID.DATABASE_CONNECTION, Database.class, StaticArrayDatabase.class); - if(config.grab(dbpar)) { - database = dbpar.instantiateClass(config); - } + input = config.tryInstantiate(InputStep.class); // Distance function parameter - final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<DistanceFunction<O, D>>(DISTANCE_ID, DistanceFunction.class); - if(config.grab(dpar)) { + final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class); + if (config.grab(dpar)) { distance = dpar.instantiateClass(config); } // Output file parameter final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE); - if(config.grab(cpar)) { + if (config.grab(cpar)) { out = cpar.getValue(); } - } @Override protected CacheDoubleDistanceInOnDiskMatrix<O, D> makeInstance() { - return new CacheDoubleDistanceInOnDiskMatrix<O, D>(database, distance, out); + return new CacheDoubleDistanceInOnDiskMatrix<>(input, distance, out); } } @@ -215,4 +207,4 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?> public static void main(String[] args) { runCLIApplication(CacheDoubleDistanceInOnDiskMatrix.class, args); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java new file mode 100644 index 00000000..e9bd4480 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceKNNLists.java @@ -0,0 +1,272 @@ +package de.lmu.ifi.dbs.elki.application.cache; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; + +import de.lmu.ifi.dbs.elki.application.AbstractApplication; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.workflow.InputStep; + +/** + * Precompute the k nearest neighbors in a disk cache. + * + * @author Erich Schubert + * + * @apiviz.has DistanceFunction + * + * @param <O> Object type + * @param <D> Distance type + */ +public class CacheDoubleDistanceKNNLists<O, D extends NumberDistance<D, ?>> extends AbstractApplication { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(CacheDoubleDistanceKNNLists.class); + + /** + * Data source to process. + */ + private InputStep input; + + /** + * Distance function that is to be cached. + */ + private DistanceFunction<O, D> distance; + + /** + * Number of neighbors to precompute. + */ + private int k; + + /** + * Output file. + */ + private File out; + + /** + * Magic number to identify files. + * + * Note, when cloning this class, and performing any incompatible change to + * the file format, you should also change this magic ID! + */ + public static final int KNN_CACHE_MAGIC = 0xCAC43D1C; + + /** + * Constructor. + * + * @param input Data source + * @param distance Distance function + * @param k Number of nearest neighbors + * @param out Matrix output file + */ + public CacheDoubleDistanceKNNLists(InputStep input, DistanceFunction<O, D> distance, int k, File out) { + super(); + this.input = input; + this.distance = distance; + this.k = k; + this.out = out; + } + + @Override + public void run() { + Database database = input.getDatabase(); + Relation<O> relation = database.getRelation(distance.getInputTypeRestriction()); + DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance); + KNNQuery<O, D> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE); + + // open file. + try (RandomAccessFile file = new RandomAccessFile(out, "rw"); + FileChannel channel = file.getChannel(); + // and acquire a file write lock + FileLock lock = channel.lock()) { + // write magic header + file.writeInt(KNN_CACHE_MAGIC); + + int bufsize = k * 12 * 2 + 10; // Initial size, enough for 2 kNN. + ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize); + + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null; + + for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { + final KNNList<D> nn = knnQ.getKNNForDBID(it, k); + final int nnsize = nn.size(); + + // Grow the buffer when needed: + if (nnsize * 12 + 10 > bufsize) { + while (nnsize * 12 + 10 > bufsize) { + bufsize <<= 1; + } + buffer = ByteBuffer.allocateDirect(bufsize); + } + + buffer.clear(); + ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex()); + ByteArrayUtil.writeUnsignedVarint(buffer, nnsize); + int c = 0; + if (nn instanceof DoubleDistanceDBIDList) { + for (DoubleDistanceDBIDListIter ni = ((DoubleDistanceDBIDList) nn).iter(); ni.valid(); ni.advance(), c++) { + ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex()); + buffer.putDouble(ni.doubleDistance()); + } + } else { + for (DistanceDBIDListIter<D> ni = nn.iter(); ni.valid(); ni.advance(), c++) { + ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex()); + buffer.putDouble(ni.getDistance().doubleValue()); + } + } + if (c != nn.size()) { + throw new AbortException("Sizes did not agree. Cache is invalid."); + } + + buffer.flip(); + channel.write(buffer); + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + if (prog != null) { + prog.ensureCompleted(LOG); + } + lock.release(); + } catch (IOException e) { + LOG.exception(e); + } + // FIXME: close! + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractApplication.Parameterizer { + /** + * Parameter that specifies the name of the directory to be re-parsed. + * <p> + * Key: {@code -loader.diskcache} + * </p> + */ + public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create."); + + /** + * Parameter that specifies the name of the directory to be re-parsed. + * <p> + * Key: {@code -loader.distance} + * </p> + */ + public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache."); + + /** + * Parameter that specifies the number of neighbors to precompute. + * <p> + * Key: {@code -loader.k} + * </p> + */ + public static final OptionID K_ID = new OptionID("loader.k", "Number of nearest neighbors to precompute."); + + /** + * Data source to process. + */ + private InputStep input = null; + + /** + * Distance function that is to be cached. + */ + private DistanceFunction<O, D> distance = null; + + /** + * Number of neighbors to precompute. + */ + private int k; + + /** + * Output file. + */ + private File out = null; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + input = config.tryInstantiate(InputStep.class); + // Distance function parameter + final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class); + if (config.grab(dpar)) { + distance = dpar.instantiateClass(config); + } + final IntParameter kpar = new IntParameter(K_ID); + kpar.addConstraint(new GreaterEqualConstraint(1)); + if (config.grab(kpar)) { + k = kpar.intValue(); + } + // Output file parameter + final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE); + if (config.grab(cpar)) { + out = cpar.getValue(); + } + } + + @Override + protected CacheDoubleDistanceKNNLists<O, D> makeInstance() { + return new CacheDoubleDistanceKNNLists<>(input, distance, k, out); + } + } + + /** + * Main method, delegate to super class. + * + * @param args Command line arguments + */ + public static void main(String[] args) { + runCLIApplication(CacheDoubleDistanceKNNLists.class, args); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java new file mode 100644 index 00000000..f9434820 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceRangeQueries.java @@ -0,0 +1,277 @@ +package de.lmu.ifi.dbs.elki.application.cache; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; + +import de.lmu.ifi.dbs.elki.application.AbstractApplication; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.persistent.ByteArrayUtil; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.workflow.InputStep; + +/** + * Precompute the k nearest neighbors in a disk cache. + * + * @author Erich Schubert + * + * @apiviz.has DistanceFunction + * + * @param <O> Object type + */ +public class CacheDoubleDistanceRangeQueries<O> extends AbstractApplication { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(CacheDoubleDistanceRangeQueries.class); + + /** + * Data source to process. + */ + private InputStep input; + + /** + * Distance function that is to be cached. + */ + private DistanceFunction<O, DoubleDistance> distance; + + /** + * Query radius. + */ + private double radius; + + /** + * Output file. + */ + private File out; + + /** + * Magic number to identify files. + * + * Note, when cloning this class, and performing any incompatible change to + * the file format, you should also change this magic ID! + */ + public static final int RANGE_CACHE_MAGIC = 0xCAC43333; + + /** + * Constructor. + * + * @param input Data source + * @param distance Distance function + * @param radius Query radius + * @param out Matrix output file + */ + public CacheDoubleDistanceRangeQueries(InputStep input, DistanceFunction<O, DoubleDistance> distance, double radius, File out) { + super(); + this.input = input; + this.distance = distance; + this.radius = radius; + this.out = out; + } + + @Override + public void run() { + Database database = input.getDatabase(); + Relation<O> relation = database.getRelation(distance.getInputTypeRestriction()); + DistanceQuery<O, DoubleDistance> distanceQuery = database.getDistanceQuery(relation, distance); + DoubleDistance rad = new DoubleDistance(radius); + RangeQuery<O, DoubleDistance> rangeQ = database.getRangeQuery(distanceQuery, rad, DatabaseQuery.HINT_HEAVY_USE); + + LOG.verbose("Performing range queries with radius " + rad); + + // open file. + try (RandomAccessFile file = new RandomAccessFile(out, "rw"); + FileChannel channel = file.getChannel(); + // and acquire a file write lock + FileLock lock = channel.lock()) { + // write magic header + file.writeInt(RANGE_CACHE_MAGIC); + // write the query radius. + file.writeDouble(radius); + + int bufsize = 100 * 12 * 2 + 10; // Initial size, enough for 100. + ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize); + + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing range queries", relation.size(), LOG) : null; + + for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) { + final DistanceDBIDList<DoubleDistance> nn = rangeQ.getRangeForDBID(it, rad); + final int nnsize = nn.size(); + + // Grow the buffer when needed: + if (nnsize * 12 + 10 > bufsize) { + while (nnsize * 12 + 10 > bufsize) { + bufsize <<= 1; + } + LOG.verbose("Resizing buffer to "+bufsize+" to store "+nnsize+" results:"); + buffer = ByteBuffer.allocateDirect(bufsize); + } + + buffer.clear(); + ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex()); + ByteArrayUtil.writeUnsignedVarint(buffer, nnsize); + int c = 0; + if (nn instanceof DoubleDistanceDBIDList) { + for (DoubleDistanceDBIDListIter ni = ((DoubleDistanceDBIDList) nn).iter(); ni.valid(); ni.advance(), c++) { + ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex()); + buffer.putDouble(ni.doubleDistance()); + } + } else { + for (DistanceDBIDListIter<DoubleDistance> ni = nn.iter(); ni.valid(); ni.advance(), c++) { + ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex()); + buffer.putDouble(ni.getDistance().doubleValue()); + } + } + if (c != nn.size()) { + throw new AbortException("Sizes did not agree. Cache is invalid."); + } + + buffer.flip(); + channel.write(buffer); + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + if (prog != null) { + prog.ensureCompleted(LOG); + } + lock.release(); + } catch (IOException e) { + LOG.exception(e); + } + // FIXME: close! + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<O> extends AbstractApplication.Parameterizer { + /** + * Parameter that specifies the name of the directory to be re-parsed. + * <p> + * Key: {@code -loader.diskcache} + * </p> + */ + public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create."); + + /** + * Parameter that specifies the name of the directory to be re-parsed. + * <p> + * Key: {@code -loader.distance} + * </p> + */ + public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache."); + + /** + * Parameter that specifies the query radius to precompute. + * <p> + * Key: {@code -loader.radius} + * </p> + */ + public static final OptionID RADIUS_ID = new OptionID("loader.radius", "Query radius for precomputation."); + + /** + * Data source to process. + */ + private InputStep input = null; + + /** + * Distance function that is to be cached. + */ + private DistanceFunction<O, DoubleDistance> distance = null; + + /** + * Number of neighbors to precompute. + */ + private double radius; + + /** + * Output file. + */ + private File out = null; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + input = config.tryInstantiate(InputStep.class); + // Distance function parameter + final ObjectParameter<DistanceFunction<O, DoubleDistance>> dpar = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class); + if (config.grab(dpar)) { + distance = dpar.instantiateClass(config); + } + final DoubleParameter kpar = new DoubleParameter(RADIUS_ID); + kpar.addConstraint(new GreaterEqualConstraint(0.)); + if (config.grab(kpar)) { + radius = kpar.doubleValue(); + } + // Output file parameter + final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE); + if (config.grab(cpar)) { + out = cpar.getValue(); + } + } + + @Override + protected CacheDoubleDistanceRangeQueries<O> makeInstance() { + return new CacheDoubleDistanceRangeQueries<>(input, distance, radius, out); + } + } + + /** + * Main method, delegate to super class. + * + * @param args Command line arguments + */ + public static void main(String[] args) { + runCLIApplication(CacheDoubleDistanceRangeQueries.class, args); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java index 144b4b70..5499415b 100644 --- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java +++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.cache; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,7 +28,6 @@ import java.io.IOException; import de.lmu.ifi.dbs.elki.application.AbstractApplication; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.StaticArrayDatabase; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; @@ -39,14 +38,13 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.FileParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.workflow.InputStep; /** - * Wrapper to convert a traditional text-serialized result into a on-disk matrix - * for random access. + * Precompute an on-disk distance matrix, using double precision. * * @author Erich Schubert * @@ -63,22 +61,6 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>> private static final Logging LOG = Logging.getLogger(CacheFloatDistanceInOnDiskMatrix.class); /** - * Parameter that specifies the name of the directory to be re-parsed. - * <p> - * Key: {@code -loader.diskcache} - * </p> - */ - public static final OptionID CACHE_ID = new OptionID("loader.diskcache", "File name of the disk cache to create."); - - /** - * Parameter that specifies the name of the directory to be re-parsed. - * <p> - * Key: {@code -loader.distance} - * </p> - */ - public static final OptionID DISTANCE_ID = new OptionID("loader.distance", "Distance function to cache."); - - /** * Debug flag, to double-check all write operations. */ private static final boolean debugExtraCheckSymmetry = false; @@ -89,9 +71,9 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>> private static final int FLOAT_SIZE = 4; /** - * Holds the database connection to have the algorithm run with. + * Data source to process. */ - private Database database; + private InputStep input; /** * Distance function that is to be cached. @@ -106,28 +88,28 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>> /** * Constructor. * - * @param database Database + * @param input Data source * @param distance Distance function * @param out Matrix output file */ - public CacheFloatDistanceInOnDiskMatrix(Database database, DistanceFunction<O, D> distance, File out) { + public CacheFloatDistanceInOnDiskMatrix(InputStep input, DistanceFunction<O, D> distance, File out) { super(); - this.database = database; + this.input = input; this.distance = distance; this.out = out; } @Override public void run() { - database.initialize(); + Database database = input.getDatabase(); Relation<O> relation = database.getRelation(distance.getInputTypeRestriction()); DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance); int matrixsize = 0; - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { final int intid = DBIDUtil.asInteger(iditer); matrixsize = Math.max(matrixsize, intid + 1); - if(intid < 0) { + if (intid < 0) { throw new AbortException("OnDiskMatrixCache does not allow negative DBIDs."); } } @@ -135,25 +117,23 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>> OnDiskUpperTriangleMatrix matrix; try { matrix = new OnDiskUpperTriangleMatrix(out, DiskCacheBasedFloatDistanceFunction.FLOAT_CACHE_MAGIC, 0, FLOAT_SIZE, matrixsize); - } - catch(IOException e) { + } catch (IOException e) { throw new AbortException("Error creating output matrix.", e); } - for(DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) { - for(DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) { - if(DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) { + for (DBIDIter id1 = relation.iterDBIDs(); id1.valid(); id1.advance()) { + for (DBIDIter id2 = relation.iterDBIDs(); id2.valid(); id2.advance()) { + if (DBIDUtil.asInteger(id2) >= DBIDUtil.asInteger(id1)) { float d = distanceQuery.distance(id1, id2).floatValue(); - if(debugExtraCheckSymmetry) { + if (debugExtraCheckSymmetry) { float d2 = distanceQuery.distance(id2, id1).floatValue(); - if(Math.abs(d - d2) > 0.0000001) { + if (Math.abs(d - d2) > 0.0000001) { LOG.warning("Distance function doesn't appear to be symmetric!"); } } try { matrix.getRecordBuffer(DBIDUtil.asInteger(id1), DBIDUtil.asInteger(id2)).putFloat(d); - } - catch(IOException e) { + } catch (IOException e) { throw new AbortException("Error writing distance record " + id1 + "," + id2 + " to matrix.", e); } } @@ -170,9 +150,9 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>> */ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractApplication.Parameterizer { /** - * Holds the database connection to have the algorithm run with. + * Data source to process. */ - private Database database = null; + private InputStep input = null; /** * Distance function that is to be cached. @@ -187,27 +167,22 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>> @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - // Database connection parameter - final ObjectParameter<Database> dbpar = new ObjectParameter<Database>(OptionID.DATABASE_CONNECTION, Database.class, StaticArrayDatabase.class); - if(config.grab(dbpar)) { - database = dbpar.instantiateClass(config); - } + input = config.tryInstantiate(InputStep.class); // Distance function parameter - final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<DistanceFunction<O, D>>(DISTANCE_ID, DistanceFunction.class); - if(config.grab(dpar)) { + final ObjectParameter<DistanceFunction<O, D>> dpar = new ObjectParameter<>(CacheDoubleDistanceInOnDiskMatrix.Parameterizer.DISTANCE_ID, DistanceFunction.class); + if (config.grab(dpar)) { distance = dpar.instantiateClass(config); } // Output file parameter - final FileParameter cpar = new FileParameter(CACHE_ID, FileParameter.FileType.OUTPUT_FILE); - if(config.grab(cpar)) { + final FileParameter cpar = new FileParameter(CacheDoubleDistanceInOnDiskMatrix.Parameterizer.CACHE_ID, FileParameter.FileType.OUTPUT_FILE); + if (config.grab(cpar)) { out = cpar.getValue(); } - } @Override protected CacheFloatDistanceInOnDiskMatrix<O, D> makeInstance() { - return new CacheFloatDistanceInOnDiskMatrix<O, D>(database, distance, out); + return new CacheFloatDistanceInOnDiskMatrix<>(input, distance, out); } } diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java b/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java index 5ed7ec1c..78a93a07 100644 --- a/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java @@ -11,7 +11,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |