diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java b/src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java new file mode 100644 index 00000000..83612236 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java @@ -0,0 +1,293 @@ +package de.lmu.ifi.dbs.elki.index.distancematrix; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.index.AbstractIndex; +import de.lmu.ifi.dbs.elki.index.DistanceIndex; +import de.lmu.ifi.dbs.elki.index.IndexFactory; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Distance matrix, for precomputing similarity for a small data set. + * + * This class uses a linear memory layout (not a ragged array), and assumes + * symmetry as well as strictness. This way, it only stores the upper triangle + * matrix with double precision. It has to store (n-1) * (n-2) distance values + * in memory, requiring 8 * (n-1) * (n-2) bytes. Since Java has a size limit of + * arrays of 31 bits (signed integer), we can store at most 2^16 objects + * (precisely, 65536 objects) in a single array, which needs about 16 GB of RAM. + * + * @author Erich Schubert + * + * @apiviz.has PrecomputedDistanceQuery + * + * @param <O> Object type + */ +public class PrecomputedDistanceMatrix<O> extends AbstractIndex<O> implements DistanceIndex<O> { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(PrecomputedDistanceMatrix.class); + + /** + * Nested distance function. + */ + final protected DistanceFunction<? super O> distanceFunction; + + /** + * Nested distance query. + */ + protected DistanceQuery<O> distanceQuery; + + /** + * Distance matrix. + */ + private double[] matrix = null; + + /** + * DBID range. + */ + private DBIDRange ids; + + /** + * Size of DBID range. + */ + private int size; + + /** + * Constructor. + * + * @param relation Data relation + * @param distanceFunction Distance function + */ + public PrecomputedDistanceMatrix(Relation<O> relation, DistanceFunction<? super O> distanceFunction) { + super(relation); + this.distanceFunction = distanceFunction; + + if(!distanceFunction.isSymmetric()) { + throw new AbortException("Distance matrixes currently only support symmetric distance functions (Patches welcome)."); + } + } + + @Override + public void initialize() { + DBIDs rids = relation.getDBIDs(); + if(!(rids instanceof DBIDRange)) { + throw new AbortException("Distance matrixes are currently only supported for DBID ranges (as used by static databases) for performance reasons (Patches welcome)."); + } + ids = (DBIDRange) rids; + size = ids.size(); + if(size > 65536) { + throw new AbortException("Distance matrixes currently have a limit of 65536 objects (~16 GB). After this, the array size exceeds the Java integer range, and a different data structure needs to be used."); + } + + distanceQuery = distanceFunction.instantiate(relation); + + int msize = triangleSize(size); + matrix = new double[msize]; + DBIDArrayIter ix = ids.iter(), iy = ids.iter(); + + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Precomputing distance matrix", msize, LOG) : null; + int pos = 0; + for(ix.seek(0); ix.valid(); ix.advance()) { + // y < x -- must match {@link #getOffset}! + for(iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) { + matrix[pos] = distanceQuery.distance(ix, iy); + pos++; + LOG.incrementProcessed(prog); + } + } + LOG.ensureCompleted(prog); + } + + /** + * Compute the size of a complete x by x triangle (minus diagonal) + * + * @param x Offset + * @return Size of complete triangle + */ + protected static int triangleSize(int x) { + return (x * (x - 1)) >>> 1; + } + + /** + * Array offset computation. + * + * @param x X parameter + * @param y Y parameter + * @return Array offset + */ + private int getOffset(int x, int y) { + return (y < x) ? (triangleSize(x) + y) : (triangleSize(y) + x); + } + + @Override + public void logStatistics() { + if(matrix != null) { + LOG.statistics(new LongStatistic(this.getClass().getName() + ".matrix-size", matrix.length)); + } + } + + @Override + public String getLongName() { + return "Precomputed Distance Matrix"; + } + + @Override + public String getShortName() { + return "distance-matrix"; + } + + @Override + public DistanceQuery<O> getDistanceQuery(DistanceFunction<? super O> distanceFunction, Object... hints) { + if(this.distanceQuery.getDistanceFunction().equals(distanceFunction)) { + return new PrecomputedDistanceQuery(); + } + return null; + } + + /** + * Distance query using the precomputed matrix. + * + * @author Erich Schubert + */ + private class PrecomputedDistanceQuery implements DistanceQuery<O> { + @Override + public double distance(DBIDRef id1, DBIDRef id2) { + final int x = ids.getOffset(id1), y = ids.getOffset(id2); + return (x != y) ? matrix[getOffset(x, y)] : 0.; + } + + @Override + public double distance(O o1, DBIDRef id2) { + return distanceQuery.distance(o1, id2); + } + + @Override + public double distance(DBIDRef id1, O o2) { + return distanceQuery.distance(id1, o2); + } + + @Override + public double distance(O o1, O o2) { + return distanceQuery.distance(o1, o2); + } + + @Override + public DistanceFunction<? super O> getDistanceFunction() { + return distanceQuery.getDistanceFunction(); + } + + @Override + public Relation<? extends O> getRelation() { + return relation; + } + } + + /** + * Factory for the index. + * + * @author Erich Schubert + * + * @apiviz.has PrecomputedDistanceMatrix + * + * @param <O> Object type + */ + public static class Factory<O> implements IndexFactory<O, PrecomputedDistanceMatrix<O>> { + /** + * Nested distance function. + */ + final protected DistanceFunction<? super O> distanceFunction; + + /** + * Constructor. + * + * @param distanceFunction Distance function + */ + public Factory(DistanceFunction<? super O> distanceFunction) { + super(); + this.distanceFunction = distanceFunction; + } + + @Override + public PrecomputedDistanceMatrix<O> instantiate(Relation<O> relation) { + return new PrecomputedDistanceMatrix<>(relation, distanceFunction); + } + + @Override + public TypeInformation getInputTypeRestriction() { + return distanceFunction.getInputTypeRestriction(); + } + + /** + * Parameterizer. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractParameterizer { + /** + * Option parameter for the precomputed distance matrix. + */ + public static final OptionID DISTANCE_ID = new OptionID("matrix.distance", "Distance function for the precomputed distance matrix."); + + /** + * Nested distance function. + */ + protected DistanceFunction<? super O> distanceFunction; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<DistanceFunction<? super O>> distanceP = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class); + if(config.grab(distanceP)) { + distanceFunction = distanceP.instantiateClass(config); + } + } + + @Override + protected Factory<O> makeInstance() { + return new Factory<>(distanceFunction); + } + } + } +} |