summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java293
1 files changed, 293 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java b/src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java
new file mode 100644
index 00000000..83612236
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/index/distancematrix/PrecomputedDistanceMatrix.java
@@ -0,0 +1,293 @@
+package de.lmu.ifi.dbs.elki.index.distancematrix;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRange;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.index.AbstractIndex;
+import de.lmu.ifi.dbs.elki.index.DistanceIndex;
+import de.lmu.ifi.dbs.elki.index.IndexFactory;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Distance matrix, for precomputing similarity for a small data set.
+ *
+ * This class uses a linear memory layout (not a ragged array), and assumes
+ * symmetry as well as strictness. This way, it only stores the upper triangle
+ * matrix with double precision. It has to store (n-1) * (n-2) distance values
+ * in memory, requiring 8 * (n-1) * (n-2) bytes. Since Java has a size limit of
+ * arrays of 31 bits (signed integer), we can store at most 2^16 objects
+ * (precisely, 65536 objects) in a single array, which needs about 16 GB of RAM.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has PrecomputedDistanceQuery
+ *
+ * @param <O> Object type
+ */
+public class PrecomputedDistanceMatrix<O> extends AbstractIndex<O> implements DistanceIndex<O> {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(PrecomputedDistanceMatrix.class);
+
+ /**
+ * Nested distance function.
+ */
+ final protected DistanceFunction<? super O> distanceFunction;
+
+ /**
+ * Nested distance query.
+ */
+ protected DistanceQuery<O> distanceQuery;
+
+ /**
+ * Distance matrix.
+ */
+ private double[] matrix = null;
+
+ /**
+ * DBID range.
+ */
+ private DBIDRange ids;
+
+ /**
+ * Size of DBID range.
+ */
+ private int size;
+
+ /**
+ * Constructor.
+ *
+ * @param relation Data relation
+ * @param distanceFunction Distance function
+ */
+ public PrecomputedDistanceMatrix(Relation<O> relation, DistanceFunction<? super O> distanceFunction) {
+ super(relation);
+ this.distanceFunction = distanceFunction;
+
+ if(!distanceFunction.isSymmetric()) {
+ throw new AbortException("Distance matrixes currently only support symmetric distance functions (Patches welcome).");
+ }
+ }
+
+ @Override
+ public void initialize() {
+ DBIDs rids = relation.getDBIDs();
+ if(!(rids instanceof DBIDRange)) {
+ throw new AbortException("Distance matrixes are currently only supported for DBID ranges (as used by static databases) for performance reasons (Patches welcome).");
+ }
+ ids = (DBIDRange) rids;
+ size = ids.size();
+ if(size > 65536) {
+ throw new AbortException("Distance matrixes currently have a limit of 65536 objects (~16 GB). After this, the array size exceeds the Java integer range, and a different data structure needs to be used.");
+ }
+
+ distanceQuery = distanceFunction.instantiate(relation);
+
+ int msize = triangleSize(size);
+ matrix = new double[msize];
+ DBIDArrayIter ix = ids.iter(), iy = ids.iter();
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Precomputing distance matrix", msize, LOG) : null;
+ int pos = 0;
+ for(ix.seek(0); ix.valid(); ix.advance()) {
+ // y < x -- must match {@link #getOffset}!
+ for(iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
+ matrix[pos] = distanceQuery.distance(ix, iy);
+ pos++;
+ LOG.incrementProcessed(prog);
+ }
+ }
+ LOG.ensureCompleted(prog);
+ }
+
+ /**
+ * Compute the size of a complete x by x triangle (minus diagonal)
+ *
+ * @param x Offset
+ * @return Size of complete triangle
+ */
+ protected static int triangleSize(int x) {
+ return (x * (x - 1)) >>> 1;
+ }
+
+ /**
+ * Array offset computation.
+ *
+ * @param x X parameter
+ * @param y Y parameter
+ * @return Array offset
+ */
+ private int getOffset(int x, int y) {
+ return (y < x) ? (triangleSize(x) + y) : (triangleSize(y) + x);
+ }
+
+ @Override
+ public void logStatistics() {
+ if(matrix != null) {
+ LOG.statistics(new LongStatistic(this.getClass().getName() + ".matrix-size", matrix.length));
+ }
+ }
+
+ @Override
+ public String getLongName() {
+ return "Precomputed Distance Matrix";
+ }
+
+ @Override
+ public String getShortName() {
+ return "distance-matrix";
+ }
+
+ @Override
+ public DistanceQuery<O> getDistanceQuery(DistanceFunction<? super O> distanceFunction, Object... hints) {
+ if(this.distanceQuery.getDistanceFunction().equals(distanceFunction)) {
+ return new PrecomputedDistanceQuery();
+ }
+ return null;
+ }
+
+ /**
+ * Distance query using the precomputed matrix.
+ *
+ * @author Erich Schubert
+ */
+ private class PrecomputedDistanceQuery implements DistanceQuery<O> {
+ @Override
+ public double distance(DBIDRef id1, DBIDRef id2) {
+ final int x = ids.getOffset(id1), y = ids.getOffset(id2);
+ return (x != y) ? matrix[getOffset(x, y)] : 0.;
+ }
+
+ @Override
+ public double distance(O o1, DBIDRef id2) {
+ return distanceQuery.distance(o1, id2);
+ }
+
+ @Override
+ public double distance(DBIDRef id1, O o2) {
+ return distanceQuery.distance(id1, o2);
+ }
+
+ @Override
+ public double distance(O o1, O o2) {
+ return distanceQuery.distance(o1, o2);
+ }
+
+ @Override
+ public DistanceFunction<? super O> getDistanceFunction() {
+ return distanceQuery.getDistanceFunction();
+ }
+
+ @Override
+ public Relation<? extends O> getRelation() {
+ return relation;
+ }
+ }
+
+ /**
+ * Factory for the index.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has PrecomputedDistanceMatrix
+ *
+ * @param <O> Object type
+ */
+ public static class Factory<O> implements IndexFactory<O, PrecomputedDistanceMatrix<O>> {
+ /**
+ * Nested distance function.
+ */
+ final protected DistanceFunction<? super O> distanceFunction;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ */
+ public Factory(DistanceFunction<? super O> distanceFunction) {
+ super();
+ this.distanceFunction = distanceFunction;
+ }
+
+ @Override
+ public PrecomputedDistanceMatrix<O> instantiate(Relation<O> relation) {
+ return new PrecomputedDistanceMatrix<>(relation, distanceFunction);
+ }
+
+ @Override
+ public TypeInformation getInputTypeRestriction() {
+ return distanceFunction.getInputTypeRestriction();
+ }
+
+ /**
+ * Parameterizer.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ */
+ public static class Parameterizer<O> extends AbstractParameterizer {
+ /**
+ * Option parameter for the precomputed distance matrix.
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("matrix.distance", "Distance function for the precomputed distance matrix.");
+
+ /**
+ * Nested distance function.
+ */
+ protected DistanceFunction<? super O> distanceFunction;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<DistanceFunction<? super O>> distanceP = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class);
+ if(config.grab(distanceP)) {
+ distanceFunction = distanceP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected Factory<O> makeInstance() {
+ return new Factory<>(distanceFunction);
+ }
+ }
+ }
+}