10 files changed, 1642 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java
new file mode 100644
index 00000000..ad59ebfd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java
@@ -0,0 +1,82 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Class to compute the dimension similarity based on covariances.
+ * 
+ * @author Erich Schubert
+ */
+public class CovarianceDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+  /**
+   * Static instance
+   */
+  public static final CovarianceDimensionSimilarity STATIC = new CovarianceDimensionSimilarity();
+
+  /**
+   * Constructor. Use static instance.
+   */
+  protected CovarianceDimensionSimilarity() {
+    super();
+  }
+
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    // FIXME: Use only necessary dimensions!
+    CovarianceMatrix covmat = CovarianceMatrix.make(relation, subset);
+    double[][] mat = covmat.destroyToSampleMatrix().getArrayRef();
+    // Transform diagonal to 1 / stddev
+    for (int i = 0; i < mat.length; i++) {
+      mat[i][i] = 1. / Math.sqrt(mat[i][i]);
+    }
+    // Fill output matrix:
+    for (int x = 0; x < dim; x++) {
+      final int i = matrix.dim(x);
+      for (int y = x + 1; y < dim; y++) {
+        final int j = matrix.dim(y);
+        matrix.set(x, y, mat[i][j] * mat[i][i] * mat[j][j]);
+      }
+    }
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected CovarianceDimensionSimilarity makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java
new file mode 100644
index 00000000..6b01b3f3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java
@@ -0,0 +1,46 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
+
+/**
+ * Interface for computing pairwise dimension similarities, used for arranging
+ * dimensions in parallel coordinate plots.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Object type
+ */
+public interface DimensionSimilarity<V> extends Parameterizable {
+  /**
+   * Compute the dimension similarity matrix
+   * 
+   * @param relation Relation
+   * @param subset DBID subset (for sampling / selection)
+   * @param matrix Matrix to fill
+   */
+  public void computeDimensionSimilarites(Relation<? extends V> relation, DBIDs subset, DimensionSimilarityMatrix matrix);
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java
new file mode 100644
index 00000000..9f7a9707
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java
@@ -0,0 +1,247 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+import de.lmu.ifi.dbs.elki.math.geometry.PrimsMinimumSpanningTree;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Class representing a similarity matrix between dimensions.
+ * 
+ * @author Erich Schubert
+ */
+public abstract class DimensionSimilarityMatrix {
+  /**
+   * Adapter class for running Prim's minimum spanning tree algorithm.
+   */
+  public static final PrimAdapter PRIM_ADAPTER = new PrimAdapter();
+
+  /**
+   * Flat, symmetric storage. We use a lower triangle matrix.
+   * 
+   * Basic memory layout (X = undef, S = symmetric)
+   * 
+   * <pre>
+   *  X  S  S  S  S  S
+   *  0  X  S  S  S  S
+   *  1  2  X  S  S  S
+   *  3  4  5  X  S  S
+   *  6  7  8  9  X  S
+   * 10 11 12 13 14  X
+   * </pre>
+   * 
+   * 
+   */
+  private final double[] sim;
+
+  /**
+   * Constructor.
+   * 
+   * @param dims Number of dimensions to allocate.
+   */
+  protected DimensionSimilarityMatrix(int dims) {
+    super();
+    this.sim = new double[index(0, dims)];
+  }
+
+  /**
+   * Number of dimensions.
+   * 
+   * @return Size of dimensions array.
+   */
+  public abstract int size();
+
+  /**
+   * Get the dimension at position idx.
+   * 
+   * @param idx Position
+   * @return Dimension
+   */
+  public abstract int dim(int idx);
+
+  /**
+   * Set the value of the given matrix position.
+   * 
+   * Note that {@code x == y} is invalid!
+   * 
+   * @param x X index coordinate
+   * @param y Y index coordinate
+   * @param val Value
+   */
+  public void set(int x, int y, double val) {
+    sim[index(x, y)] = val;
+  }
+
+  /**
+   * Get the value of the given matrix position.
+   * 
+   * Note that {@code x == y} is invalid!
+   * 
+   * @param x X index coordinate
+   * @param y Y index coordinate
+   * @return Value
+   */
+  public double get(int x, int y) {
+    return sim[index(x, y)];
+  }
+
+  /**
+   * Indexing function for triangular matrix.
+   * 
+   * @param x X coordinate
+   * @param y Y coordinate
+   * @return Array index
+   */
+  private int index(int x, int y) {
+    assert (x != y);
+    if (x > y) {
+      return index(y, x);
+    }
+    return ((y * (y - 1)) >> 1) + x;
+  }
+
+  @Override
+  public String toString() {
+    StringBuffer buf = new StringBuffer();
+    final int d = size();
+    for (int x = 1; x < d; x++) {
+      for (int y = 0; y < x; y++) {
+        if (y > 0) {
+          buf.append(' ');
+        }
+        buf.append(get(x, y));
+      }
+      buf.append('\n');
+    }
+    return buf.toString();
+  }
+
+  /**
+   * Complete matrix of pairwise dimension similarities.
+   * 
+   * @author Erich Schubert
+   */
+  public static class FullDimensionSimilarityMatrix extends DimensionSimilarityMatrix {
+    /**
+     * Number of dimensions.
+     */
+    final int dims;
+
+    /**
+     * Constructor.
+     * 
+     * @param dims Number of dimensions
+     */
+    public FullDimensionSimilarityMatrix(int dims) {
+      super(dims);
+      this.dims = dims;
+    }
+
+    @Override
+    public int size() {
+      return dims;
+    }
+
+    @Override
+    public int dim(int idx) {
+      return idx;
+    }
+  }
+
+  /**
+   * Partial matrix of pairwise dimension similarities.
+   * 
+   * @author Erich Schubert
+   */
+  public static class PartialDimensionSimilarityMatrix extends DimensionSimilarityMatrix {
+    /**
+     * Enumeration of dimensions to use (so we could use a subset only!)
+     */
+    final int[] dims;
+
+    /**
+     * Constructor.
+     * 
+     * @param dims Array of dimensions to process.
+     */
+    public PartialDimensionSimilarityMatrix(int[] dims) {
+      super(dims.length);
+      this.dims = dims;
+    }
+
+    @Override
+    public int size() {
+      return dims.length;
+    }
+
+    @Override
+    public int dim(int idx) {
+      return dims[idx];
+    }
+  }
+
+  /**
+   * Adapter class for running prim's algorithm.
+   * 
+   * @author Erich Schubert
+   */
+  public static class PrimAdapter implements PrimsMinimumSpanningTree.Adapter<DimensionSimilarityMatrix> {
+    /**
+     * Constructor. Use static instance!
+     */
+    protected PrimAdapter() {
+      super();
+    }
+
+    @Override
+    public double distance(DimensionSimilarityMatrix data, int i, int j) {
+      return -Math.abs(data.get(i, j));
+    }
+
+    @Override
+    public int size(DimensionSimilarityMatrix data) {
+      return data.size();
+    }
+
+  }
+
+  /**
+   * Make a full dimension similarity matrix.
+   * 
+   * @param dims Number of dimensions.
+   * @return Matrix
+   */
+  public static DimensionSimilarityMatrix make(int dims) {
+    return new FullDimensionSimilarityMatrix(dims);
+  }
+
+  /**
+   * Make a partial dimension similarity matrix.
+   * 
+   * @param dims Array of relevant dimensions
+   * @return Matrix
+   */
+  public static DimensionSimilarityMatrix make(int[] dims) {
+    return new PartialDimensionSimilarityMatrix(dims);
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java
new file mode 100644
index 00000000..b221866c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java
@@ -0,0 +1,251 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.SinCosTable;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * FIXME: This needs serious TESTING before release. Large parts have been
+ * rewritten, but could not be tested at the time of rewriting.
+ * 
+ * Compute the similarity of dimensions by using a hough transformation.
+ * 
+ * Reference: <br>
+ * <p>
+ * A. Tatu, G. Albuquerque, M. Eisemann, P. Bak, H. Theisel, M. A. Magnor, and
+ * D. A. Keim.<br />
+ * Automated Analytical Methods to Support Visual Exploration of High-
+ * Dimensional Data. <br/>
+ * IEEEVisualization and Computer Graphics, 2011.
+ * </p>
+ * 
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+@Reference(authors = "A. Tatu, G. Albuquerque, M. Eisemann, P. Bak, H. Theisel, M. A. Magnor, and D. A. Keim.", title = "Automated Analytical Methods to Support Visual Exploration of High-Dimensional Data", booktitle = "IEEE Trans. Visualization and Computer Graphics, 2011", url = "http://dx.doi.org/10.1109/TVCG.2010.242")
+public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+  /**
+   * Static instance.
+   */
+  public static final HSMDimensionSimilarity STATIC = new HSMDimensionSimilarity();
+
+  /**
+   * Angular resolution. Best if divisible by 4: smaller tables.
+   * 
+   * The original publication used 50.
+   */
+  private final static int STEPS = 64;
+
+  /**
+   * Precompute sinus and cosinus
+   */
+  private final static SinCosTable table = SinCosTable.make(STEPS);
+
+  /**
+   * Constructor. Use static instance instead!
+   */
+  protected HSMDimensionSimilarity() {
+    super();
+  }
+
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    final int resolution = 500;
+    byte[][][][] pics = new byte[dim][dim][][]; // [resolution][resolution];
+
+    // Initialize / allocate "pictures":
+    for (int i = 0; i < dim - 1; i++) {
+      for (int j = i + 1; j < dim; j++) {
+        pics[i][j] = new byte[resolution][resolution];
+      }
+    }
+    // FIXME: Get/keep these statistics in the relation, or compute for the
+    // sample only.
+    double[] off = new double[dim], scale = new double[dim];
+    {
+      Pair<? extends NumberVector<?>, ? extends NumberVector<?>> mm = DatabaseUtil.computeMinMax(relation);
+      NumberVector<?> min = mm.first;
+      NumberVector<?> max = mm.second;
+      for (int d = 0; d < dim; d++) {
+        off[d] = min.doubleValue(matrix.dim(d));
+        final double m = max.doubleValue(matrix.dim(d));
+        scale[d] = (m > off[d]) ? 1. / (m - off[d]) : 1;
+      }
+    }
+    // Iterate dataset
+    for (DBIDIter id = subset.iter(); id.valid(); id.advance()) {
+      NumberVector<?> pvec = relation.get(id);
+      for (int i = 0; i < dim - 1; i++) {
+        for (int j = i + 1; j < dim; j++) {
+          double xi = (pvec.doubleValue(matrix.dim(i)) - off[i]) * scale[i];
+          double xj = (pvec.doubleValue(matrix.dim(j)) - off[j]) * scale[j];
+          drawLine(0, (int) (resolution * xi), resolution - 1, (int) (resolution * xj), pics[i][j]);
+        }
+      }
+    }
+
+    final double stepsq = (double) STEPS * (double) STEPS;
+    for (int x = 0; x < dim; x++) {
+      final int i = matrix.dim(x);
+      for (int y = x + 1; y < dim; y++) {
+        final int j = matrix.dim(y);
+        int[][] hough = houghTransformation(pics[i][j]);
+        pics[i][j] = null; // Release picture
+        // The original publication said "median", but judging from the text,
+        // meant "mean". Otherwise, always half of the cells are above the
+        // threshold, which doesn't match the explanation there.
+        double mean = sumMatrix(hough) / stepsq;
+        int abovemean = countAboveThreshold(hough, mean);
+
+        matrix.set(x, y, 1. - (abovemean / stepsq));
+      }
+    }
+  }
+
+  /**
+   * Compute the sum of a matix.
+   * 
+   * @param mat Matrix
+   * @return Sum of all elements
+   */
+  private long sumMatrix(int[][] mat) {
+    long ret = 0;
+    for (int i = 0; i < mat[0].length; i++) {
+      for (int j = 0; j < mat.length; j++) {
+        ret += mat[i][j];
+      }
+    }
+    return ret;
+  }
+
+  /**
+   * Count the number of cells above the threshold.
+   * 
+   * @param mat Matrix
+   * @param threshold Threshold
+   * @return Number of elements above the threshold.
+   */
+  private int countAboveThreshold(int[][] mat, double threshold) {
+    int ret = 0;
+    for (int i = 0; i < mat.length; i++) {
+      int[] row = mat[i];
+      for (int j = 0; j < row.length; j++) {
+        if (row[j] >= threshold) {
+          ret++;
+        }
+      }
+    }
+    return ret;
+  }
+
+  /**
+   * Perform a hough transformation on the binary image in "mat".
+   * 
+   * @param mat Binary image
+   * @return Hough transformation of image.
+   */
+  private int[][] houghTransformation(byte[][] mat) {
+    final int xres = mat.length, yres = mat[0].length;
+    final double tscale = STEPS / Math.sqrt(xres * xres + yres * yres);
+    final int[][] ret = new int[STEPS][STEPS];
+
+    for (int x = 0; x < mat.length; x++) {
+      for (int y = 0; y < mat[0].length; y++) {
+        if (mat[x][y] > 0) {
+          for (int i = 0; i < STEPS; i++) {
+            final int d = (int) (tscale * (x * table.cos(i) + y * table.sin(i)));
+            if (d > 0 && d < STEPS) {
+              ret[d][i] += mat[x][y];
+            }
+          }
+        }
+      }
+    }
+
+    return ret;
+  }
+
+  /**
+   * Draw a line onto the array, using the classic Bresenham algorithm.
+   * 
+   * @param x0 Start X
+   * @param y0 Start Y
+   * @param x1 End X
+   * @param y1 End Y
+   * @param pic Picture array
+   */
+  private static void drawLine(int x0, int y0, int x1, int y1, byte[][] pic) {
+    final int xres = pic.length, yres = pic[0].length;
+    // Ensure bounds
+    y0 = (y0 < 0) ? 0 : (y0 >= yres) ? (yres - 1) : y0;
+    y1 = (y1 < 0) ? 0 : (y1 >= yres) ? (yres - 1) : y1;
+    x0 = (x0 < 0) ? 0 : (x0 >= xres) ? (xres - 1) : x0;
+    x1 = (x1 < 0) ? 0 : (x1 >= xres) ? (xres - 1) : x1;
+    // Default slope
+    final int dx = +Math.abs(x1 - x0), sx = x0 < x1 ? 1 : -1;
+    final int dy = -Math.abs(y1 - y0), sy = y0 < y1 ? 1 : -1;
+    // Error counter
+    int err = dx + dy;
+
+    for (;;) {
+      pic[x0][y0] = 1;
+      if (x0 == x1 && y0 == y1) {
+        break;
+      }
+
+      final int e2 = err << 1;
+      if (e2 > dy) {
+        err += dy;
+        x0 += sx;
+      }
+      if (e2 < dx) {
+        err += dx;
+        y0 += sy;
+      }
+    }
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected HSMDimensionSimilarity makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java
new file mode 100644
index 00000000..468db679
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java
@@ -0,0 +1,269 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.statistics.tests.GoodnessOfFitTest;
+import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Use the statistical tests as used by HiCS to arrange dimensions.
+ * 
+ * <p>
+ * Based on:<br />
+ * Fabian Keller, Emmanuel Müller, and Klemens Böhm.<br />
+ * HiCS: High Contrast Subspaces for Density-Based Outlier Ranking. <br />
+ * In ICDE, pages 1037–1048, 2012.
+ * </p>
+ * 
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+  /**
+   * Monte-Carlo iterations
+   */
+  private int m = 50;
+
+  /**
+   * Alpha threshold
+   */
+  private double alpha = 0.1;
+
+  /**
+   * Statistical test to use
+   */
+  private GoodnessOfFitTest statTest;
+
+  /**
+   * Random generator
+   */
+  private RandomFactory rnd;
+
+  /**
+   * Constructor.
+   * 
+   * @param statTest Test function
+   * @param m Number of monte-carlo iterations
+   * @param alpha Alpha threshold
+   * @param rnd Random source
+   */
+  public HiCSDimensionSimilarity(GoodnessOfFitTest statTest, int m, double alpha, RandomFactory rnd) {
+    super();
+    this.statTest = statTest;
+    this.m = m;
+    this.alpha = alpha;
+    this.rnd = rnd;
+  }
+
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final Random random = rnd.getRandom();
+    final int dim = matrix.size();
+
+    // FIXME: only compute indexes necessary.
+    ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation, subset, matrix);
+
+    // compute two-element sets of subspaces
+    for (int x = 0; x < dim; x++) {
+      final int i = matrix.dim(x);
+      for (int y = x + 1; y < dim; y++) {
+        final int j = matrix.dim(y);
+        matrix.set(x, y, calculateContrast(relation, subset, subspaceIndex.get(x), subspaceIndex.get(y), i, j, random));
+      }
+    }
+  }
+
+  /**
+   * Calculates "index structures" for every attribute, i.e. sorts a
+   * ModifiableArray of every DBID in the database for every dimension and
+   * stores them in a list
+   * 
+   * @param relation Relation to index
+   * @param ids IDs to use
+   * @param matrix Matrix (for dimension subset)
+   * @return List of sorted objects
+   */
+  private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation, DBIDs ids, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim);
+
+    SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
+    for (int i = 0; i < dim; i++) {
+      ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(ids);
+      comp.setDimension(matrix.dim(i));
+      amDBIDs.sort(comp);
+      subspaceIndex.add(amDBIDs);
+    }
+
+    return subspaceIndex;
+  }
+
+  /**
+   * Calculates the actual contrast of a given subspace
+   * 
+   * @param relation Data relation
+   * @param subset Subset to process
+   * @param subspaceIndex1 Index of first subspace
+   * @param subspaceIndex2 Index of second subspace
+   * @param dim1 First dimension
+   * @param dim2 Second dimension
+   * @param random Random generator
+   * @return Contrast
+   */
+  private double calculateContrast(Relation<? extends NumberVector<?>> relation, DBIDs subset, ArrayDBIDs subspaceIndex1, ArrayDBIDs subspaceIndex2, int dim1, int dim2, Random random) {
+    final double alpha1 = Math.pow(alpha, .5);
+    final int windowsize = (int) (relation.size() * alpha1);
+
+    // TODO: speed up by keeping marginal distributions prepared.
+    // Instead of doing the random switch, do half-half.
+    double deviationSum = 0.0;
+    for (int i = 0; i < m; i++) {
+      // Randomly switch dimensions
+      final int cdim1;
+      ArrayDBIDs cindex1, cindex2;
+      if (random.nextDouble() > .5) {
+        cdim1 = dim1;
+        cindex1 = subspaceIndex1;
+        cindex2 = subspaceIndex2;
+      } else {
+        cdim1 = dim2;
+        cindex1 = subspaceIndex2;
+        cindex2 = subspaceIndex1;
+      }
+      // Build the sample
+      DBIDArrayIter iter = cindex2.iter();
+      HashSetModifiableDBIDs conditionalSample = DBIDUtil.newHashSet();
+      iter.seek(random.nextInt(subset.size() - windowsize));
+      for (int k = 0; k < windowsize && iter.valid(); k++, iter.advance()) {
+        conditionalSample.add(iter);
+      }
+      // Project the data
+      double[] fullValues = new double[subset.size()];
+      double[] sampleValues = new double[conditionalSample.size()];
+      {
+        int l = 0, s = 0;
+        // Note: we use the sorted index sets.
+        for (DBIDIter id = cindex1.iter(); id.valid(); id.advance(), l++) {
+          final double val = relation.get(id).doubleValue(cdim1);
+          fullValues[l] = val;
+          if (conditionalSample.contains(id)) {
+            sampleValues[s] = val;
+            s++;
+          }
+        }
+        assert (s == conditionalSample.size());
+      }
+      double contrast = statTest.deviation(fullValues, sampleValues);
+      if (Double.isNaN(contrast)) {
+        i--;
+        continue;
+      }
+      deviationSum += contrast;
+    }
+    return deviationSum / m;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * Statistical test to use
+     */
+    private GoodnessOfFitTest statTest;
+
+    /**
+     * Holds the value of {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#M_ID}.
+     */
+    private int m = 50;
+
+    /**
+     * Holds the value of {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#ALPHA_ID}.
+     */
+    private double alpha = 0.1;
+
+    /**
+     * Random generator.
+     */
+    private RandomFactory rnd;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      final IntParameter mP = new IntParameter(HiCS.Parameterizer.M_ID, 50);
+      mP.addConstraint(new GreaterConstraint(1));
+      if (config.grab(mP)) {
+        m = mP.intValue();
+      }
+
+      final DoubleParameter alphaP = new DoubleParameter(HiCS.Parameterizer.ALPHA_ID, 0.1);
+      alphaP.addConstraint(new GreaterConstraint(0));
+      if (config.grab(alphaP)) {
+        alpha = alphaP.doubleValue();
+      }
+
+      final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(HiCS.Parameterizer.TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
+      if (config.grab(testP)) {
+        statTest = testP.instantiateClass(config);
+      }
+
+      final RandomParameter rndP = new RandomParameter(HiCS.Parameterizer.SEED_ID);
+      if (config.grab(rndP)) {
+        rnd = rndP.getValue();
+      }
+    }
+
+    @Override
+    protected HiCSDimensionSimilarity makeInstance() {
+      return new HiCSDimensionSimilarity(statTest, m, alpha, rnd);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java
new file mode 100644
index 00000000..aecdf857
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java
@@ -0,0 +1,288 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Compute dimension similarity by using a nested means discretization.
+ * 
+ * Reference:
+ * <p>
+ * Diansheng Guo<br />
+ * Coordinating computational and visual approaches for interactive feature
+ * selection and multivariate clustering<br />
+ * Information Visualization, 2(4), 2003.
+ * </p>
+ * 
+ * @author Erich Schubert
+ */
+@Reference(authors = "Diansheng Guo", title = "Coordinating computational and visual approaches for interactive feature selection and multivariate clustering", booktitle = "Information Visualization, 2(4)", url = "http://dx.doi.org/10.1057/palgrave.ivs.9500053")
+public class MCEDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+  /**
+   * Static instance.
+   */
+  public static final MCEDimensionSimilarity STATIC = new MCEDimensionSimilarity();
+
+  /**
+   * Desired size: 35 observations.
+   * 
+   * While this could trivially be made parameterizable, it is a reasonable rule
+   * of thumb and not expected to have a major effect.
+   */
+  public static final int TARGET = 35;
+
+  /**
+   * Constructor. Use static instance instead!
+   */
+  protected MCEDimensionSimilarity() {
+    super();
+  }
+
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+
+    // Find a number of bins as recommended by Cheng et al.
+    double p = Math.log(subset.size() / (double) TARGET) / MathUtil.LOG2;
+    // As we are in 2d, take the root (*.5) But let's use at least 1, too.
+    // Check: for 10000 this should give 4, for 150 it gives 1.
+    int power = Math.max(1, (int) Math.floor(p * .5));
+    int gridsize = 1 << power;
+    double loggrid = Math.log((double) gridsize);
+
+    ArrayList<ArrayList<DBIDs>> parts = buildPartitions(relation, subset, power, matrix);
+
+    // Partition sizes
+    int[][] psizes = new int[dim][gridsize];
+    for (int d = 0; d < dim; d++) {
+      final ArrayList<DBIDs> partsd = parts.get(d);
+      final int[] sizesp = psizes[d];
+      for (int i = 0; i < gridsize; i++) {
+        sizesp[i] = partsd.get(i).size();
+      }
+    }
+
+    int[][] res = new int[gridsize][gridsize];
+    for (int x = 0; x < dim; x++) {
+      ArrayList<DBIDs> partsi = parts.get(x);
+      for (int y = x + 1; y < dim; y++) {
+        ArrayList<DBIDs> partsj = parts.get(y);
+        // Fill the intersection matrix
+        intersectionMatrix(res, partsi, partsj, gridsize);
+        matrix.set(x, y, 1. - getMCEntropy(res, psizes[x], psizes[y], subset.size(), gridsize, loggrid));
+      }
+    }
+  }
+
+  /**
+   * Intersect the two 1d grid decompositions, to obtain a 2d matrix.
+   * 
+   * @param res Output matrix to fill
+   * @param partsx Partitions in first component
+   * @param partsy Partitions in second component.
+   * @param gridsize Size of partition decomposition
+   */
+  private void intersectionMatrix(int[][] res, ArrayList<? extends DBIDs> partsx, ArrayList<? extends DBIDs> partsy, int gridsize) {
+    for (int x = 0; x < gridsize; x++) {
+      final DBIDs px = partsx.get(x);
+      final int[] rowx = res[x];
+      for (int y = 0; y < gridsize; y++) {
+        rowx[y] = DBIDUtil.intersectionSize(px, partsy.get(y));
+      }
+    }
+  }
+
+  /**
+   * Compute the MCE entropy value.
+   * 
+   * @param mat Partition size matrix
+   * @param psizesx Partition sizes on X
+   * @param psizesy Partition sizes on Y
+   * @param size Data set size
+   * @param gridsize Size of grids
+   * @param loggrid Logarithm of grid sizes, for normalization
+   * @return MCE score.
+   */
+  private double getMCEntropy(int[][] mat, int[] psizesx, int[] psizesy, int size, int gridsize, double loggrid) {
+    // Margin entropies:
+    double[] mx = new double[gridsize];
+    double[] my = new double[gridsize];
+
+    for (int i = 0; i < gridsize; i++) {
+      // Note: indexes are a bit tricky here, because we compute both margin
+      // entropies at the same time!
+      final double sumx = (double) psizesx[i];
+      final double sumy = (double) psizesy[i];
+      for (int j = 0; j < gridsize; j++) {
+        double px = mat[i][j] / sumx;
+        double py = mat[j][i] / sumy;
+
+        if (px > 0.) {
+          mx[i] -= px * Math.log(px);
+        }
+        if (py > 0.) {
+          my[i] -= py * Math.log(py);
+        }
+      }
+    }
+
+    // Weighted sums of margin entropies.
+    double sumx = 0., sumy = 0.;
+    for (int i = 0; i < gridsize; i++) {
+      sumx += mx[i] * psizesx[i];
+      sumy += my[i] * psizesy[i];
+    }
+
+    double max = ((sumx > sumy) ? sumx : sumy);
+    return max / (size * loggrid);
+  }
+
+  /**
+   * Calculates "index structures" for every attribute, i.e. sorts a
+   * ModifiableArray of every DBID in the database for every dimension and
+   * stores them in a list.
+   * 
+   * @param relation Relation to index
+   * @param ids IDs to use
+   * @param matrix Matrix for dimension information
+   * @return List of sorted objects
+   */
+  private ArrayList<ArrayList<DBIDs>> buildPartitions(Relation<? extends NumberVector<?>> relation, DBIDs ids, int depth, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    ArrayList<ArrayList<DBIDs>> subspaceIndex = new ArrayList<ArrayList<DBIDs>>(dim);
+    SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
+    double[] tmp = new double[ids.size()];
+    Mean mean = new Mean();
+
+    for (int i = 0; i < dim; i++) {
+      final int d = matrix.dim(i);
+      // Index for a single dimension:
+      ArrayList<DBIDs> idx = new ArrayList<DBIDs>(1 << depth);
+      // First, we need a copy of the DBIDs and sort it.
+      ArrayModifiableDBIDs sids = DBIDUtil.newArray(ids);
+      comp.setDimension(d);
+      sids.sort(comp);
+      // Now we build the temp array, and compute the first mean.
+      DBIDArrayIter it = sids.iter();
+      for (int j = 0; j < tmp.length; j++, it.advance()) {
+        assert (it.valid());
+        tmp[j] = relation.get(it).doubleValue(d);
+      }
+      divide(it, tmp, idx, 0, tmp.length, depth, mean);
+      assert (idx.size() == (1 << depth));
+      subspaceIndex.add(idx);
+    }
+
+    return subspaceIndex;
+  }
+
+  /**
+   * Recursive call to further subdivide the array.
+   * 
+   * @param it Iterator (will be reset!)
+   * @param data 1D data, sorted
+   * @param idx Output index
+   * @param start Interval start
+   * @param end Interval end
+   * @param depth Depth
+   * @param mean Mean working variable (will be reset!)
+   */
+  private void divide(DBIDArrayIter it, double[] data, ArrayList<DBIDs> idx, int start, int end, int depth, Mean mean) {
+    final int count = end - start;
+    if (depth == 0) {
+      if (count > 0) {
+        ModifiableDBIDs out = DBIDUtil.newHashSet(count);
+        it.seek(start);
+        for (int i = count; i > 0; i--, it.advance()) {
+          out.add(it);
+        }
+        idx.add(out);
+      } else {
+        idx.add(DBIDUtil.EMPTYDBIDS);
+      }
+      return;
+    } else {
+      if (count > 0) {
+        mean.reset();
+        for (int i = start; i < end; i++) {
+          mean.put(data[i]);
+        }
+        final double m = mean.getMean();
+        int pos = Arrays.binarySearch(data, start, end, m);
+        if (pos >= 0) {
+          // Ties: try to choose the most central element.
+          int opt = (start + end) >> 1;
+          while (Double.compare(data[pos], m) == 0) {
+            if (pos < opt) {
+              pos++;
+            } else if (pos > opt) {
+              pos--;
+            } else {
+              break;
+            }
+          }
+        } else {
+          pos = (-pos - 1);
+        }
+        divide(it, data, idx, start, pos, depth - 1, mean);
+        divide(it, data, idx, pos, end, depth - 1, mean);
+      } else {
+        // Corner case, that should barely happen. But for ties, we currently
+        // Do not yet assure that it doesn't happen!
+        divide(it, data, idx, start, end, depth - 1, mean);
+        divide(it, data, idx, start, end, depth - 1, mean);
+      }
+    }
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected MCEDimensionSimilarity makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java
new file mode 100644
index 00000000..551b4759
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java
@@ -0,0 +1,133 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Compute the similarity of dimensions using the SURFING score. The parameter k
+ * for the k nearest neighbors is currently hard-coded to 10% of the set size.
+ * 
+ * Note that the complexity is roughly O(n n k) * O(d^2), so this is a rather
+ * slow method without index support, and with k at 10% of n, is actually cubic.
+ * So try to use an appropriate index!
+ * 
+ * Reference:
+ * <p>
+ * Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and
+ * Peer Kröger<br />
+ * Subspace Selection for Clustering High-Dimensional Data<br />
+ * In IEEE International Conference on Data Mining, 2004.
+ * </p>
+ * 
+ * TODO: make the subspace distance function and k parameterizable.
+ * 
+ * @author Robert Rödler
+ * @author Erich Schubert
+ * 
+ * @apiviz.uses SubspaceEuclideanDistanceFunction
+ */
+@Reference(authors = "Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and Peer Kröger", title = "Subspace Selection for Clustering High-Dimensional Data", booktitle = "IEEE International Conference on Data Mining, 2004", url = "http://dx.doi.org/10.1109/ICDM.2004.10112")
+public class SURFINGDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+  /**
+   * Static instance.
+   */
+  public static final SURFINGDimensionSimilarity STATIC = new SURFINGDimensionSimilarity();
+
+  /**
+   * Constructor. Use static instance instead!
+   */
+  protected SURFINGDimensionSimilarity() {
+    super();
+  }
+
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    final Database db = relation.getDatabase();
+    Mean kdistmean = new Mean();
+    final int k = Math.max(1, subset.size() / 10);
+
+    double[] knns = new double[subset.size()];
+
+    // TODO: optimize by using 1d indexes?
+    for (int x = 0; x < dim; x++) {
+      final int i = matrix.dim(x);
+      for (int y = x + 1; y < dim; y++) {
+        final int j = matrix.dim(y);
+        BitSet dims = new BitSet(dim);
+        dims.set(i);
+        dims.set(j);
+        DistanceQuery<? extends NumberVector<?>, DoubleDistance> dq = db.getDistanceQuery(relation, new SubspaceEuclideanDistanceFunction(dims));
+        KNNQuery<? extends NumberVector<?>, DoubleDistance> knnq = db.getKNNQuery(dq, k);
+
+        kdistmean.reset();
+        int knn = 0;
+        for (DBIDIter id1 = subset.iter(); id1.valid(); id1.advance(), knn++) {
+          final double kdist = knnq.getKNNForDBID(id1, k).getKNNDistance().doubleValue();
+          kdistmean.put(kdist);
+          knns[knn] = kdist;
+        }
+        double mean = kdistmean.getMean();
+        // Deviation from mean:
+        double diff = 0.;
+        int below = 0;
+        for (int l = 0; l < knns.length; l++) {
+          diff += Math.abs(mean - knns[l]);
+          if (knns[l] < mean) {
+            below++;
+          }
+        }
+        matrix.set(x, y, (below > 0) ? diff / (2. * mean * below) : 0);
+      }
+    }
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected SURFINGDimensionSimilarity makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java
new file mode 100644
index 00000000..3c81da63
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java
@@ -0,0 +1,142 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Arrange dimensions based on the entropy of the slope spectrum.
+ * 
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+public class SlopeDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+  /**
+   * Static instance.
+   */
+  public static final SlopeDimensionSimilarity STATIC = new SlopeDimensionSimilarity();
+
+  /**
+   * Full precision.
+   */
+  protected final static int PRECISION = 40;
+
+  /**
+   * Precision for entropy normalization.
+   */
+  protected final static double LOG_PRECISION = Math.log(PRECISION);
+
+  /**
+   * Scaling factor.
+   */
+  protected final static double RESCALE = PRECISION * .5;
+
+  /**
+   * Constructor. Use static instance instead!
+   */
+  protected SlopeDimensionSimilarity() {
+    super();
+  }
+
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    final int size = subset.size();
+
+    // FIXME: Get/keep these statistics in the relation, or compute for the
+    // sample only.
+    double[] off = new double[dim], scale = new double[dim];
+    {
+      Pair<? extends NumberVector<?>, ? extends NumberVector<?>> mm = DatabaseUtil.computeMinMax(relation);
+      NumberVector<?> min = mm.first;
+      NumberVector<?> max = mm.second;
+      for (int d = 0; d < dim; d++) {
+        off[d] = min.doubleValue(matrix.dim(d));
+        final double m = max.doubleValue(matrix.dim(d));
+        scale[d] = (m > off[d]) ? 1. / (m - off[d]) : 1;
+      }
+    }
+
+    // Collect angular histograms.
+    // Note, we only fill half of the matrix
+    int[][][] angles = new int[dim][dim][PRECISION];
+
+    // Scratch buffer
+    double[] vec = new double[dim];
+    for (DBIDIter id = subset.iter(); id.valid(); id.advance()) {
+      final NumberVector<?> obj = relation.get(id);
+      // Map values to 0..1
+      for (int d = 0; d < dim; d++) {
+        vec[d] = (obj.doubleValue(matrix.dim(d)) - off[d]) * scale[d];
+      }
+      for (int i = 0; i < dim - 1; i++) {
+        for (int j = i + 1; j < dim; j++) {
+          // This will be on a scale of 0 .. 2:
+          final double delta = vec[j] - vec[i] + 1;
+          int div = (int) Math.round(delta * RESCALE);
+          // TODO: do we really need this check?
+          div = (div < 0) ? 0 : (div >= PRECISION) ? PRECISION - 1 : div;
+          angles[i][j][div] += 1;
+        }
+      }
+    }
+
+    // Compute entropy in each combination:
+    for (int x = 0; x < dim; x++) {
+      for (int y = x + 1; y < dim; y++) {
+        double entropy = 0.;
+        int[] as = angles[x][y];
+        for (int l = 0; l < PRECISION; l++) {
+          if (as[l] > 0) {
+            final double p = as[l] / (double) size;
+            entropy += p * Math.log(p);
+          }
+        }
+        entropy /= LOG_PRECISION;
+
+        matrix.set(x, y, 1 + entropy);
+      }
+    }
+  }
+  
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected SlopeDimensionSimilarity makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java
new file mode 100644
index 00000000..aad58448
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java
@@ -0,0 +1,158 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Arrange dimensions based on the entropy of the slope spectrum. In contrast to
+ * {@link SlopeDimensionSimilarity}, we also take the option of inverting an
+ * axis into account.
+ * 
+ * TODO: shouldn't this be normalized by the single-dimension entropies or so?
+ * 
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+public class SlopeInversionDimensionSimilarity extends SlopeDimensionSimilarity {
+  /**
+   * Static instance.
+   */
+  public static final SlopeInversionDimensionSimilarity STATIC = new SlopeInversionDimensionSimilarity();
+
+  /**
+   * Constructor. Use static instance instead!
+   */
+  protected SlopeInversionDimensionSimilarity() {
+    super();
+  }
+
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    final int size = subset.size();
+
+    // Collect angular histograms.
+    // Note, we only fill half of the matrix
+    int[][][] angles = new int[dim][dim][PRECISION];
+    int[][][] angleI = new int[dim][dim][PRECISION];
+
+    // FIXME: Get/keep these statistics in the relation, or compute for the
+    // sample only.
+    double[] off = new double[dim], scale = new double[dim];
+    {
+      Pair<? extends NumberVector<?>, ? extends NumberVector<?>> mm = DatabaseUtil.computeMinMax(relation);
+      NumberVector<?> min = mm.first;
+      NumberVector<?> max = mm.second;
+      for (int d = 0; d < dim; d++) {
+        off[d] = min.doubleValue(matrix.dim(d));
+        final double m = max.doubleValue(matrix.dim(d));
+        scale[d] = (m > off[d]) ? 1. / (m - off[d]) : 1;
+      }
+    }
+
+    // Scratch buffer
+    double[] vec = new double[dim];
+    for (DBIDIter id = subset.iter(); id.valid(); id.advance()) {
+      final NumberVector<?> obj = relation.get(id);
+      // Map values to 0..1
+      for (int d = 0; d < dim; d++) {
+        vec[d] = (obj.doubleValue(matrix.dim(d)) - off[d]) * scale[d];
+      }
+      for (int i = 0; i < dim - 1; i++) {
+        for (int j = i + 1; j < dim; j++) {
+          {
+            // This will be on a scale of 0 .. 2:
+            final double delta = vec[j] - vec[i] + 1;
+            int div = (int) Math.round(delta * RESCALE);
+            // TODO: do we really need this check?
+            div = (div < 0) ? 0 : (div >= PRECISION) ? PRECISION - 1 : div;
+            angles[i][j][div] += 1;
+          }
+          {
+            // This will be on a scale of 0 .. 2:
+            final double delta = vec[j] + vec[i];
+            int div = (int) Math.round(delta * RESCALE);
+            // TODO: do we really need this check?
+            div = (div < 0) ? 0 : (div >= PRECISION) ? PRECISION - 1 : div;
+            angleI[i][j][div] += 1;
+          }
+        }
+      }
+    }
+
+    // Compute entropy in each combination:
+    for (int x = 0; x < dim; x++) {
+      for (int y = x + 1; y < dim; y++) {
+        double entropy = 0., entropyI = 0;
+        {
+          int[] as = angles[x][y];
+          for (int l = 0; l < PRECISION; l++) {
+            if (as[l] > 0) {
+              final double p = as[l] / (double) size;
+              entropy += p * Math.log(p);
+            }
+          }
+        }
+        {
+          int[] as = angleI[x][y];
+          for (int l = 0; l < PRECISION; l++) {
+            if (as[l] > 0) {
+              final double p = as[l] / (double) size;
+              entropyI += p * Math.log(p);
+            }
+          }
+        }
+        if (entropy >= entropyI) {
+          entropy = 1 + entropy / LOG_PRECISION;
+          matrix.set(x, y, entropy);
+        } else {
+          entropyI = 1 + entropyI / LOG_PRECISION;
+          // Negative sign to indicate the axes might be inversely related
+          matrix.set(x, y, -entropyI);
+        }
+      }
+    }
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected SlopeInversionDimensionSimilarity makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java
new file mode 100644
index 00000000..b6c27c57
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>Functions to compute the similarity of dimensions (or the interestingness of the combination).</p> 
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+\ No newline at end of file