summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java
diff options
context:
space:
mode:
authorErich Schubert <erich@debian.org>2012-12-14 20:45:15 +0100
committerAndrej Shadura <andrewsh@debian.org>2019-03-09 22:30:35 +0000
commit357b2761a2c0ded8cad5e4d3c1e667b7639ff7a6 (patch)
tree3dd8947bb70a67c221adc3cd4359ba1d385e2f3c /src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java
parent4343785ebed9d4145f417d86d581f18a0d31e4ac (diff)
parentb7b404fd7a726774d442562d11659d7b5368cdb9 (diff)
Import Debian changes 0.5.5-1
elki (0.5.5-1) unstable; urgency=low * New upstream release: 0.5.5 interim release.
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java251
1 files changed, 251 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java
new file mode 100644
index 00000000..b221866c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java
@@ -0,0 +1,251 @@
+package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.SinCosTable;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * FIXME: This needs serious TESTING before release. Large parts have been
+ * rewritten, but could not be tested at the time of rewriting.
+ *
+ * Compute the similarity of dimensions by using a hough transformation.
+ *
+ * Reference: <br>
+ * <p>
+ * A. Tatu, G. Albuquerque, M. Eisemann, P. Bak, H. Theisel, M. A. Magnor, and
+ * D. A. Keim.<br />
+ * Automated Analytical Methods to Support Visual Exploration of High-
+ * Dimensional Data. <br/>
+ * IEEEVisualization and Computer Graphics, 2011.
+ * </p>
+ *
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+@Reference(authors = "A. Tatu, G. Albuquerque, M. Eisemann, P. Bak, H. Theisel, M. A. Magnor, and D. A. Keim.", title = "Automated Analytical Methods to Support Visual Exploration of High-Dimensional Data", booktitle = "IEEE Trans. Visualization and Computer Graphics, 2011", url = "http://dx.doi.org/10.1109/TVCG.2010.242")
+public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+ /**
+ * Static instance.
+ */
+ public static final HSMDimensionSimilarity STATIC = new HSMDimensionSimilarity();
+
+ /**
+ * Angular resolution. Best if divisible by 4: smaller tables.
+ *
+ * The original publication used 50.
+ */
+ private final static int STEPS = 64;
+
+ /**
+ * Precompute sinus and cosinus
+ */
+ private final static SinCosTable table = SinCosTable.make(STEPS);
+
+ /**
+ * Constructor. Use static instance instead!
+ */
+ protected HSMDimensionSimilarity() {
+ super();
+ }
+
+ @Override
+ public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ final int dim = matrix.size();
+ final int resolution = 500;
+ byte[][][][] pics = new byte[dim][dim][][]; // [resolution][resolution];
+
+ // Initialize / allocate "pictures":
+ for (int i = 0; i < dim - 1; i++) {
+ for (int j = i + 1; j < dim; j++) {
+ pics[i][j] = new byte[resolution][resolution];
+ }
+ }
+ // FIXME: Get/keep these statistics in the relation, or compute for the
+ // sample only.
+ double[] off = new double[dim], scale = new double[dim];
+ {
+ Pair<? extends NumberVector<?>, ? extends NumberVector<?>> mm = DatabaseUtil.computeMinMax(relation);
+ NumberVector<?> min = mm.first;
+ NumberVector<?> max = mm.second;
+ for (int d = 0; d < dim; d++) {
+ off[d] = min.doubleValue(matrix.dim(d));
+ final double m = max.doubleValue(matrix.dim(d));
+ scale[d] = (m > off[d]) ? 1. / (m - off[d]) : 1;
+ }
+ }
+ // Iterate dataset
+ for (DBIDIter id = subset.iter(); id.valid(); id.advance()) {
+ NumberVector<?> pvec = relation.get(id);
+ for (int i = 0; i < dim - 1; i++) {
+ for (int j = i + 1; j < dim; j++) {
+ double xi = (pvec.doubleValue(matrix.dim(i)) - off[i]) * scale[i];
+ double xj = (pvec.doubleValue(matrix.dim(j)) - off[j]) * scale[j];
+ drawLine(0, (int) (resolution * xi), resolution - 1, (int) (resolution * xj), pics[i][j]);
+ }
+ }
+ }
+
+ final double stepsq = (double) STEPS * (double) STEPS;
+ for (int x = 0; x < dim; x++) {
+ final int i = matrix.dim(x);
+ for (int y = x + 1; y < dim; y++) {
+ final int j = matrix.dim(y);
+ int[][] hough = houghTransformation(pics[i][j]);
+ pics[i][j] = null; // Release picture
+ // The original publication said "median", but judging from the text,
+ // meant "mean". Otherwise, always half of the cells are above the
+ // threshold, which doesn't match the explanation there.
+ double mean = sumMatrix(hough) / stepsq;
+ int abovemean = countAboveThreshold(hough, mean);
+
+ matrix.set(x, y, 1. - (abovemean / stepsq));
+ }
+ }
+ }
+
+ /**
+ * Compute the sum of a matix.
+ *
+ * @param mat Matrix
+ * @return Sum of all elements
+ */
+ private long sumMatrix(int[][] mat) {
+ long ret = 0;
+ for (int i = 0; i < mat[0].length; i++) {
+ for (int j = 0; j < mat.length; j++) {
+ ret += mat[i][j];
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Count the number of cells above the threshold.
+ *
+ * @param mat Matrix
+ * @param threshold Threshold
+ * @return Number of elements above the threshold.
+ */
+ private int countAboveThreshold(int[][] mat, double threshold) {
+ int ret = 0;
+ for (int i = 0; i < mat.length; i++) {
+ int[] row = mat[i];
+ for (int j = 0; j < row.length; j++) {
+ if (row[j] >= threshold) {
+ ret++;
+ }
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Perform a hough transformation on the binary image in "mat".
+ *
+ * @param mat Binary image
+ * @return Hough transformation of image.
+ */
+ private int[][] houghTransformation(byte[][] mat) {
+ final int xres = mat.length, yres = mat[0].length;
+ final double tscale = STEPS / Math.sqrt(xres * xres + yres * yres);
+ final int[][] ret = new int[STEPS][STEPS];
+
+ for (int x = 0; x < mat.length; x++) {
+ for (int y = 0; y < mat[0].length; y++) {
+ if (mat[x][y] > 0) {
+ for (int i = 0; i < STEPS; i++) {
+ final int d = (int) (tscale * (x * table.cos(i) + y * table.sin(i)));
+ if (d > 0 && d < STEPS) {
+ ret[d][i] += mat[x][y];
+ }
+ }
+ }
+ }
+ }
+
+ return ret;
+ }
+
+ /**
+ * Draw a line onto the array, using the classic Bresenham algorithm.
+ *
+ * @param x0 Start X
+ * @param y0 Start Y
+ * @param x1 End X
+ * @param y1 End Y
+ * @param pic Picture array
+ */
+ private static void drawLine(int x0, int y0, int x1, int y1, byte[][] pic) {
+ final int xres = pic.length, yres = pic[0].length;
+ // Ensure bounds
+ y0 = (y0 < 0) ? 0 : (y0 >= yres) ? (yres - 1) : y0;
+ y1 = (y1 < 0) ? 0 : (y1 >= yres) ? (yres - 1) : y1;
+ x0 = (x0 < 0) ? 0 : (x0 >= xres) ? (xres - 1) : x0;
+ x1 = (x1 < 0) ? 0 : (x1 >= xres) ? (xres - 1) : x1;
+ // Default slope
+ final int dx = +Math.abs(x1 - x0), sx = x0 < x1 ? 1 : -1;
+ final int dy = -Math.abs(y1 - y0), sy = y0 < y1 ? 1 : -1;
+ // Error counter
+ int err = dx + dy;
+
+ for (;;) {
+ pic[x0][y0] = 1;
+ if (x0 == x1 && y0 == y1) {
+ break;
+ }
+
+ final int e2 = err << 1;
+ if (e2 > dy) {
+ err += dy;
+ x0 += sx;
+ }
+ if (e2 < dx) {
+ err += dx;
+ y0 += sy;
+ }
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected HSMDimensionSimilarity makeInstance() {
+ return STATIC;
+ }
+ }
+}