diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/HistogramMatchDistanceFunction.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/HistogramMatchDistanceFunction.java | 46 |
1 files changed, 42 insertions, 4 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/HistogramMatchDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/HistogramMatchDistanceFunction.java index 947c50fe..f3bf61f7 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/HistogramMatchDistanceFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/histogram/HistogramMatchDistanceFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.histogram; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,9 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.histogram; */ import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractVectorDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.distance.distancefunction.AbstractSpatialDistanceFunction; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** @@ -34,9 +36,26 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * This distance function assumes there exist a natural order in the vectors, * i.e. they should be some 1-dimensional histogram. * + * This is also known as Earth Movers Distance (EMD), 1st Mallows distance or + * 1st Wasserstein metric (also Vasershtein metric), for the special case of a + * one-dimensional histogram, where the cost is linear in the number of bins to + * transport. + * + * Reference: + * <p> + * L.N. Vaserstein<br /> + * Markov processes over denumerable products of spaces describing large systems + * of automata <br /> + * Problemy Peredachi Informatsii 5.3 / Problems of Information Transmission 5:3 + * </p> + * * @author Erich Schubert */ -public class HistogramMatchDistanceFunction extends AbstractVectorDoubleDistanceFunction { +@Reference(authors = "L.N. Vaserstein", // +title = "Markov processes over denumerable products of spaces describing large systems of automata", // +booktitle = "Problemy Peredachi Informatsii 5.3 / Problems of Information Transmission, 5:3", // +url = "http://mi.mathnet.ru/eng/ppi1811") +public class HistogramMatchDistanceFunction extends AbstractSpatialDistanceFunction { /** * Static instance. Use this! */ @@ -53,7 +72,7 @@ public class HistogramMatchDistanceFunction extends AbstractVectorDoubleDistance } @Override - public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) { + public double distance(NumberVector v1, NumberVector v2) { final int dim = dimensionality(v1, v2); double xs = 0., ys = 0., agg = 0.; for(int i = 0; i < dim; i++) { @@ -65,6 +84,25 @@ public class HistogramMatchDistanceFunction extends AbstractVectorDoubleDistance } @Override + public double minDist(SpatialComparable mbr1, SpatialComparable mbr2) { + final int dim = dimensionality(mbr1, mbr2); + double xmin = 0., xmax = 0., ymin = 0., ymax = 0., agg = 0.; + for(int i = 0; i < dim; i++) { + xmin += mbr1.getMin(i); + xmax += mbr1.getMax(i); + ymin += mbr2.getMin(i); + ymax += mbr2.getMax(i); + agg += (ymin > xmax) ? (ymin - xmax) : (xmin > ymax) ? (xmin - ymax) : 0.; + } + return agg; + } + + @Override + public boolean isMetric() { + return true; + } + + @Override public String toString() { return "HistogramMatchDistanceFunction"; } |