diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java | 184 |
1 files changed, 94 insertions, 90 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java index e7d7dd7e..de8d7d3c 100644 --- a/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java +++ b/src/de/lmu/ifi/dbs/elki/distance/distancefunction/timeseries/ERPDistanceFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,30 +23,36 @@ package de.lmu.ifi.dbs.elki.distance.distancefunction.timeseries; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import java.util.Arrays; + import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; /** - * Provides the Edit Distance With Real Penalty distance for FeatureVectors. + * Edit Distance With Real Penalty distance for numerical vectors. + * + * Reference: + * <p> + * L. Chen and R. Ng<br /> + * On the marriage of Lp-norms and edit distance<br /> + * VLDB '04: Proceedings of the Thirtieth international conference on Very large + * data bases + * </p> * * @author Thomas Bernecker */ @Title("Edit Distance with Real Penalty") -@Reference(authors = "L. Chen and R. Ng", title = "On the marriage of Lp-norms and edit distance", booktitle = "VLDB '04: Proceedings of the Thirtieth international conference on Very large data bases", url = "http://www.vldb.org/conf/2004/RS21P2.PDF") -public class ERPDistanceFunction extends AbstractEditDistanceFunction { +@Reference(authors = "L. Chen and R. Ng", // +title = "On the marriage of Lp-norms and edit distance", // +booktitle = "VLDB '04: Proceedings of the Thirtieth international conference on Very large data bases", // +url = "http://www.vldb.org/conf/2004/RS21P2.PDF") +public class ERPDistanceFunction extends DTWDistanceFunction { /** - * G parameter - */ - public static final OptionID G_ID = new OptionID("erp.g", "the g parameter ERP (positive number)"); - - /** - * Keeps the currently set g. + * Gap value. */ private final double g; @@ -61,91 +67,82 @@ public class ERPDistanceFunction extends AbstractEditDistanceFunction { this.g = g; } - /** - * Provides the Edit Distance With Real Penalty distance between the given two - * vectors. - * - * @return the Edit Distance With Real Penalty distance between the given two - * vectors as an instance of {@link DoubleDistance DoubleDistance}. - */ @Override - public double doubleDistance(NumberVector<?> v1, NumberVector<?> v2) { - // Current and previous columns of the matrix - double[] curr = new double[v2.getDimensionality()]; - double[] prev = new double[v2.getDimensionality()]; + public double distance(NumberVector v1, NumberVector v2) { + // Dimensionality, and last valid value in second vector: + final int dim1 = v1.getDimensionality(), dim2 = v2.getDimensionality(); + final int m2 = dim2 - 1; - // size of edit distance band // bandsize is the maximum allowed distance to the diagonal - final int band = (int) Math.ceil(v2.getDimensionality() * bandSize); - - for(int i = 0; i < v1.getDimensionality(); i++) { - // Swap current and prev arrays. We'll just overwrite the new curr. - { - double[] temp = prev; - prev = curr; - curr = temp; + final int band = effectiveBandSize(dim1, dim2); + // unsatisfiable - lengths too different! + if(Math.abs(dim1 - dim2) > band) { + return Double.POSITIVE_INFINITY; + } + // Current and previous columns of the matrix + double[] buf = new double[dim2 << 1]; + Arrays.fill(buf, Double.POSITIVE_INFINITY); + + // Fill first row: + firstRow(buf, band, v1, v2, dim2); + + // Active buffer offsets (cur = read, nxt = write) + int cur = 0, nxt = dim2; + // Fill remaining rows: + int i = 1, l = 0, r = Math.min(m2, i + band); + while(i < dim1) { + final double val1 = v1.doubleValue(i); + for(int j = l; j <= r; j++) { + // Value in previous row (must exist, may be infinite): + double min = buf[cur + j] + delta(val1, g); + // Diagonal: + if(j > 0) { + final double pij = buf[cur + j - 1] + delta(val1, v2.doubleValue(j)); + min = (pij < min) ? pij : min; + // Previous in same row: + if(j > l) { + final double pj = buf[nxt + j - 1] + delta(g, v2.doubleValue(j)); + min = (pj < min) ? pj : min; + } + } + // Write: + buf[nxt + j] = min; } - int l = i - (band + 1); - if(l < 0) { - l = 0; + // Swap buffer positions: + cur = dim2 - cur; + nxt = dim2 - nxt; + // Update positions: + ++i; + if(i > band) { + ++l; } - int r = i + (band + 1); - if(r > (v2.getDimensionality() - 1)) { - r = (v2.getDimensionality() - 1); + if(r < m2) { + ++r; } + } - for(int j = l; j <= r; j++) { - if(Math.abs(i - j) <= band) { - // compute squared distance of feature vectors - double val1 = v1.doubleValue(i); - double val2 = g; - double diff = (val1 - val2); - final double d1 = Math.sqrt(diff * diff); - - val1 = g; - val2 = v2.doubleValue(j); - diff = (val1 - val2); - final double d2 = Math.sqrt(diff * diff); - - val1 = v1.doubleValue(i); - val2 = v2.doubleValue(j); - diff = (val1 - val2); - final double d12 = Math.sqrt(diff * diff); - - final double dist1 = d1 * d1; - final double dist2 = d2 * d2; - final double dist12 = d12 * d12; - - final double cost; - - if((i + j) != 0) { - if((i == 0) || ((j != 0) && (((prev[j - 1] + dist12) > (curr[j - 1] + dist2)) && ((curr[j - 1] + dist2) < (prev[j] + dist1))))) { - // del - cost = curr[j - 1] + dist2; - } - else if((j == 0) || ((i != 0) && (((prev[j - 1] + dist12) > (prev[j] + dist1)) && ((prev[j] + dist1) < (curr[j - 1] + dist2))))) { - // ins - cost = prev[j] + dist1; - } - else { - // match - cost = prev[j - 1] + dist12; - } - } - else { - cost = 0; - } + // TODO: support Euclidean, Manhattan here: + return Math.sqrt(buf[cur + dim2 - 1]); + } - curr[j] = cost; - // steps[i][j] = step; - } - else { - curr[j] = Double.POSITIVE_INFINITY; // outside band - } - } + @Override + protected void firstRow(double[] buf, int band, NumberVector v1, NumberVector v2, int dim2) { + // First cell: + final double val1 = v1.doubleValue(0); + buf[0] = Math.min(delta(val1, g), delta(val1, v2.doubleValue(0))); + + // Width of valid area: + final int w = (band >= dim2) ? dim2 - 1 : band; + // Fill remaining part of buffer: + for(int j = 1; j <= w; j++) { + buf[j] = Math.min(delta(val1, g), buf[j - 1]) + delta(val1, v2.doubleValue(j)); } + } - return Math.sqrt(curr[v2.getDimensionality() - 1]); + @Override + protected double delta(double val1, double val2) { + double diff = val1 - val2; + return diff * diff; } @Override @@ -164,13 +161,20 @@ public class ERPDistanceFunction extends AbstractEditDistanceFunction { * @apiviz.exclude */ public static class Parameterizer extends AbstractEditDistanceFunction.Parameterizer { - protected double g = 0.0; + /** + * G parameter + */ + public static final OptionID G_ID = new OptionID("erp.g", "The g parameter of ERP - comparison value to use in gaps."); + + /** + * Gap value. + */ + protected double g = 0.; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final DoubleParameter gP = new DoubleParameter(G_ID, 0.0); - gP.addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE); + final DoubleParameter gP = new DoubleParameter(G_ID, 0.); if(config.grab(gP)) { g = gP.doubleValue(); } |