diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math')
107 files changed, 6768 insertions, 1708 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java b/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java index 5c7d3d74..cedfb389 100644 --- a/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -165,4 +165,12 @@ public class DoubleMinMax extends DoubleDoublePair { } return ret; } + + /** + * Reset statistics. + */ + public void reset() { + first = Double.POSITIVE_INFINITY; + second = Double.NEGATIVE_INFINITY; + } } diff --git a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java index 83fbca92..7eea5ed7 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java +++ b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -55,6 +55,11 @@ public final class MathUtil { public static final double SQRT2 = Math.sqrt(2); /** + * Square root of 5 + */ + public static final double SQRT5 = Math.sqrt(5); + + /** * Square root of 0.5 == 1 / Sqrt(2) */ public static final double SQRTHALF = Math.sqrt(.5); @@ -65,6 +70,16 @@ public final class MathUtil { public static final double ONE_BY_SQRTPI = 1 / Math.sqrt(Math.PI); /** + * Logarithm of 2 to the basis e, for logarithm conversion. + */ + public static final double LOG2 = Math.log(2); + + /** + * Math.log(Math.PI) + */ + public static final double LOGPI = Math.log(Math.PI); + + /** * Fake constructor for static class. 
*/ private MathUtil() { @@ -75,18 +90,27 @@ public final class MathUtil { * Computes the square root of the sum of the squared arguments without under * or overflow. * + * Note: this code is <em>not</em> redundant to {@link Math#hypot}, since the + * latter is significantly slower (but maybe has a higher precision). + * * @param a first cathetus * @param b second cathetus * @return {@code sqrt(a<sup>2</sup> + b<sup>2</sup>)} */ - public static double hypotenuse(double a, double b) { - if(Math.abs(a) > Math.abs(b)) { + public static double fastHypot(double a, double b) { + if(a < 0) { + a = -a; + } + if(b < 0) { + b = -b; + } + if(a > b) { final double r = b / a; - return Math.abs(a) * Math.sqrt(1 + r * r); + return a * Math.sqrt(1 + r * r); } else if(b != 0) { final double r = a / b; - return Math.abs(b) * Math.sqrt(1 + r * r); + return b * Math.sqrt(1 + r * r); } else { return 0.0; @@ -94,6 +118,38 @@ public final class MathUtil { } /** + * Computes the square root of the sum of the squared arguments without under + * or overflow. + * + * Note: this code is <em>not</em> redundant to {@link Math#hypot}, since the + * latter is significantly slower (but has a higher precision). + * + * @param a first cathetus + * @param b second cathetus + * @param c second cathetus + * @return {@code sqrt(a<sup>2</sup> + b<sup>2</sup> + c<sup>2</sup>)} + */ + public static double fastHypot3(double a, double b, double c) { + if(a < 0) { + a = -a; + } + if(b < 0) { + b = -b; + } + if(c < 0) { + c = -c; + } + double m = (a > b) ? ((a > c) ? a : c) : ((b > c) ? 
b : c); + if(m <= 0) { + return 0.0; + } + a = a / m; + b = b / m; + c = c / m; + return m * Math.sqrt(a * a + b * b + c * c); + } + + /** * Compute the Mahalanobis distance using the given weight matrix * * @param weightMatrix Weight Matrix @@ -101,7 +157,7 @@ public final class MathUtil { * @return Mahalanobis distance */ public static double mahalanobisDistance(Matrix weightMatrix, Vector o1_minus_o2) { - double sqrDist = o1_minus_o2.transposeTimes(weightMatrix).times(o1_minus_o2).get(0); + double sqrDist = o1_minus_o2.transposeTimesTimes(weightMatrix, o1_minus_o2); if(sqrDist < 0 && Math.abs(sqrDist) < 0.000000001) { sqrDist = Math.abs(sqrDist); @@ -128,36 +184,11 @@ public final class MathUtil { if(xdim <= 0) { throw new IllegalArgumentException("Invalid arguments: dimensionality not positive."); } - double sumXX = 0; - double sumYY = 0; - double sumXY = 0; - { - // Incremental computation - double meanX = x.doubleValue(1); - double meanY = y.doubleValue(1); - for(int i = 2; i <= xdim; i++) { - // Delta to previous mean - final double deltaX = x.doubleValue(i) - meanX; - final double deltaY = y.doubleValue(i) - meanY; - // Update means - meanX += deltaX / i; - meanY += deltaY / i; - // Delta to new mean - final double neltaX = x.doubleValue(i) - meanX; - final double neltaY = y.doubleValue(i) - meanY; - // Update - sumXX += deltaX * neltaX; - sumYY += deltaY * neltaY; - sumXY += deltaX * neltaY; // should equal deltaY * neltaX! 
- } - } - final double popSdX = Math.sqrt(sumXX / xdim); - final double popSdY = Math.sqrt(sumYY / ydim); - final double covXY = sumXY / xdim; - if(popSdX == 0 || popSdY == 0) { - return 0; + PearsonCorrelation pc = new PearsonCorrelation(); + for(int i = 0; i < xdim; i++) { + pc.put(x.doubleValue(i + 1), y.doubleValue(i + 1), 1.0); } - return covXY / (popSdX * popSdY); + return pc.getCorrelation(); } /** @@ -179,42 +210,37 @@ public final class MathUtil { if(xdim != weights.length) { throw new IllegalArgumentException("Dimensionality doesn't agree to weights."); } - // Compute means - double sumWe; - double sumXX = 0; - double sumYY = 0; - double sumXY = 0; - { - // Incremental computation - double meanX = x.doubleValue(1); - double meanY = y.doubleValue(1); - sumWe = weights[0]; - for(int i = 2; i <= xdim; i++) { - final double weight = weights[i - 1]; - sumWe += weight; - // Delta to previous mean - final double deltaX = x.doubleValue(i) - meanX; - final double deltaY = y.doubleValue(i) - meanY; - // Update means - meanX += deltaX * weight / sumWe; - meanY += deltaY * weight / sumWe; - // Delta to new mean - final double neltaX = x.doubleValue(i) - meanX; - final double neltaY = y.doubleValue(i) - meanY; - // Update - sumXX += weight * deltaX * neltaX; - sumYY += weight * deltaY * neltaY; - sumXY += weight * deltaX * neltaY; // should equal weight * deltaY * - // neltaX! - } + PearsonCorrelation pc = new PearsonCorrelation(); + for(int i = 0; i < xdim; i++) { + pc.put(x.doubleValue(i + 1), y.doubleValue(i + 1), weights[i]); } - final double popSdX = Math.sqrt(sumXX / sumWe); - final double popSdY = Math.sqrt(sumYY / sumWe); - final double covXY = sumXY / sumWe; - if(popSdX == 0 || popSdY == 0) { - return 0; + return pc.getCorrelation(); + } + + /** + * <p> + * Provides the Pearson product-moment correlation coefficient for two + * FeatureVectors. 
+ * </p> + * + * @param x first FeatureVector + * @param y second FeatureVector + * @return the Pearson product-moment correlation coefficient for x and y + */ + public static double weightedPearsonCorrelationCoefficient(NumberVector<?, ?> x, NumberVector<?, ?> y, NumberVector<?, ?> weights) { + final int xdim = x.getDimensionality(); + final int ydim = y.getDimensionality(); + if(xdim != ydim) { + throw new IllegalArgumentException("Invalid arguments: feature vectors differ in dimensionality."); + } + if(xdim != weights.getDimensionality()) { + throw new IllegalArgumentException("Dimensionality doesn't agree to weights."); } - return covXY / (popSdX * popSdY); + PearsonCorrelation pc = new PearsonCorrelation(); + for(int i = 0; i < xdim; i++) { + pc.put(x.doubleValue(i + 1), y.doubleValue(i + 1), weights.doubleValue(i + 1)); + } + return pc.getCorrelation(); } /** @@ -233,40 +259,11 @@ public final class MathUtil { if(xdim != ydim) { throw new IllegalArgumentException("Invalid arguments: feature vectors differ in dimensionality."); } - if(xdim <= 0) { - throw new IllegalArgumentException("Invalid arguments: dimensionality not positive."); - } - double sumXX = 0; - double sumYY = 0; - double sumXY = 0; - { - // Incremental computation - double meanX = x[0]; - double meanY = y[0]; - for(int i = 1; i < xdim; i++) { - int sumWe = i + 1; - // Delta to previous mean - final double deltaX = x[i] - meanX; - final double deltaY = y[i] - meanY; - // Update means - meanX += deltaX / sumWe; - meanY += deltaY / sumWe; - // Delta to new mean - final double neltaX = x[i] - meanX; - final double neltaY = y[i] - meanY; - // Update - sumXX += deltaX * neltaX; - sumYY += deltaY * neltaY; - sumXY += deltaX * neltaY; // should equal deltaY * neltaX! 
- } - } - final double popSdX = Math.sqrt(sumXX / xdim); - final double popSdY = Math.sqrt(sumYY / ydim); - final double covXY = sumXY / xdim; - if(popSdX == 0 || popSdY == 0) { - return 0; + PearsonCorrelation pc = new PearsonCorrelation(); + for(int i = 0; i < xdim; i++) { + pc.put(x[i], y[i], 1.0); } - return covXY / (popSdX * popSdY); + return pc.getCorrelation(); } /** @@ -288,42 +285,11 @@ public final class MathUtil { if(xdim != weights.length) { throw new IllegalArgumentException("Dimensionality doesn't agree to weights."); } - // Compute means - double sumWe; - double sumXX = 0; - double sumYY = 0; - double sumXY = 0; - { - // Incremental computation - double meanX = x[0]; - double meanY = y[0]; - sumWe = weights[0]; - for(int i = 1; i < xdim; i++) { - final double weight = weights[i]; - sumWe += weight; - // Delta to previous mean - final double deltaX = x[i] - meanX; - final double deltaY = y[i] - meanY; - // Update means - meanX += deltaX * weight / sumWe; - meanY += deltaY * weight / sumWe; - // Delta to new mean - final double neltaX = x[i] - meanX; - final double neltaY = y[i] - meanY; - // Update - sumXX += weight * deltaX * neltaX; - sumYY += weight * deltaY * neltaY; - sumXY += weight * deltaX * neltaY; // should equal weight * deltaY * - // neltaX! - } - } - final double popSdX = Math.sqrt(sumXX / sumWe); - final double popSdY = Math.sqrt(sumYY / sumWe); - final double covXY = sumXY / sumWe; - if(popSdX == 0 || popSdY == 0) { - return 0; + PearsonCorrelation pc = new PearsonCorrelation(); + for(int i = 0; i < xdim; i++) { + pc.put(x[i], y[i], weights[i]); } - return covXY / (popSdX * popSdY); + return pc.getCorrelation(); } /** @@ -415,462 +381,356 @@ public final class MathUtil { } /** - * Coefficients for erf approximation. + * Compute the sum of the i first integers. 
* - * Loosely based on http://www.netlib.org/specfun/erf + * @param i maximum summand + * @return Sum */ - static final double ERFAPP_A[] = { 1.85777706184603153e-1, 3.16112374387056560e+0, 1.13864154151050156E+2, 3.77485237685302021e+2, 3.20937758913846947e+3 }; + public static long sumFirstIntegers(final long i) { + return ((i - 1L) * i) / 2; + } /** - * Coefficients for erf approximation. + * Produce an array of random numbers in [0:1] * - * Loosely based on http://www.netlib.org/specfun/erf + * @param len Length + * @return Array */ - static final double ERFAPP_B[] = { 1.00000000000000000e00, 2.36012909523441209e01, 2.44024637934444173e02, 1.28261652607737228e03, 2.84423683343917062e03 }; + public static double[] randomDoubleArray(int len) { + return randomDoubleArray(len, new Random()); + } /** - * Coefficients for erf approximation. + * Produce an array of random numbers in [0:1] * - * Loosely based on http://www.netlib.org/specfun/erf + * @param len Length + * @param r Random generator + * @return Array */ - static final double ERFAPP_C[] = { 2.15311535474403846e-8, 5.64188496988670089e-1, 8.88314979438837594e00, 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02, 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725E03 }; + public static double[] randomDoubleArray(int len, Random r) { + final double[] ret = new double[len]; + for(int i = 0; i < len; i++) { + ret[i] = r.nextDouble(); + } + return ret; + } /** - * Coefficients for erf approximation. 
+ * Convert Degree to Radians * - * Loosely based on http://www.netlib.org/specfun/erf + * @param deg Degree value + * @return Radian value */ - static final double ERFAPP_D[] = { 1.00000000000000000e00, 1.57449261107098347e01, 1.17693950891312499e02, 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03, 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03 }; + public static double deg2rad(double deg) { + return deg * Math.PI / 180.0; + } /** - * Coefficients for erf approximation. + * Radians to Degree * - * Loosely based on http://www.netlib.org/specfun/erf + * @param rad Radians value + * @return Degree value */ - static final double ERFAPP_P[] = { 1.63153871373020978e-2, 3.05326634961232344e-1, 3.60344899949804439e-1, 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4 }; + public static double rad2deg(double rad) { + return rad * 180 / Math.PI; + } /** - * Coefficients for erf approximation. + * Compute the approximate on-earth-surface distance of two points. * - * Loosely based on http://www.netlib.org/specfun/erf + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance in km (approximately) */ - static final double ERFAPP_Q[] = { 1.00000000000000000e00, 2.56852019228982242e00, 1.87295284992346047e00, 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3 }; + public static double latlngDistance(double lat1, double lon1, double lat2, double lon2) { + final double EARTH_RADIUS = 6371; // km. + // Work in radians + lat1 = MathUtil.deg2rad(lat1); + lat2 = MathUtil.deg2rad(lat2); + lon1 = MathUtil.deg2rad(lon1); + lon2 = MathUtil.deg2rad(lon2); + // Delta + final double dlat = lat1 - lat2; + final double dlon = lon1 - lon2; - /** - * Complementary error function for Gaussian distributions = Normal - * distributions. 
- * - * Numerical approximation using taylor series. Implementation loosely based - * on http://www.netlib.org/specfun/erf - * - * @param x parameter value - * @return erfc(x) - */ - public static double erfc(double x) { - if(Double.isNaN(x)) { - return Double.NaN; - } - if(Double.isInfinite(x)) { - return (x < 0.0) ? 2 : 0; - } - - double result = Double.NaN; - double absx = Math.abs(x); - // First approximation interval - if(absx < 0.46875) { - double z = x * x; - result = 1 - x * ((((ERFAPP_A[0] * z + ERFAPP_A[1]) * z + ERFAPP_A[2]) * z + ERFAPP_A[3]) * z + ERFAPP_A[4]) / ((((ERFAPP_B[0] * z + ERFAPP_B[1]) * z + ERFAPP_B[2]) * z + ERFAPP_B[3]) * z + ERFAPP_B[4]); - } - // Second approximation interval - else if(absx < 4.0) { - double z = absx; - result = ((((((((ERFAPP_C[0] * z + ERFAPP_C[1]) * z + ERFAPP_C[2]) * z + ERFAPP_C[3]) * z + ERFAPP_C[4]) * z + ERFAPP_C[5]) * z + ERFAPP_C[6]) * z + ERFAPP_C[7]) * z + ERFAPP_C[8]) / ((((((((ERFAPP_D[0] * z + ERFAPP_D[1]) * z + ERFAPP_D[2]) * z + ERFAPP_D[3]) * z + ERFAPP_D[4]) * z + ERFAPP_D[5]) * z + ERFAPP_D[6]) * z + ERFAPP_D[7]) * z + ERFAPP_D[8]); - double rounded = Math.round(result * 16.0) / 16.0; - double del = (absx - rounded) * (absx + rounded); - result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; - if(x < 0.0) { - result = 2.0 - result; - } - } - // Third approximation interval - else { - double z = 1.0 / (absx * absx); - result = z * (((((ERFAPP_P[0] * z + ERFAPP_P[1]) * z + ERFAPP_P[2]) * z + ERFAPP_P[3]) * z + ERFAPP_P[4]) * z + ERFAPP_P[5]) / (((((ERFAPP_Q[0] * z + ERFAPP_Q[1]) * z + ERFAPP_Q[2]) * z + ERFAPP_Q[3]) * z + ERFAPP_Q[4]) * z + ERFAPP_Q[5]); - result = (ONE_BY_SQRTPI - result) / absx; - double rounded = Math.round(result * 16.0) / 16.0; - double del = (absx - rounded) * (absx + rounded); - result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; - if(x < 0.0) { - result = 2.0 - result; - } - } - return result; + // Spherical Law of Cosines + // NOTE: there seems to be a 
signedness issue in this code! + // double dist = Math.sin(lat1) * Math.sin(lat2) + Math.cos(lat1) * + // Math.cos(lat2) * Math.cos(dlon); + // return EARTH_RADIUS * Math.atan(dist); + + // Alternative: Havestine formula, higher precision at < 1 meters: + final double a = Math.sin(dlat / 2) * Math.sin(dlat / 2) + Math.sin(dlon / 2) * Math.sin(dlon / 2) * Math.cos(lat1) * Math.cos(lat2); + final double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); + return EARTH_RADIUS * c; } /** - * Error function for Gaussian distributions = Normal distributions. - * - * Numerical approximation using taylor series. Implementation loosely based - * on http://www.netlib.org/specfun/erf + * Compute the angle between two vectors. * - * @param x parameter value - * @return erf(x) + * @param v1 first vector + * @param v2 second vector + * @return Angle */ - public static double erf(double x) { - return 1 - erfc(x); + public static double angle(Vector v1, Vector v2) { + return angle(v1.getArrayRef(), v2.getArrayRef()); } /** - * Inverse error function. + * Compute the angle between two vectors. * - * @param x parameter value - * @return erfinv(x) + * @param v1 first vector + * @param v2 second vector + * @return Angle */ - public static double erfinv(double x) { - return standardNormalProbit(0.5 * (x + 1)) / SQRT2; + public static double angle(double[] v1, double[] v2) { + // Essentially, we want to compute this: + // v1.transposeTimes(v2) / (v1.euclideanLength() * v2.euclideanLength()); + // We can just compute all three in parallel. 
+ double s = 0, e1 = 0, e2 = 0; + for(int k = 0; k < v1.length; k++) { + final double r1 = v1[k]; + final double r2 = v2[k]; + s += r1 * r2; + e1 += r1 * r1; + e2 += r2 * r2; + } + return Math.sqrt((s / e1) * (s / e2)); } /** - * Treshold for switching nethods for erfinv approximation - */ - static final double P_LOW = 0.02425D; - - /** - * Treshold for switching nethods for erfinv approximation - */ - static final double P_HIGH = 1.0D - P_LOW; - - /** - * Coefficients for erfinv approximation, rational version - */ - static final double ERFINV_A[] = { -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 }; - - /** - * Coefficients for erfinv approximation, rational version - */ - static final double ERFINV_B[] = { -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 }; - - /** - * Coefficients for erfinv approximation, rational version - */ - static final double ERFINV_C[] = { -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 }; - - /** - * Coefficients for erfinv approximation, rational version + * Compute the angle between two vectors. + * + * @param v1 first vector + * @param v2 second vector + * @param o Origin + * @return Angle */ - static final double ERFINV_D[] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 }; + public static double angle(Vector v1, Vector v2, Vector o) { + return angle(v1.getArrayRef(), v2.getArrayRef(), o.getArrayRef()); + } /** - * Approximate the inverse error function for normal distributions. - * - * Largely based on: - * <p> - * http://www.math.uio.no/~jacklam/notes/invnorm/index.html <br> - * by Peter John Acklam - * </p> + * Compute the angle between two vectors. * - * @param d Quantile. Must be in [0:1], obviously. - * @return Inverse erf. 
+ * @param v1 first vector + * @param v2 second vector + * @param o Origin + * @return Angle */ - public static double standardNormalProbit(double d) { - if(d == 0) { - return Double.NEGATIVE_INFINITY; - } - else if(d == 1) { - return Double.POSITIVE_INFINITY; - } - else if(Double.isNaN(d) || d < 0 || d > 1) { - return Double.NaN; - } - else if(d < P_LOW) { - // Rational approximation for lower region: - double q = Math.sqrt(-2 * Math.log(d)); - return (((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); - } - else if(P_HIGH < d) { - // Rational approximation for upper region: - double q = Math.sqrt(-2 * Math.log(1 - d)); - return -(((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); - } - else { - // Rational approximation for central region: - double q = d - 0.5D; - double r = q * q; - return (((((ERFINV_A[0] * r + ERFINV_A[1]) * r + ERFINV_A[2]) * r + ERFINV_A[3]) * r + ERFINV_A[4]) * r + ERFINV_A[5]) * q / (((((ERFINV_B[0] * r + ERFINV_B[1]) * r + ERFINV_B[2]) * r + ERFINV_B[3]) * r + ERFINV_B[4]) * r + 1); + public static double angle(double[] v1, double[] v2, double[] o) { + // Essentially, we want to compute this: + // v1' = v1 - o, v2' = v2 - o + // v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength()); + // We can just compute all three in parallel. + double s = 0, e1 = 0, e2 = 0; + for(int k = 0; k < v1.length; k++) { + final double r1 = v1[k] - o[k]; + final double r2 = v2[k] - o[k]; + s += r1 * r2; + e1 += r1 * r1; + e2 += r2 * r2; } + return Math.sqrt((s / e1) * (s / e2)); } /** - * Probability density function of the normal distribution. + * Find the next power of 2. 
* - * <pre> - * 1/(SQRT(2*pi*sigma^2)) * e^(-(x-mu)^2/2sigma^2) - * </pre> + * Classic bit operation, for signed 32-bit. Valid for positive integers only + * (0 otherwise). * - * @param x The value. - * @param mu The mean. - * @param sigma The standard deviation. - * @return PDF of the given normal distribution at x. + * @param x original integer + * @return Next power of 2 */ - public static double normalPDF(double x, double mu, double sigma) { - final double x_mu = x - mu; - final double sigmasq = sigma * sigma; - return 1 / (Math.sqrt(TWOPI * sigmasq)) * Math.exp(-1 * x_mu * x_mu / 2 / sigmasq); + public static int nextPow2Int(int x) { + --x; + x |= x >>> 1; + x |= x >>> 2; + x |= x >>> 4; + x |= x >>> 8; + x |= x >>> 16; + return ++x; } /** - * Cumulative probability density function (CDF) of a normal distribution. + * Find the next power of 2. * - * @param x value to evaluate CDF at - * @param mu Mean value - * @param sigma Standard deviation. - * @return The CDF of the normal given distribution at x. - */ - public static double normalCDF(double x, double mu, double sigma) { - return (1 + erf(x / Math.sqrt(2))) / 2; - } - - /** - * Inverse cumulative probability density function (probit) of a normal - * distribution. + * Classic bit operation, for signed 64-bit. Valid for positive integers only + * (0 otherwise). * - * @param x value to evaluate probit function at - * @param mu Mean value - * @param sigma Standard deviation. - * @return The probit of the normal given distribution at x. + * @param x original long integer + * @return Next power of 2 */ - public static double normalProbit(double x, double mu, double sigma) { - return mu + sigma * standardNormalProbit(x); + public static long nextPow2Long(long x) { + --x; + x |= x >>> 1; + x |= x >>> 2; + x |= x >>> 4; + x |= x >>> 16; + x |= x >>> 32; + return ++x; } /** - * LANCZOS-Coefficients for Gamma approximation. + * Find the next larger number with all ones. 
* - * These are said to have higher precision than those in "Numerical Recipes". - * They probably come from + * Classic bit operation, for signed 32-bit. Valid for positive integers only + * (-1 otherwise). * - * Paul Godfrey: http://my.fit.edu/~gabdo/gamma.txt - */ - static final double[] LANCZOS = { 0.99999999999999709182, 57.156235665862923517, -59.597960355475491248, 14.136097974741747174, -0.49191381609762019978, .33994649984811888699e-4, .46523628927048575665e-4, -.98374475304879564677e-4, .15808870322491248884e-3, -.21026444172410488319e-3, .21743961811521264320e-3, -.16431810653676389022e-3, .84418223983852743293e-4, -.26190838401581408670e-4, .36899182659531622704e-5, }; - - /** - * Numerical precision to use + * @param x original integer + * @return Next number with all bits set */ - static final double NUM_PRECISION = 1E-15; + public static int nextAllOnesInt(int x) { + x |= x >>> 1; + x |= x >>> 2; + x |= x >>> 4; + x |= x >>> 8; + x |= x >>> 16; + return x; + } /** - * Compute logGamma. - * - * Based loosely on "Numerical Recpies" and the work of Paul Godfrey at - * http://my.fit.edu/~gabdo/gamma.txt + * Find the next larger number with all ones. * - * TODO: find out which approximation really is the best... + * Classic bit operation, for signed 64-bit. Valid for positive integers only + * (-1 otherwise). 
* - * @param x Parameter x - * @return @return log(Γ(x)) + * @param x original long integer + * @return Next number with all bits set */ - public static double logGamma(final double x) { - if(Double.isNaN(x) || (x <= 0.0)) { - return Double.NaN; - } - double g = 607.0 / 128.0; - double tmp = x + g + .5; - tmp = (x + 0.5) * Math.log(tmp) - tmp; - double ser = LANCZOS[0]; - for(int i = LANCZOS.length - 1; i > 0; --i) { - ser += LANCZOS[i] / (x + i); - } - return tmp + Math.log(SQRTTWOPI * ser / x); + public static long nextAllOnesLong(long x) { + x |= x >>> 1; + x |= x >>> 2; + x |= x >>> 4; + x |= x >>> 16; + x |= x >>> 32; + return x; } /** - * Returns the regularized gamma function P(a, x). - * - * Includes the quadrature way of computing. + * Return the largest double that rounds down to this float. * - * TODO: find "the" most accurate version of this. We seem to agree with - * others for the first 10+ digits, but diverge a bit later than that. + * Note: Probably not always correct - subnormal values are quite tricky. So + * some of the bounds might not be tight. 
* - * @param a Parameter a - * @param x Parameter x - * @return Gamma value + * @param f Float value + * @return Double value */ - public static double regularizedGammaP(final double a, final double x) { - // Special cases - if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + public static double floatToDoubleUpper(float f) { + if(Float.isNaN(f)) { return Double.NaN; } - if(x == 0.0) { - return 0.0; + if(Float.isInfinite(f)) { + if(f > 0) { + return Double.POSITIVE_INFINITY; + } + else { + return Double.longBitsToDouble(0xc7efffffffffffffl); + } } - if(x >= a + 1) { - // Expected to converge faster - return 1.0 - regularizedGammaQ(a, x); - } - // Loosely following "Numerical Recipes" - double del = 1.0 / a; - double sum = del; - for(int n = 1; n < Integer.MAX_VALUE; n++) { - // compute next element in the series - del *= x / (a + n); - sum = sum + del; - if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) { - break; + long bits = Double.doubleToRawLongBits((double) f); + if((bits & 0x8000000000000000l) == 0) { // Positive + if(bits == 0l) { + return Double.longBitsToDouble(0x3690000000000000l); } + if(f == Float.MIN_VALUE) { + // bits += 0x7_ffff_ffff_ffffl; + return Double.longBitsToDouble(0x36a7ffffffffffffl); + } + if(Float.MIN_NORMAL > f && f >= Double.MIN_NORMAL) { + // The most tricky case: + // a denormalized float, but a normalized double + final long bits2 = Double.doubleToRawLongBits((double) Math.nextUp(f)); + bits = (bits >>> 1) + (bits2 >>> 1) - 1l; + } + else { + bits += 0xfffffffl; // 28 extra bits + } + return Double.longBitsToDouble(bits); } - if(Double.isInfinite(sum)) { - return 1.0; + else { + if(bits == 0x8000000000000000l) { + return -0.0d; + } + if(f == -Float.MIN_VALUE) { + // bits -= 0xf_ffff_ffff_ffffl; + return Double.longBitsToDouble(0xb690000000000001l); + } + if(-Float.MIN_NORMAL < f && f <= -Double.MIN_NORMAL) { + // The most tricky case: + // a denormalized float, but a normalized double + 
final long bits2 = Double.doubleToRawLongBits((double) Math.nextUp(f)); + bits = (bits >>> 1) + (bits2 >>> 1) + 1l; + } + else { + bits -= 0xfffffffl; // 28 extra bits + } + return Double.longBitsToDouble(bits); } - return Math.exp(-x + (a * Math.log(x)) - logGamma(a)) * sum; } /** - * Returns the regularized gamma function Q(a, x) = 1 - P(a, x). - * - * Includes the continued fraction way of computing, based loosely on the book - * "Numerical Recipes"; but probably not with the exactly same precision, - * since we reimplemented this in our coding style, not literally. + * Return the largest double that rounds up to this float. * - * TODO: find "the" most accurate version of this. We seem to agree with - * others for the first 10+ digits, but diverge a bit later than that. + * Note: Probably not always correct - subnormal values are quite tricky. So + * some of the bounds might not be tight. * - * @param a parameter a - * @param x parameter x - * @return Result + * @param f Float value + * @return Double value */ - public static double regularizedGammaQ(final double a, final double x) { - if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + public static double floatToDoubleLower(float f) { + if(Float.isNaN(f)) { return Double.NaN; } - if(x == 0.0) { - return 1.0; - } - if(x < a + 1.0) { - // Expected to converge faster - return 1.0 - regularizedGammaP(a, x); - } - // Compute using continued fraction approach. 
- final double FPMIN = Double.MIN_VALUE / NUM_PRECISION; - double b = x + 1 - a; - double c = 1.0 / FPMIN; - double d = 1.0 / b; - double fac = d; - for(int i = 1; i < Integer.MAX_VALUE; i++) { - double an = i * (a - i); - b += 2; - d = an * d + b; - if(Math.abs(d) < FPMIN) { - d = FPMIN; + if(Float.isInfinite(f)) { + if(f < 0) { + return Double.NEGATIVE_INFINITY; + } + else { + return Double.longBitsToDouble(0x47efffffffffffffl); + } + } + long bits = Double.doubleToRawLongBits((double) f); + if((bits & 0x8000000000000000l) == 0) { // Positive + if(bits == 0l) { + return +0.0d; + } + if(f == Float.MIN_VALUE) { + // bits -= 0xf_ffff_ffff_ffffl; + return Double.longBitsToDouble(0x3690000000000001l); } - c = b + an / c; - if(Math.abs(c) < FPMIN) { - c = FPMIN; + if(Float.MIN_NORMAL > f /* && f >= Double.MIN_NORMAL */) { + // The most tricky case: + // a denormalized float, but a normalized double + final long bits2 = Double.doubleToRawLongBits((double) -Math.nextUp(-f)); + bits = (bits >>> 1) + (bits2 >>> 1) + 1l; // + (0xfff_ffffl << 18); } - d = 1 / d; - double del = d * c; - fac *= del; - if(Math.abs(del - 1.0) <= NUM_PRECISION) { - break; + else { + bits -= 0xfffffffl; // 28 extra bits } + return Double.longBitsToDouble(bits); } - return fac * Math.exp(-x + a * Math.log(x) - logGamma(a)); - } - - /** - * Compute the sum of the i first integers. 
- * - * @param i maximum summand - * @return Sum - */ - public static long sumFirstIntegers(final long i) { - return ((i - 1L) * i) / 2; - } - - /** - * Produce an array of random numbers in [0:1] - * - * @param len Length - * @return Array - */ - public static double[] randomDoubleArray(int len) { - return randomDoubleArray(len, new Random()); - } - - /** - * Produce an array of random numbers in [0:1] - * - * @param len Length - * @param r Random generator - * @return Array - */ - public static double[] randomDoubleArray(int len, Random r) { - final double[] ret = new double[len]; - for(int i = 0; i < len; i++) { - ret[i] = r.nextDouble(); + else { + if(bits == 0x8000000000000000l) { + return Double.longBitsToDouble(0xb690000000000000l); + } + if(f == -Float.MIN_VALUE) { + // bits += 0x7_ffff_ffff_ffffl; + return Double.longBitsToDouble(0xb6a7ffffffffffffl); + } + if(-Float.MIN_NORMAL < f /* && f <= -Double.MIN_NORMAL */) { + // The most tricky case: + // a denormalized float, but a normalized double + final long bits2 = Double.doubleToRawLongBits((double) -Math.nextUp(-f)); + bits = (bits >>> 1) + (bits2 >>> 1) - 1l; + } + else { + bits += 0xfffffffl; // 28 extra bits + } + return Double.longBitsToDouble(bits); } - return ret; - } - - /** - * Convert Degree to Radians - * - * @param deg Degree value - * @return Radian value - */ - public static double deg2rad(double deg) { - return deg * Math.PI / 180.0; - } - - /** - * Radians to Degree - * - * @param rad Radians value - * @return Degree value - */ - public static double rad2deg(double rad) { - return rad * 180 / Math.PI; - } - - /** - * Compute the approximate on-earth-surface distance of two points. 
- * - * @param lat1 Latitude of first point in degree - * @param lon1 Longitude of first point in degree - * @param lat2 Latitude of second point in degree - * @param lon2 Longitude of second point in degree - * @return Distance in km (approximately) - */ - public static double latlngDistance(double lat1, double lon1, double lat2, double lon2) { - final double EARTH_RADIUS = 6371; // km. - // Work in radians - lat1 = MathUtil.deg2rad(lat1); - lat2 = MathUtil.deg2rad(lat2); - lon1 = MathUtil.deg2rad(lon1); - lon2 = MathUtil.deg2rad(lon2); - // Delta - final double dlat = lat1 - lat2; - final double dlon = lon1 - lon2; - - // Spherical Law of Cosines - // NOTE: there seems to be a signedness issue in this code! - // double dist = Math.sin(lat1) * Math.sin(lat2) + Math.cos(lat1) * - // Math.cos(lat2) * Math.cos(dlon); - // return EARTH_RADIUS * Math.atan(dist); - - // Alternative: Havestine formula, higher precision at < 1 meters: - final double a = Math.sin(dlat / 2) * Math.sin(dlat / 2) + Math.sin(dlon / 2) * Math.sin(dlon / 2) * Math.cos(lat1) * Math.cos(lat2); - final double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); - return EARTH_RADIUS * c; - } - - /** - * Compute the cosine similarity for two vectors. - * - * @param v1 First vector - * @param v2 Second vector - * @return Cosine similarity - */ - public static double cosineSimilarity(Vector v1, Vector v2) { - return v1.scalarProduct(v2) / (v1.euclideanLength() * v2.euclideanLength()); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/Mean.java b/src/de/lmu/ifi/dbs/elki/math/Mean.java index 6f63c59e..56f73f8e 100644 --- a/src/de/lmu/ifi/dbs/elki/math/Mean.java +++ b/src/de/lmu/ifi/dbs/elki/math/Mean.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -157,4 +157,12 @@ public class Mean { public String toString() { return "Mean(" + getMean() + ")"; } + + /** + * Reset the value. + */ + public void reset() { + mean = 0; + wsum = 0; + } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java b/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java index 49f00856..0210b0fb 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java +++ b/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -176,7 +176,7 @@ public class MeanVariance extends Mean { * @return sample variance */ public double getSampleVariance() { - assert (wsum > 1); + assert (wsum > 1) : "Cannot compute a reasonable sample variance with weight <= 1.0!"; return nvar / (wsum - 1); } @@ -240,4 +240,10 @@ public class MeanVariance extends Mean { public String toString() { return "MeanVariance(mean=" + getMean() + ",var=" + getSampleVariance() + ")"; } + + @Override + public void reset() { + super.reset(); + nvar = 0; + } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java b/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java new file mode 100644 index 00000000..23e53bb5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java @@ -0,0 +1,139 @@ +package de.lmu.ifi.dbs.elki.math; + +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/** + * Class collecting mean, variance, minimum and maximum statistics. + * + * @author Erich Schubert + */ +public class MeanVarianceMinMax extends MeanVariance { + /** + * Minimum value + */ + double min = Double.POSITIVE_INFINITY; + + /** + * Maximum value + */ + double max = Double.NEGATIVE_INFINITY; + + /** + * Constructor. + */ + public MeanVarianceMinMax() { + super(); + } + + /** + * Constructor cloning existing statistics. 
+ * + * @param other Existing statistics + */ + public MeanVarianceMinMax(MeanVarianceMinMax other) { + super(other); + this.min = other.min; + this.max = other.max; + } + + @Override + public void put(double val) { + super.put(val); + min = Math.min(min, val); + max = Math.max(max, val); + } + + @Override + public void put(double val, double weight) { + super.put(val, weight); + min = Math.min(min, val); + max = Math.max(max, val); + } + + @Override + public void put(Mean other) { + if(other instanceof MeanVarianceMinMax) { + super.put(other); + min = Math.min(min, ((MeanVarianceMinMax) other).min); + max = Math.max(max, ((MeanVarianceMinMax) other).max); + } + else { + throw new AbortException("Cannot aggregate into a minmax statistic: " + other.getClass()); + } + } + + /** + * Get the current minimum. + * + * @return current minimum. + */ + public double getMin() { + return this.min; + } + + /** + * Get the current maximum. + * + * @return current maximum. + */ + public double getMax() { + return this.max; + } + + /** + * Return the difference between minimum and maximum. + * + * @return Difference of current Minimum and Maximum. + */ + public double getDiff() { + return this.getMax() - this.getMin(); + } + + /** + * Create and initialize a new array of MeanVarianceMinMax + * + * @param dimensionality Dimensionality + * @return New and initialized Array + */ + public static MeanVarianceMinMax[] newArray(int dimensionality) { + MeanVarianceMinMax[] arr = new MeanVarianceMinMax[dimensionality]; + for(int i = 0; i < dimensionality; i++) { + arr[i] = new MeanVarianceMinMax(); + } + return arr; + } + + @Override + public String toString() { + return "MeanVarianceMinMax(mean=" + getMean() + ",var=" + getSampleVariance() + ",min=" + getMin() + ",max=" + getMax() + ")"; + } + + @Override + public void reset() { + super.reset(); + min = Double.POSITIVE_INFINITY; + max = Double.NEGATIVE_INFINITY; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/MinMax.java b/src/de/lmu/ifi/dbs/elki/math/MinMax.java index e3dd9517..98b66173 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/MinMax.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/PearsonCorrelation.java b/src/de/lmu/ifi/dbs/elki/math/PearsonCorrelation.java new file mode 100644 index 00000000..175f128d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/PearsonCorrelation.java @@ -0,0 +1,269 @@ +package de.lmu.ifi.dbs.elki.math; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/** + * Class to incrementally compute pearson correlation. + * + * In fact, this actually computes Var(X), Var(Y) and Cov(X, Y), all of which + * can be obtained from this class. 
If you need more than two variables, use + * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix} which uses + * slightly more memory (by using arrays) but essentially does the same. + * + * @author Erich Schubert + */ +public class PearsonCorrelation { + /** + * Sum for XX + */ + private double sumXX = 0; + + /** + * Sum for YY + */ + private double sumYY = 0; + + /** + * Sum for XY + */ + private double sumXY = 0; + + /** + * Current mean for X + */ + private double meanX = 0; + + /** + * Current mean for Y + */ + private double meanY = 0; + + /** + * Weight sum + */ + private double sumWe = 0; + + /** + * Constructor. + */ + public PearsonCorrelation() { + super(); + } + + /** + * Put a single value into the correlation statistic. + * + * @param x Value in X + * @param y Value in Y + * @param w Weight + */ + public void put(double x, double y, double w) { + if(sumWe <= 0.0) { + meanX = x; + meanY = y; + sumWe = w; + return; + } + // Incremental update + sumWe += w; + // Delta to previous mean + final double deltaX = x - meanX; + final double deltaY = y - meanY; + // Update means + meanX += deltaX * w / sumWe; + meanY += deltaY * w / sumWe; + // Delta to new mean + final double neltaX = x - meanX; + final double neltaY = y - meanY; + // Update + sumXX += w * deltaX * neltaX; + sumYY += w * deltaY * neltaY; + // should equal weight * deltaY * neltaX! + sumXY += w * deltaX * neltaY; + } + + /** + * Put a single value into the correlation statistic. + * + * @param x Value in X + * @param y Value in Y + */ + public void put(double x, double y) { + put(x, y, 1.0); + } + + /** + * Get the pearson correlation value. 
+ * + * @return Correlation value + */ + public double getCorrelation() { + final double popSdX = getNaiveStddevX(); + final double popSdY = getNaiveStddevY(); + final double covXY = getNaiveCovariance(); + if(popSdX == 0 || popSdY == 0) { + return 0; + } + return covXY / (popSdX * popSdY); + } + + /** + * Get the number of points the average is based on. + * + * @return number of data points + */ + public double getCount() { + return sumWe; + } + + /** + * Return mean of X + * + * @return mean + */ + public double getMeanX() { + return meanX; + } + + /** + * Return mean of Y + * + * @return mean + */ + public double getMeanY() { + return meanY; + } + + /** + * Get the covariance of X and Y (not taking sampling into account) + * + * @return Covariance + */ + public double getNaiveCovariance() { + return sumXY / sumWe; + } + + /** + * Get the covariance of X and Y (with sampling correction) + * + * @return Covariance + */ + public double getSampleCovariance() { + assert (sumWe > 1); + return sumXY / (sumWe - 1); + } + + /** + * Return the naive variance (not taking sampling into account) + * + * Note: usually, you should be using {@link #getSampleVarianceX} instead! + * + * @return variance + */ + public double getNaiveVarianceX() { + return sumXX / sumWe; + } + + /** + * Return sample variance. + * + * @return sample variance + */ + public double getSampleVarianceX() { + assert (sumWe > 1); + return sumXX / (sumWe - 1); + } + + /** + * Return standard deviation using the non-sample variance + * + * Note: usually, you should be using {@link #getSampleStddevX} instead! 
+ * + * @return stddev + */ + public double getNaiveStddevX() { + return Math.sqrt(getNaiveVarianceX()); + } + + /** + * Return standard deviation + * + * @return stddev + */ + public double getSampleStddevX() { + return Math.sqrt(getSampleVarianceX()); + } + + /** + * Return the naive variance (not taking sampling into account) + * + * Note: usually, you should be using {@link #getSampleVarianceY} instead! + * + * @return variance + */ + public double getNaiveVarianceY() { + return sumYY / sumWe; + } + + /** + * Return sample variance. + * + * @return sample variance + */ + public double getSampleVarianceY() { + assert (sumWe > 1); + return sumYY / (sumWe - 1); + } + + /** + * Return standard deviation using the non-sample variance + * + * Note: usually, you should be using {@link #getSampleStddevY} instead! + * + * @return stddev + */ + public double getNaiveStddevY() { + return Math.sqrt(getNaiveVarianceY()); + } + + /** + * Return standard deviation + * + * @return stddev + */ + public double getSampleStddevY() { + return Math.sqrt(getSampleVarianceY()); + } + + /** + * Reset the value. + */ + public void reset() { + sumXX = 0; + sumXY = 0; + sumYY = 0; + meanX = 0; + meanY = 0; + sumWe = 0; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java new file mode 100644 index 00000000..8d2eb0f6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java @@ -0,0 +1,116 @@ +package de.lmu.ifi.dbs.elki.math.geometry; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.Polygon; +import de.lmu.ifi.dbs.elki.math.geometry.SweepHullDelaunay2D.Triangle; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Compute the alpha-Shape of a point set, using Delaunay triangulation. 
+ * + * @author Erich Schubert + */ +public class AlphaShape { + /** + * Alpha shape + */ + private double alpha2; + + /** + * Points + */ + private List<Vector> points; + + /** + * Delaunay triangulation + */ + private ArrayList<Triangle> delaunay = null; + + public AlphaShape(List<Vector> points, double alpha) { + this.alpha2 = alpha * alpha; + this.points = points; + } + + public List<Polygon> compute() { + // Compute delaunay triangulation: + delaunay = (new SweepHullDelaunay2D(points)).getDelaunay(); + + List<Polygon> polys = new ArrayList<Polygon>(); + + // Working data + BitSet used = new BitSet(delaunay.size()); + List<Vector> cur = new ArrayList<Vector>(); + + for(int i = 0 /* = used.nextClearBit(0) */; i < delaunay.size() && i >= 0; i = used.nextClearBit(i + 1)) { + if(used.get(i) == false) { + used.set(i); + Triangle tri = delaunay.get(i); + if(tri.r2 <= alpha2) { + // Check neighbors + processNeighbor(cur, used, i, tri.ab, tri.b); + processNeighbor(cur, used, i, tri.bc, tri.c); + processNeighbor(cur, used, i, tri.ca, tri.a); + } + if(cur.size() > 0) { + polys.add(new Polygon(cur)); + cur = new ArrayList<Vector>(); + } + } + } + + return polys; + } + + private void processNeighbor(List<Vector> cur, BitSet used, int i, int ab, int b) { + if(ab >= 0) { + if(used.get(ab)) { + return; + } + used.set(ab); + final Triangle next = delaunay.get(ab); + if(next.r2 < alpha2) { + // Continue where we left off... + if(next.ab == i) { + processNeighbor(cur, used, ab, next.bc, next.c); + processNeighbor(cur, used, ab, next.ca, next.a); + } + else if(next.bc == i) { + processNeighbor(cur, used, ab, next.ca, next.a); + processNeighbor(cur, used, ab, next.ab, next.b); + } + else if(next.ca == i) { + processNeighbor(cur, used, ab, next.ab, next.b); + processNeighbor(cur, used, ab, next.bc, next.c); + } + return; + } + } + cur.add(points.get(b)); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/ConvexHull2D.java b/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java index 10a27f38..6d71bbf1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/ConvexHull2D.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java @@ -1,26 +1,27 @@ -package de.lmu.ifi.dbs.elki.math;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2011 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +package de.lmu.ifi.dbs.elki.math.geometry;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.Collections;
import java.util.Comparator;
@@ -30,6 +31,7 @@ import java.util.List; import java.util.Stack;
import de.lmu.ifi.dbs.elki.data.spatial.Polygon;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -40,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * @author Erich Schubert
*/
@Reference(authors = "Paul Graham", title = "An Efficient Algorithm for Determining the Convex Hull of a Finite Planar Set", booktitle = "Information Processing Letters 1")
-public class ConvexHull2D {
+public class GrahamScanConvexHull2D {
/**
* The current set of points
*/
@@ -71,7 +73,7 @@ public class ConvexHull2D { /**
* Constructor.
*/
- public ConvexHull2D() {
+ public GrahamScanConvexHull2D() {
this.points = new LinkedList<Vector>();
}
@@ -81,7 +83,7 @@ public class ConvexHull2D { * @param point Point to add
*/
public void add(Vector point) {
- if (this.ok) {
+ if(this.ok) {
this.points = new LinkedList<Vector>(this.points);
this.ok = false;
}
@@ -116,18 +118,17 @@ public class ConvexHull2D { Collections.sort(this.points, new Comparator<Vector>() {
@Override
public int compare(Vector o1, Vector o2) {
- return isLeft(o1, o2, origin) ? +1 : -1;
+ return isLeft(o1, o2, origin);
}
});
-
grahamScan();
this.ok = true;
}
/**
* Find the starting point, and sort it to the beginning of the list. The
- * starting point must be on the outer hull. Any "most extreme" point will do, e.g.
- * the one with the lowest Y coordinate and for ties with the lowest X.
+ * starting point must be on the outer hull. Any "most extreme" point will do,
+ * e.g. the one with the lowest Y coordinate and for ties with the lowest X.
*/
private void findStartingPoint() {
// Well, we already know the best Y value...
@@ -177,17 +178,17 @@ public class ConvexHull2D { * @param a Vector A
* @param b Vector B
* @param o Origin vector
- * @return true when left
+ * @return +1 when left, 0 when same, -1 when right
*/
- protected final boolean isLeft(Vector a, Vector b, Vector o) {
+ protected final int isLeft(Vector a, Vector b, Vector o) {
final double cross = getRX(a, o) * getRY(b, o) - getRY(a, o) * getRX(b, o);
if(cross == 0) {
// Compare manhattan distances - same angle!
final double dista = Math.abs(getRX(a, o)) + Math.abs(getRY(a, o));
final double distb = Math.abs(getRX(b, o)) + Math.abs(getRY(b, o));
- return dista > distb;
+ return Double.compare(dista, distb);
}
- return cross > 0;
+ return Double.compare(cross, 0);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java new file mode 100644 index 00000000..2533a2b0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java @@ -0,0 +1,929 @@ +package de.lmu.ifi.dbs.elki.math.geometry; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; +import java.util.Random; + +import de.lmu.ifi.dbs.elki.data.spatial.Polygon; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/** + * Compute the Convex Hull and/or Delaunay Triangulation, using the sweep-hull + * approach of David Sinclair. 
+ * + * Note: This implementation does not check or handle duplicate points! + * + * @author Erich Schubert + */ +@Reference(authors = "David Sinclair", title = "S-hull: a fast sweep-hull routine for Delaunay triangulation", booktitle = "Online: http://s-hull.org/") +public class SweepHullDelaunay2D { + /** + * Class logger + */ + private static final Logging logger = Logging.getLogger(SweepHullDelaunay2D.class); + + /** + * The current set of points. + * + * Note: this list should not be changed after running the algorithm, since we + * use it for object indexing, and the ids should not change + */ + private List<Vector> points; + + /** + * Triangles + */ + private ArrayList<Triangle> tris = null; + + /** + * Internal representation of the hull + */ + private LinkedList<IntIntPair> hull = null; + + /** + * Constructor. + */ + public SweepHullDelaunay2D() { + this(new ArrayList<Vector>()); + } + + /** + * Constructor. + * + * @param points Existing points + */ + public SweepHullDelaunay2D(List<Vector> points) { + this.points = points; + } + + /** + * Add a single point to the list (this does not compute or update the + * triangulation!) + * + * @param point Point to add + */ + public void add(Vector point) { + this.points.add(point); + // Invalidate + hull = null; + tris = null; + } + + /** + * Run the actual algorithm + * + * @param hullonly + */ + void run(boolean hullonly) { + if(points.size() < 3) { + throw new UnsupportedOperationException("There is no delaunay triangulation for less than three objects!"); + } + int len = points.size() - 1; + hull = new LinkedList<IntIntPair>(); + tris = hullonly ? null : new ArrayList<Triangle>(len); + + final Vector seed; + final int seedid = 0; + final DoubleIntPair[] sort = new DoubleIntPair[len]; + // TODO: remove duplicates. 
+ + // Select seed, sort by squared euclidean distance + { + Iterator<Vector> iter = points.iterator(); + seed = iter.next(); + for(int i = 0; iter.hasNext(); i++) { + assert (i < len); + Vector p = iter.next(); + // Pair with distance, list-position + sort[i] = new DoubleIntPair(quadraticEuclidean(seed, p), i + 1); + } + assert (sort[len - 1] != null); + Arrays.sort(sort); + } + assert (sort[0].first > 0); + // final Vector seed2 = points.get(sort[0].second); + final int seed2id = sort[0].second; + int start = 1; + + // Find minimal triangle for these two points: + Triangle besttri = new Triangle(seedid, seed2id, -1); + { + besttri.r2 = Double.MAX_VALUE; + Triangle testtri = new Triangle(seedid, seed2id, -1); + int besti = -1; + for(int i = start; i < len; i++) { + // Update test triad + testtri.c = sort[i].second; + if(testtri.updateCircumcircle(points) && testtri.r2 < besttri.r2) { + besttri.copyFrom(testtri); + besti = i; + } + else if(besttri.r2 * 4 < sort[i].first) { + // Stop early, points are too far away from seed. + break; + } + } + assert (besti != -1); + // Rearrange - remove third seed point. + if(besti > 1) { + DoubleIntPair tmp = sort[besti]; + System.arraycopy(sort, 1, sort, 2, besti - 1); + sort[1] = tmp; + } + } + start = 2; // First two points have already been processed. 
+ + // Make right-handed: + besttri.makeClockwise(points); + // Seed triangulation + if(!hullonly) { + tris.add(besttri); + } + // Seed convex hull + hull.add(new IntIntPair(besttri.a, 0)); + hull.add(new IntIntPair(besttri.b, 0)); + hull.add(new IntIntPair(besttri.c, 0)); + + if(logger.isDebuggingFinest()) { + debugHull(); + } + + // Resort from triangle center + Vector center = besttri.m; + for(int i = start; i < len; i++) { + sort[i].first = quadraticEuclidean(center, points.get(sort[i].second)); + } + Arrays.sort(sort, start, len); + + // Grow hull and triangles + for(int i = start; i < len; i++) { + final int pointId = sort[i].second; + final Vector newpoint = points.get(pointId); + + LinkedList<Triangle> newtris = hullonly ? null : new LinkedList<Triangle>(); + // We identify edges by their starting point. -1 is invalid. + int hstart = -1, hend = -1; + // Find first and last consecutive visible edge, backwards: + { + Iterator<IntIntPair> iter = hull.descendingIterator(); + IntIntPair next = hull.getFirst(); + Vector nextV = points.get(next.first); + for(int pos = hull.size() - 1; iter.hasNext(); pos--) { + IntIntPair prev = iter.next(); + Vector prevV = points.get(prev.first); + // Not yet visible: + if(hend < 0) { + if(leftOf(prevV, nextV, newpoint)) { + hstart = pos; + hend = pos; + if(!hullonly) { + // Clockwise, A is new point! + Triangle tri = new Triangle(pointId, next.first, prev.first); + assert (tri.isClockwise(points)); + assert (prev.second >= 0); + tri.updateCircumcircle(points); + tri.bc = prev.second; + newtris.addFirst(tri); + } + } + } + else { + if(leftOf(prevV, nextV, newpoint)) { + hstart = pos; + // Add triad: + if(!hullonly) { + // Clockwise, A is new point! 
+ Triangle tri = new Triangle(pointId, next.first, prev.first); + assert (tri.isClockwise(points)); + assert (prev.second >= 0); + tri.updateCircumcircle(points); + tri.bc = prev.second; + newtris.addFirst(tri); + } + } + else { + break; + } + } + next = prev; + nextV = prevV; + } + } + // If the last edge was visible, we also need to scan forwards: + if(hend == hull.size() - 1) { + Iterator<IntIntPair> iter = hull.iterator(); + IntIntPair prev = iter.next(); + Vector prevV = points.get(prev.first); + while(iter.hasNext()) { + IntIntPair next = iter.next(); + Vector nextV = points.get(next.first); + if(leftOf(prevV, nextV, newpoint)) { + hend++; + // Add triad: + if(!hullonly) { + // Clockwise, A is new point! + Triangle tri = new Triangle(pointId, next.first, prev.first); + assert (tri.isClockwise(points)); + assert (prev.second >= 0); + tri.updateCircumcircle(points); + tri.bc = prev.second; + newtris.addLast(tri); + } + } + else { + break; + } + prev = next; + prevV = nextV; + } + } + assert (hstart >= 0 && hend >= hstart); + // Note that hend can be larger than hull.size() now, interpret as + // "hend % hull.size()" + // Update hull, remove points + final int firsttri, lasttri; + if(hullonly) { + firsttri = -1; + lasttri = -1; + } + else { + final int tristart = tris.size(); + firsttri = tristart; + lasttri = tristart + newtris.size() - 1; + } + final int hullsize = hull.size(); + if(logger.isDebuggingFinest()) { + logger.debugFinest("Size: " + hullsize + " start: " + hstart + " end: " + hend); + } + if(hend < hullsize) { + ListIterator<IntIntPair> iter = hull.listIterator(); + int p = 0; + // Skip + for(; p <= hstart; p++) { + iter.next(); + } + // Remove + for(; p <= hend; p++) { + iter.next(); + iter.remove(); + } + // Insert, and update edge->triangle mapping + iter.add(new IntIntPair(pointId, lasttri)); + iter.previous(); + if(!hullonly) { + if(iter.hasPrevious()) { + iter.previous().second = firsttri; + } + else { + hull.getLast().second = firsttri; + } + 
} + } + else { + // System.err.println("Case #2 "+pointId+" "+hstart+" "+hend+" "+hullsize); + ListIterator<IntIntPair> iter = hull.listIterator(); + // Remove end + int p = hullsize; + for(; p <= hend; p++) { + iter.next(); + iter.remove(); + } + // Insert + iter.add(new IntIntPair(pointId, lasttri)); + // Wrap around + p -= hullsize; + IntIntPair pre = null; + for(; p <= hstart; p++) { + pre = iter.next(); + } + assert (pre != null); + pre.second = firsttri; + // Remove remainder + while(iter.hasNext()) { + iter.next(); + iter.remove(); + } + } + if(logger.isDebuggingFinest()) { + debugHull(); + } + if(!hullonly) { + final int tristart = tris.size(); + // Connect triads (they are ordered) + Iterator<Triangle> iter = newtris.iterator(); + for(int o = 0; iter.hasNext(); o++) { + // This triangle has num tristart + o. + Triangle cur = iter.next(); + if(o > 0) { + cur.ca = tristart + o - 1; // previously added triangle + } + else { + cur.ca = -1; // outside + } + if(iter.hasNext()) { + cur.ab = tristart + o + 1; // next triangle + } + else { + cur.ab = -1; // outside + } + // cur.bc was set upon creation + assert (cur.bc >= 0); + Triangle other = tris.get(cur.bc); + Orientation orient = cur.findOrientation(other); + assert (orient != null) : "Inconsistent triangles: " + cur + " " + other; + switch(orient){ + case ORIENT_BC_BA: + assert (other.ab == -1) : "Inconsistent triangles: " + cur + " " + other; + other.ab = tristart + o; + break; + case ORIENT_BC_CB: + assert (other.bc == -1) : "Inconsistent triangles: " + cur + " " + other; + other.bc = tristart + o; + break; + case ORIENT_BC_AC: + assert (other.ca == -1) : "Inconsistent triangles: " + cur + " " + other; + other.ca = tristart + o; + break; + default: + assert (cur.isClockwise(points)); + assert (other.isClockwise(points)); + throw new RuntimeException("Inconsistent triangles: " + cur + " " + other + " size:" + tris.size()); + } + tris.add(cur); + } + assert (tris.size() == lasttri + 1); + } + } + // Now check 
for triangles that need flipping. + if(!hullonly) { + final int size = tris.size(); + BitSet flippedA = new BitSet(size); + BitSet flippedB = new BitSet(size); + // Initial flip + int flipped = flipTriangles(null, flippedA); + for(int iterations = 1; iterations < 2000 && flipped > 0; iterations++) { + if(iterations % 2 == 1) { + flipped = flipTriangles(flippedA, flippedB); + } + else { + flipped = flipTriangles(flippedB, flippedA); + } + } + } + } + + /** + * Debug helper + */ + void debugHull() { + StringBuffer buf = new StringBuffer(); + for(IntIntPair p : hull) { + buf.append(p).append(" "); + } + logger.debugFinest(buf); + } + + /** + * Flip triangles as necessary + * + * @param flippedA Bit set for triangles to test + * @param flippedB Bit set to mark triangles as done + */ + int flipTriangles(BitSet flippedA, BitSet flippedB) { + final int size = tris.size(); + int numflips = 0; + flippedB.clear(); + if(flippedA == null) { + for(int i = 0; i < size; i++) { + if(flipTriangle(i, flippedB) > 0) { + numflips += 2; + } + } + } + else { + for(int i = flippedA.nextSetBit(0); i > -1; i = flippedA.nextSetBit(i + 1)) { + if(flipTriangle(i, flippedB) > 0) { + numflips += 2; + } + } + } + return numflips; + } + + /** + * Flip a single triangle, if necessary. 
+ * + * @param i Triangle number + * @param flipped Bitset to modify + * @return number of other triangle, or -1 + */ + int flipTriangle(int i, BitSet flipped) { + final Triangle cur = tris.get(i); + // Test edge AB: + if(cur.ab >= 0) { + final int ot = cur.ab; + Triangle oth = tris.get(ot); + Orientation orient = cur.findOrientation(oth); + final int opp, lef, rig; + switch(orient){ + case ORIENT_AB_BA: + opp = oth.c; + lef = oth.bc; + rig = oth.ca; + break; + case ORIENT_AB_CB: + opp = oth.a; + lef = oth.ca; + rig = oth.ab; + break; + case ORIENT_AB_AC: + opp = oth.b; + lef = oth.ab; + rig = oth.bc; + break; + default: + throw new RuntimeException("Neighbor triangles not aligned?"); + } + if(cur.inCircle(points.get(opp))) { + // Replace edge AB, connect c with "opp" instead. + final int a = cur.c, b = cur.a, c = opp, d = cur.b; + final int ab = cur.ca, bc = lef, cd = rig, da = cur.bc; + final int ca = ot, ac = i; + // Update current: + cur.set(a, ab, b, bc, c, ca); + cur.updateCircumcircle(points); + // Update other: + oth.set(c, cd, d, da, a, ac); + oth.updateCircumcircle(points); + // Update tri touching on BC and DA: + if(bc >= 0) { + tris.get(bc).replaceEdge(c, b, ot, i); + } + if(da >= 0) { + tris.get(da).replaceEdge(a, d, i, ot); + } + flipped.set(i); + flipped.set(ot); + return ot; + } + } + // Test edge BC: + if(cur.bc >= 0) { + final int ot = cur.bc; + Triangle oth = tris.get(ot); + Orientation orient = cur.findOrientation(oth); + final int opp, lef, rig; + switch(orient){ + case ORIENT_BC_BA: + opp = oth.c; + lef = oth.bc; + rig = oth.ca; + break; + case ORIENT_BC_CB: + opp = oth.a; + lef = oth.ca; + rig = oth.ab; + break; + case ORIENT_BC_AC: + opp = oth.b; + lef = oth.ab; + rig = oth.bc; + break; + default: + throw new RuntimeException("Neighbor triangles not aligned?"); + } + if(cur.inCircle(points.get(opp))) { + // Replace edge BC, connect A with "opp" instead. 
+ final int a = cur.a, b = cur.b, c = opp, d = cur.c; + final int ab = cur.ab, bc = lef, cd = rig, da = cur.ca; + final int ca = ot, ac = i; + // Update current: + cur.set(a, ab, b, bc, c, ca); + cur.updateCircumcircle(points); + // Update other: + oth.set(c, cd, d, da, a, ac); + oth.updateCircumcircle(points); + // Update tri touching on BC and DA: + if(bc >= 0) { + tris.get(bc).replaceEdge(c, b, ot, i); + } + if(da >= 0) { + tris.get(da).replaceEdge(a, d, i, ot); + } + flipped.set(i); + flipped.set(ot); + return ot; + } + } + // Test edge CA: + if(cur.ca >= 0) { + final int ot = cur.ca; + Triangle oth = tris.get(ot); + Orientation orient = cur.findOrientation(oth); + final int opp, lef, rig; + switch(orient){ + case ORIENT_CA_BA: + opp = oth.c; + lef = oth.bc; + rig = oth.ca; + break; + case ORIENT_CA_CB: + opp = oth.a; + lef = oth.ca; + rig = oth.ab; + break; + case ORIENT_CA_AC: + opp = oth.b; + lef = oth.ab; + rig = oth.bc; + break; + default: + throw new RuntimeException("Neighbor triangles not aligned?"); + } + if(cur.inCircle(points.get(opp))) { + // Replace edge CA, connect B with "opp" instead. + final int a = cur.b, b = cur.c, c = opp, d = cur.a; + final int ab = cur.bc, bc = lef, cd = rig, da = cur.ab; + final int ca = ot, ac = i; + // Update current: + cur.set(a, ab, b, bc, c, ca); + cur.updateCircumcircle(points); + // Update other: + oth.set(c, cd, d, da, a, ac); + oth.updateCircumcircle(points); + // Update tri touching on BC and DA: + if(bc >= 0) { + tris.get(bc).replaceEdge(c, b, ot, i); + } + if(da >= 0) { + tris.get(da).replaceEdge(a, d, i, ot); + } + flipped.set(i); + flipped.set(ot); + return ot; + } + } + return -1; + } + + /** + * Get the convex hull only. + * + * Note: if you also want the Delaunay Triangulation, you should get that + * first! 
+ * + * @return Convex hull + */ + public Polygon getHull() { + if(hull == null) { + run(true); + } + DoubleMinMax minmaxX = new DoubleMinMax(); + DoubleMinMax minmaxY = new DoubleMinMax(); + List<Vector> hullp = new ArrayList<Vector>(hull.size()); + for(IntIntPair pair : hull) { + Vector v = points.get(pair.first); + hullp.add(v); + minmaxX.put(v.get(0)); + minmaxY.put(v.get(1)); + } + return new Polygon(hullp, minmaxX.getMin(), minmaxX.getMax(), minmaxY.getMin(), minmaxY.getMax()); + } + + /** + * Get the Delaunay triangulation. + * + * @return Triangle list + */ + public ArrayList<Triangle> getDelaunay() { + if(tris == null) { + run(false); + } + return tris; + } + + /** + * Squared euclidean distance. 2d. + * + * @param v1 First vector + * @param v2 Second vector + * @return Quadratic distance + */ + public static double quadraticEuclidean(Vector v1, Vector v2) { + final double d1 = v1.get(0) - v2.get(0); + final double d2 = v1.get(1) - v2.get(1); + return (d1 * d1) + (d2 * d2); + } + + /** + * Test if the vector AD is right of AB. + * + * @param a Starting point + * @param b Reference point + * @param d Test point + * @return true when on the left side + */ + boolean leftOf(Vector a, Vector b, Vector d) { + final double bax = b.get(0) - a.get(0); + final double bay = b.get(1) - a.get(1); + final double dax = d.get(0) - a.get(0); + final double day = d.get(1) - a.get(1); + final double cross = bax * day - bay * dax; + return cross > 0; + } + + /** + * The possible orientations two triangles can have to each other. (Shared + * edges must have different directions!) + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + static enum Orientation { + ORIENT_AB_BA, ORIENT_AB_CB, ORIENT_AB_AC, ORIENT_BC_BA, ORIENT_BC_CB, ORIENT_BC_AC, ORIENT_CA_BA, ORIENT_CA_CB, ORIENT_CA_AC + } + + /** + * Class representing a triangle, by referencing points in a list. 
+ * + * @author Erich Schubert + */ + public static class Triangle { + /** + * References to points in Delaunay2D.points + */ + public int a, b, c; + + /** + * References to neighbor triangles + */ + public int ab = -1, ca = -1, bc = -1; + + /** + * Circumcircle parameters + */ + public double r2 = -1; + + /** + * Center vector + */ + public Vector m = new Vector(2); + + /** + * Constructor. + * + * @param x + * @param y + * @param z + */ + public Triangle(int x, int y, int z) { + a = x; + b = y; + c = z; + } + + /** + * Replace an edge + * + * @param a First point + * @param b Second point + * @param ol Previous value + * @param ne New value + */ + void replaceEdge(int a, int b, int ol, int ne) { + if(this.a == a && this.b == b) { + assert (this.ab == ol) : "Edge doesn't match: " + this + " " + a + " " + b + " " + ol + " " + ne; + this.ab = ne; + return; + } + if(this.b == a && this.c == b) { + assert (this.bc == ol) : "Edge doesn't match: " + this + " " + a + " " + b + " " + ol + " " + ne; + this.bc = ne; + return; + } + if(this.c == a && this.a == b) { + assert (this.ca == ol) : "Edge doesn't match: " + this + " " + a + " " + b + " " + ol + " " + ne; + this.ca = ne; + return; + } + } + + /** + * Update the triangle. + * + * @param a First point + * @param ab Edge + * @param b Second point + * @param bc Edge + * @param c Third point + * @param ca Edge + */ + void set(int a, int ab, int b, int bc, int c, int ca) { + this.a = a; + this.ab = ab; + this.b = b; + this.bc = bc; + this.c = c; + this.ca = ca; + } + + /** + * Test whether a point is within the circumference circle. + * + * @param opp Test vector + * @return true when contained + */ + public boolean inCircle(Vector opp) { + double dx = opp.get(0) - m.get(0); + double dy = opp.get(1) - m.get(1); + return (dx * dx + dy * dy) <= r2; + } + + /** + * Find the orientation of the triangles to each other. 
+ * + * @param oth Other triangle + * @return shared edge + */ + Orientation findOrientation(Triangle oth) { + if(this.a == oth.a) { + if(this.b == oth.c) { + return Orientation.ORIENT_AB_AC; + } + if(this.c == oth.b) { + return Orientation.ORIENT_CA_BA; + } + } + if(this.a == oth.b) { + if(this.b == oth.a) { + return Orientation.ORIENT_AB_BA; + } + if(this.c == oth.c) { + return Orientation.ORIENT_CA_CB; + } + } + if(this.a == oth.c) { + if(this.b == oth.b) { + return Orientation.ORIENT_AB_CB; + } + if(this.c == oth.a) { + return Orientation.ORIENT_CA_AC; + } + } + if(this.b == oth.b) { + if(this.c == oth.a) { + return Orientation.ORIENT_BC_BA; + } + } + if(this.b == oth.c) { + if(this.c == oth.b) { + return Orientation.ORIENT_BC_CB; + } + } + if(this.b == oth.a) { + if(this.c == oth.c) { + return Orientation.ORIENT_BC_AC; + } + } + return null; + } + + /** + * Make the triangle clockwise + */ + void makeClockwise(List<Vector> points) { + if(!isClockwise(points)) { + // Swap points B, C + int t = b; + b = c; + c = t; + // And the associated edges + t = ab; + ab = ca; + ca = t; + } + } + + /** + * Verify that the triangle is clockwise + */ + boolean isClockwise(List<Vector> points) { + // Mean + double centX = (points.get(a).get(0) + points.get(b).get(0) + points.get(c).get(0)) / 3.0f; + double centY = (points.get(a).get(1) + points.get(b).get(1) + points.get(c).get(1)) / 3.0f; + + double dr0 = points.get(a).get(0) - centX, dc0 = points.get(a).get(1) - centY; + double dx01 = points.get(b).get(0) - points.get(a).get(0), dy01 = points.get(b).get(1) - points.get(a).get(1); + + double df = -dx01 * dc0 + dy01 * dr0; + return (df <= 0); + } + + /** + * Copy the values from another triangle. + * + * @param o object to copy from + */ + void copyFrom(Triangle o) { + this.a = o.a; + this.b = o.b; + this.c = o.c; + this.r2 = o.r2; + this.m.set(0, o.m.get(0)); + this.m.set(1, o.m.get(1)); + } + + /** + * Recompute the location and squared radius of circumcircle. 
+ * + * Note: numerical stability is important! + * + * @return success + */ + boolean updateCircumcircle(List<Vector> points) { + Vector pa = points.get(a), pb = points.get(b), pc = points.get(c); + + // Compute vectors from A: AB, AC: + final double abx = pb.get(0) - pa.get(0), aby = pb.get(1) - pa.get(1); + final double acx = pc.get(0) - pa.get(0), acy = pc.get(1) - pa.get(1); + + // Squared euclidean lengths + final double ablen = abx * abx + aby * aby; + final double aclen = acx * acx + acy * acy; + + // Compute D + final double D = 2 * (abx * acy - aby * acx); + + // No circumcircle: + if(D == 0) { + return false; + } + + // Compute offset: + final double offx = (acy * ablen - aby * aclen) / D; + final double offy = (abx * aclen - acx * ablen) / D; + + + // Avoid degeneration: + r2 = offx * offx + offy * offy; + if((r2 > 1e10 * ablen || r2 > 1e10 * aclen)) { + return false; + } + + m.set(0, pa.get(0) + offx); + m.set(1, pa.get(1) + offy); + return true; + } + + @Override + public String toString() { + return "Triangle [a=" + a + ", b=" + b + ", c=" + c + ", ab=" + ab + ", ac=" + ca + ", bc=" + bc + "]"; + } + } + + public static void main(String[] args) { + SweepHullDelaunay2D d = new SweepHullDelaunay2D(); + + Random r = new Random(1); + final int num = 100000; + for(int i = 0; i < num; i++) { + final Vector v = new Vector(r.nextDouble(), r.nextDouble()); + // System.err.println(i + ": " + FormatUtil.format(v.getArrayRef(), " ")); + d.add(v); + } + d.run(false); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/package-info.java b/src/de/lmu/ifi/dbs/elki/math/geometry/package-info.java new file mode 100644 index 00000000..a7bb6e6e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Algorithms from computational geometry.</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.math.geometry;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/AggregatingHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java index 47919752..baa9eb54 100644 --- a/src/de/lmu/ifi/dbs/elki/math/AggregatingHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.math; +package de.lmu.ifi.dbs.elki.math.histograms; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,9 @@ package de.lmu.ifi.dbs.elki.math; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; /** @@ -46,7 +49,7 @@ public class AggregatingHistogram<T, D> extends ReplacingHistogram<T> { * Adapter class for an AggregatingHistogram * * @author Erich Schubert - * + * * @param <T> Histogram bin type * @param <D> Incoming data type */ @@ -62,7 +65,7 @@ public class AggregatingHistogram<T, D> extends ReplacingHistogram<T> { } /** - * Constructor with Adapter. + * Constructor with Adapter. * * @param bins Number of bins * @param min Minimum value @@ -181,31 +184,31 @@ public class AggregatingHistogram<T, D> extends ReplacingHistogram<T> { * Histograms that work like two {@link #IntSumHistogram}, component wise. * * @param bins Number of bins. 
- * @param min Minimum value + * @param min Minimum value * @param max Maximum value * @return Histogram object */ - public static AggregatingHistogram<Pair<Integer, Integer>, Pair<Integer, Integer>> IntSumIntSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<Pair<Integer, Integer>, Pair<Integer, Integer>>(bins, min, max, new Adapter<Pair<Integer, Integer>, Pair<Integer, Integer>>() { + public static AggregatingHistogram<IntIntPair, IntIntPair> IntSumIntSumHistogram(int bins, double min, double max) { + return new AggregatingHistogram<IntIntPair, IntIntPair>(bins, min, max, new Adapter<IntIntPair, IntIntPair>() { @Override - public Pair<Integer, Integer> make() { - return new Pair<Integer, Integer>(0,0); + public IntIntPair make() { + return new IntIntPair(0, 0); } @Override - public Pair<Integer, Integer> aggregate(Pair<Integer, Integer> existing, Pair<Integer, Integer> data) { - existing.setFirst(existing.getFirst() + data.getFirst()); - existing.setSecond(existing.getSecond() + data.getSecond()); + public IntIntPair aggregate(IntIntPair existing, IntIntPair data) { + existing.first = existing.first + data.first; + existing.second = existing.second + data.second; return existing; } }); } - + /** * Histograms that work like two {@link #LongSumHistogram}, component wise. * * @param bins Number of bins. 
- * @param min Minimum value + * @param min Minimum value * @param max Maximum value * @return Histogram object */ @@ -213,7 +216,7 @@ public class AggregatingHistogram<T, D> extends ReplacingHistogram<T> { return new AggregatingHistogram<Pair<Long, Long>, Pair<Long, Long>>(bins, min, max, new Adapter<Pair<Long, Long>, Pair<Long, Long>>() { @Override public Pair<Long, Long> make() { - return new Pair<Long, Long>(0L,0L); + return new Pair<Long, Long>(0L, 0L); } @Override @@ -224,26 +227,26 @@ public class AggregatingHistogram<T, D> extends ReplacingHistogram<T> { } }); } - + /** * Histograms that work like two {@link #DoubleSumHistogram}, component wise. * * @param bins Number of bins. - * @param min Minimum value + * @param min Minimum value * @param max Maximum value * @return Histogram object */ - public static AggregatingHistogram<Pair<Double, Double>, Pair<Double, Double>> DoubleSumDoubleSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<Pair<Double, Double>, Pair<Double, Double>>(bins, min, max, new Adapter<Pair<Double, Double>, Pair<Double, Double>>() { + public static AggregatingHistogram<DoubleDoublePair, DoubleDoublePair> DoubleSumDoubleSumHistogram(int bins, double min, double max) { + return new AggregatingHistogram<DoubleDoublePair, DoubleDoublePair>(bins, min, max, new Adapter<DoubleDoublePair, DoubleDoublePair>() { @Override - public Pair<Double, Double> make() { - return new Pair<Double, Double>(0.,0.); + public DoubleDoublePair make() { + return new DoubleDoublePair(0., 0.); } @Override - public Pair<Double, Double> aggregate(Pair<Double, Double> existing, Pair<Double, Double> data) { - existing.setFirst(existing.getFirst() + data.getFirst()); - existing.setSecond(existing.getSecond() + data.getSecond()); + public DoubleDoublePair aggregate(DoubleDoublePair existing, DoubleDoublePair data) { + existing.first = existing.first + data.first; + existing.second = existing.second + data.second; return existing; } }); diff 
--git a/src/de/lmu/ifi/dbs/elki/math/FlexiHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java index 9023a126..cee151ed 100644 --- a/src/de/lmu/ifi/dbs/elki/math/FlexiHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.math; +package de.lmu.ifi.dbs.elki.math.histograms; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,8 +26,12 @@ package de.lmu.ifi.dbs.elki.math; import java.util.ArrayList; import java.util.Iterator; +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.scales.LinearScale; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; -import de.lmu.ifi.dbs.elki.visualization.scales.LinearScale; /** * Histogram with flexible size, guaranteed to be in [bin, 2*bin[ @@ -39,16 +43,16 @@ import de.lmu.ifi.dbs.elki.visualization.scales.LinearScale; * @param <T> Type of data in histogram * @param <D> Type of input data */ -public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { +public class FlexiHistogram<T, D> extends AggregatingHistogram<T, D> { /** * Adapter class, extended "maker". */ - private Adapter<T,D> downsampler; + private Adapter<T, D> downsampler; /** * Cache for elements when not yet initialized. */ - private ArrayList<Pair<Double, D>> tempcache = null; + private ArrayList<DoubleObjPair<D>> tempcache = null; /** * Destination (minimum) size of the structure. 
At most 2*destsize bins are @@ -64,7 +68,7 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { * @param <T> Type of data in histogram * @param <D> Type of input data */ - public static abstract class Adapter<T,D> extends AggregatingHistogram.Adapter<T,D> { + public static abstract class Adapter<T, D> extends AggregatingHistogram.Adapter<T, D> { /** * Rule to combine two bins into one. * @@ -94,11 +98,11 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { * @param bins Target number of bins * @param adapter Adapter for data types and combination rules. */ - public FlexiHistogram(int bins, Adapter<T,D> adapter) { + public FlexiHistogram(int bins, Adapter<T, D> adapter) { super(bins, 0.0, 1.0, adapter); this.destsize = bins; this.downsampler = adapter; - tempcache = new ArrayList<Pair<Double, D>>(this.destsize * 2); + tempcache = new ArrayList<DoubleObjPair<D>>(this.destsize * 2); } private synchronized void materialize() { @@ -114,7 +118,7 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { } double min = Double.MAX_VALUE; double max = Double.MIN_VALUE; - for(Pair<Double, D> pair : tempcache) { + for(DoubleObjPair<D> pair : tempcache) { min = Math.min(min, pair.first); max = Math.max(max, pair.first); } @@ -131,7 +135,7 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { this.data.add(downsampler.make()); } // re-insert data we have - for(Pair<Double, D> pair : tempcache) { + for(DoubleObjPair<D> pair : tempcache) { super.aggregate(pair.first, pair.second); } // delete cache, signal that we're initialized @@ -152,9 +156,10 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { // Resampling. 
ArrayList<T> newdata = new ArrayList<T>(this.destsize * 2); for(int i = 0; i < super.size; i += 2) { - if (i + 1 < super.size) { + if(i + 1 < super.size) { newdata.add(downsampler.downsample(super.data.get(i), super.data.get(i + 1))); - } else { + } + else { newdata.add(downsampler.downsample(super.data.get(i), super.make())); } } @@ -208,13 +213,31 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { } @Override - public Iterator<Pair<Double, T>> iterator() { + public double getBinMean(int bin) { + materialize(); + return super.getBinMean(bin); + } + + @Override + public double getBinMin(int bin) { + materialize(); + return super.getBinMin(bin); + } + + @Override + public double getBinMax(int bin) { + materialize(); + return super.getBinMax(bin); + } + + @Override + public Iterator<DoubleObjPair<T>> iterator() { materialize(); return super.iterator(); } @Override - public Iterator<Pair<Double, T>> reverseIterator() { + public Iterator<DoubleObjPair<T>> reverseIterator() { materialize(); return super.reverseIterator(); } @@ -223,7 +246,7 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { public void aggregate(double coord, D value) { if(tempcache != null) { if(tempcache.size() < this.destsize * 2) { - tempcache.add(new Pair<Double, D>(coord, downsampler.cloneForCache(value))); + tempcache.add(new DoubleObjPair<D>(coord, downsampler.cloneForCache(value))); return; } else { @@ -372,32 +395,32 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { * @param bins Number of bins. 
* @return New Histogram object */ - public static FlexiHistogram<Pair<Integer, Integer>, Pair<Integer, Integer>> IntSumIntSumHistogram(int bins) { - return new FlexiHistogram<Pair<Integer, Integer>, Pair<Integer, Integer>>(bins, new Adapter<Pair<Integer, Integer>, Pair<Integer, Integer>>() { + public static FlexiHistogram<IntIntPair, IntIntPair> IntSumIntSumHistogram(int bins) { + return new FlexiHistogram<IntIntPair, IntIntPair>(bins, new Adapter<IntIntPair, IntIntPair>() { @Override - public Pair<Integer, Integer> make() { - return new Pair<Integer, Integer>(0,0); + public IntIntPair make() { + return new IntIntPair(0, 0); } @Override - public Pair<Integer, Integer> cloneForCache(Pair<Integer, Integer> data) { - return new Pair<Integer, Integer>(data.getFirst(), data.getSecond()); + public IntIntPair cloneForCache(IntIntPair data) { + return new IntIntPair(data.first, data.second); } @Override - public Pair<Integer, Integer> downsample(Pair<Integer, Integer> first, Pair<Integer, Integer> second) { - return new Pair<Integer, Integer>(first.getFirst() + second.getFirst(), first.getSecond() + second.getSecond()); + public IntIntPair downsample(IntIntPair first, IntIntPair second) { + return new IntIntPair(first.first + second.first, first.second + second.second); } @Override - public Pair<Integer, Integer> aggregate(Pair<Integer, Integer> existing, Pair<Integer, Integer> data) { - existing.setFirst(existing.getFirst() + data.getFirst()); - existing.setSecond(existing.getSecond() + data.getSecond()); + public IntIntPair aggregate(IntIntPair existing, IntIntPair data) { + existing.first = existing.first + data.first; + existing.second = existing.second + data.second; return existing; } }); } - + /** * Histograms that work like two {@link #LongSumHistogram}, component wise. 
* @@ -408,7 +431,7 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { return new FlexiHistogram<Pair<Long, Long>, Pair<Long, Long>>(bins, new Adapter<Pair<Long, Long>, Pair<Long, Long>>() { @Override public Pair<Long, Long> make() { - return new Pair<Long, Long>(0L,0L); + return new Pair<Long, Long>(0L, 0L); } @Override @@ -436,27 +459,27 @@ public class FlexiHistogram<T,D> extends AggregatingHistogram<T,D> { * @param bins Number of bins. * @return New Histogram object */ - public static FlexiHistogram<Pair<Double, Double>, Pair<Double, Double>> DoubleSumDoubleSumHistogram(int bins) { - return new FlexiHistogram<Pair<Double, Double>, Pair<Double, Double>>(bins, new Adapter<Pair<Double, Double>, Pair<Double, Double>>() { + public static FlexiHistogram<DoubleDoublePair, DoubleDoublePair> DoubleSumDoubleSumHistogram(int bins) { + return new FlexiHistogram<DoubleDoublePair, DoubleDoublePair>(bins, new Adapter<DoubleDoublePair, DoubleDoublePair>() { @Override - public Pair<Double, Double> make() { - return new Pair<Double, Double>(0.,0.); + public DoubleDoublePair make() { + return new DoubleDoublePair(0., 0.); } @Override - public Pair<Double, Double> cloneForCache(Pair<Double, Double> data) { - return new Pair<Double, Double>(data.getFirst(), data.getSecond()); + public DoubleDoublePair cloneForCache(DoubleDoublePair data) { + return new DoubleDoublePair(data.first, data.second); } @Override - public Pair<Double, Double> downsample(Pair<Double, Double> first, Pair<Double, Double> second) { - return new Pair<Double, Double>(first.getFirst() + second.getFirst(), first.getSecond() + second.getSecond()); + public DoubleDoublePair downsample(DoubleDoublePair first, DoubleDoublePair second) { + return new DoubleDoublePair(first.first + second.first, first.second + second.second); } @Override - public Pair<Double, Double> aggregate(Pair<Double, Double> existing, Pair<Double, Double> data) { - existing.setFirst(existing.getFirst() + data.getFirst()); - 
existing.setSecond(existing.getSecond() + data.getSecond()); + public DoubleDoublePair aggregate(DoubleDoublePair existing, DoubleDoublePair data) { + existing.first = existing.first + data.first; + existing.second = existing.second + data.second; return existing; } }); diff --git a/src/de/lmu/ifi/dbs/elki/math/ReplacingHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java index 689fceec..304a434b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/ReplacingHistogram.java +++ b/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.math; +package de.lmu.ifi.dbs.elki.math.histograms; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,7 +26,9 @@ package de.lmu.ifi.dbs.elki.math; import java.util.ArrayList; import java.util.Iterator; -import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; /** * Class to manage a simple Histogram. @@ -34,27 +36,28 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * Note: the iterator returns pairs containing the coordinate and the bin value! * * @author Erich Schubert - * + * * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.ReplacingHistogram.Adapter * * @param <T> Histogram data type. */ -public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { +public class ReplacingHistogram<T> implements Iterable<DoubleObjPair<T>> { /** * Interface to plug in a data type T. * * @author Erich Schubert - * + * * @param <T> Data type */ public static abstract class Adapter<T> { /** * Construct a new T when needed. 
+ * * @return new T */ public abstract T make(); } - + /** * Array shift to account for negative indices. */ @@ -66,16 +69,15 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { protected int size; /** - * Array 'base', i.e. the point of 0.0. - * Usually the minimum. + * Array 'base', i.e. the point of 0.0. Usually the minimum. */ protected double base; - + /** * To avoid introducing an extra bucket for the maximum value. */ protected double max; - + /** * Width of a bin. */ @@ -85,7 +87,7 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { * Data storage */ protected ArrayList<T> data; - + /** * Constructor for new elements */ @@ -106,14 +108,14 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { this.size = bins; this.data = new ArrayList<T>(bins); this.maker = maker; - for (int i = 0; i < bins; i++) { + for(int i = 0; i < bins; i++) { this.data.add(maker.make()); } } /** - * Histogram constructor without 'Constructor' to generate new elements. - * Empty bins will be initialized with 'null'. + * Histogram constructor without 'Constructor' to generate new elements. Empty + * bins will be initialized with 'null'. * * @param bins Number of bins * @param min Minimum value @@ -122,7 +124,7 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { public ReplacingHistogram(int bins, double min, double max) { this(bins, min, max, null); } - + /** * Get the data at a given Coordinate. * @@ -144,8 +146,8 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { } /** - * Put data at a given coordinate. - * Note: this replaces the contents, it doesn't "add" or "count". + * Put data at a given coordinate. Note: this replaces the contents, it + * doesn't "add" or "count". * * @param coord Coordinate * @param d New Data @@ -156,18 +158,19 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { } /** - * Compute the bin number. 
- * Has a special case for rounding max down to the last bin. - * + * Compute the bin number. Has a special case for rounding max down to the + * last bin. + * * @param coord Coordinate * @return bin number */ protected int getBinNr(double coord) { - if (Double.isInfinite(coord) || Double.isNaN(coord)) { - throw new UnsupportedOperationException("Encountered non-finite value in Histogram: "+coord); + if(Double.isInfinite(coord) || Double.isNaN(coord)) { + throw new UnsupportedOperationException("Encountered non-finite value in Histogram: " + coord); } - if (coord == max) { - //System.err.println("Triggered special case: "+ (Math.floor((coord - base) / binsize) + offset) + " vs. " + (size - 1)); + if(coord == max) { + // System.err.println("Triggered special case: "+ (Math.floor((coord - + // base) / binsize) + offset) + " vs. " + (size - 1)); return size - 1; } return (int) Math.floor((coord - base) / binsize) + offset; @@ -198,7 +201,7 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { } else if(bin >= size) { this.data.ensureCapacity(bin + 1); - while (data.size() < bin) { + while(data.size() < bin) { data.add(maker.make()); } // add the new data. @@ -212,7 +215,7 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { this.data.set(bin, d); } } - + /** * Get the number of bins actually in use. 
* @@ -232,6 +235,36 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { } /** + * Mean of bin + * + * @param bin Bin number + * @return Mean + */ + public double getBinMean(int bin) { + return base + (bin + 0.5 - offset) * binsize; + } + + /** + * Minimum of bin + * + * @param bin Bin number + * @return Lower bound + */ + public double getBinMin(int bin) { + return base + (bin - offset) * binsize; + } + + /** + * Maximum of bin + * + * @param bin Bin number + * @return Upper bound + */ + public double getBinMax(int bin) { + return base + (bin + 1 - offset) * binsize; + } + + /** * Get minimum (covered by bins, not data!) * * @return minimum @@ -257,7 +290,7 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { public ArrayList<T> getData() { return data; } - + /** * Make a new bin. * @@ -266,7 +299,7 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { protected T make() { return maker.make(); } - + /** * Iterator class to iterate over all bins. 
* @@ -274,20 +307,20 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { * * @apiviz.exclude */ - protected class Iter implements Iterator<Pair<Double, T>> { + protected class Iter implements Iterator<DoubleObjPair<T>> { /** * Current bin number */ int bin = 0; - + @Override public boolean hasNext() { return bin < size; } @Override - public Pair<Double, T> next() { - Pair<Double, T> pair = new Pair<Double, T>(base + (bin + 0.5 - offset) * binsize, data.get(bin)); + public DoubleObjPair<T> next() { + DoubleObjPair<T> pair = new DoubleObjPair<T>(base + (bin + 0.5 - offset) * binsize, data.get(bin)); bin++; return pair; } @@ -305,20 +338,20 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { * * @apiviz.exclude */ - protected class RIter implements Iterator<Pair<Double, T>> { + protected class RIter implements Iterator<DoubleObjPair<T>> { /** * Current bin number */ int bin = size - 1; - + @Override public boolean hasNext() { return bin >= 0; } @Override - public Pair<Double, T> next() { - Pair<Double, T> pair = new Pair<Double, T>(base + (bin + 0.5 - offset) * binsize, data.get(bin)); + public DoubleObjPair<T> next() { + DoubleObjPair< T> pair = new DoubleObjPair<T>(base + (bin + 0.5 - offset) * binsize, data.get(bin)); bin--; return pair; } @@ -333,21 +366,21 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { * Get an iterator over all histogram bins. */ @Override - public Iterator<Pair<Double, T>> iterator() { + public Iterator<DoubleObjPair<T>> iterator() { return new Iter(); } - + /** * Get an iterator over all histogram bins. */ // TODO: is there some interface to implement. - public Iterator<Pair<Double, T>> reverseIterator() { + public Iterator<DoubleObjPair<T>> reverseIterator() { return new RIter(); } - + /** - * Convenience constructor for Integer-based Histograms. - * Uses a constructor to initialize bins with Integer(0) + * Convenience constructor for Integer-based Histograms. 
Uses a constructor to + * initialize bins with Integer(0) * * @param bins Number of bins * @param min Minimum coordinate @@ -364,8 +397,8 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { } /** - * Convenience constructor for Double-based Histograms. - * Uses a constructor to initialize bins with Double(0) + * Convenience constructor for Double-based Histograms. Uses a constructor to + * initialize bins with Double(0) * * @param bins Number of bins * @param min Minimum coordinate @@ -382,37 +415,37 @@ public class ReplacingHistogram<T> implements Iterable<Pair<Double, T>> { } /** - * Convenience constructor for Histograms with pairs of Integers - * Uses a constructor to initialize bins with Pair(Integer(0),Integer(0)) + * Convenience constructor for Histograms with pairs of Integers Uses a + * constructor to initialize bins with Pair(Integer(0),Integer(0)) * * @param bins Number of bins * @param min Minimum coordinate * @param max Maximum coordinate * @return New histogram for Integer pairs. 
*/ - public static ReplacingHistogram<Pair<Integer,Integer>> IntIntHistogram(int bins, double min, double max) { - return new ReplacingHistogram<Pair<Integer,Integer>>(bins, min, max, new Adapter<Pair<Integer,Integer>>() { + public static ReplacingHistogram<IntIntPair> IntIntHistogram(int bins, double min, double max) { + return new ReplacingHistogram<IntIntPair>(bins, min, max, new Adapter<IntIntPair>() { @Override - public Pair<Integer,Integer> make() { - return new Pair<Integer,Integer>(0,0); + public IntIntPair make() { + return new IntIntPair(0, 0); } }); } /** - * Convenience constructor for Histograms with pairs of Doubles - * Uses a constructor to initialize bins with Pair(Double(0),Double(0)) + * Convenience constructor for Histograms with pairs of Doubles Uses a + * constructor to initialize bins with Pair(Double(0),Double(0)) * * @param bins Number of bins * @param min Minimum coordinate * @param max Maximum coordinate * @return New histogram for Double pairs. */ - public static ReplacingHistogram<Pair<Double,Double>> DoubleDoubleHistogram(int bins, double min, double max) { - return new ReplacingHistogram<Pair<Double,Double>>(bins, min, max, new Adapter<Pair<Double,Double>>() { + public static ReplacingHistogram<DoubleDoublePair> DoubleDoubleHistogram(int bins, double min, double max) { + return new ReplacingHistogram<DoubleDoublePair>(bins, min, max, new Adapter<DoubleDoublePair>() { @Override - public Pair<Double,Double> make() { - return new Pair<Double,Double>(0.0,0.0); + public DoubleDoublePair make() { + return new DoubleDoublePair(0.0, 0.0); } }); } diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/package-info.java b/src/de/lmu/ifi/dbs/elki/math/histograms/package-info.java new file mode 100644 index 00000000..b4b23466 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/histograms/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Classes for computing histograms.</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications 
Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.math.histograms;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java index 6200164b..5d185ec5 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -150,7 +150,7 @@ public class AffineTransformation { * @param v translation vector */ public void addTranslation(Vector v) { - assert (v.getRowDimensionality() == dim); + assert (v.getDimensionality() == dim); // reset inverse transformation - needs recomputation. inv = null; @@ -295,7 +295,7 @@ public class AffineTransformation { * @return vector of dim+1, with new column having the value 1.0 */ public Vector homogeneVector(Vector v) { - assert (v.getRowDimensionality() == dim); + assert (v.getDimensionality() == dim); double[] dv = new double[dim + 1]; for(int i = 0; i < dim; i++) { dv[i] = v.get(i); @@ -311,7 +311,7 @@ public class AffineTransformation { * @return vector of dim+1, with new column having the value 0.0 */ public Vector homogeneRelativeVector(Vector v) { - assert (v.getRowDimensionality() == dim); + assert (v.getDimensionality() == dim); // TODO: this only works properly when trans[dim][dim] == 1.0, right? double[] dv = new double[dim + 1]; for(int i = 0; i < dim; i++) { @@ -328,7 +328,7 @@ public class AffineTransformation { * @return vector of dimension dim */ public Vector unhomogeneVector(Vector v) { - assert (v.getRowDimensionality() == dim + 1); + assert (v.getDimensionality() == dim + 1); // TODO: this only works properly when trans[dim][dim] == 1.0, right? 
double[] dv = new double[dim]; double scale = v.get(dim); @@ -346,7 +346,7 @@ public class AffineTransformation { * @return vector of dimension dim */ public Vector unhomogeneRelativeVector(Vector v) { - assert (v.getRowDimensionality() == dim + 1); + assert (v.getDimensionality() == dim + 1); double[] dv = new double[dim]; double scale = v.get(dim); assert (Math.abs(scale) == 0.0); diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java index 5ba3d4a6..cf3bb25b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -41,11 +41,6 @@ import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; */ public class Centroid extends Vector { /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** * The current weight */ protected double wsum; @@ -147,7 +142,7 @@ public class Centroid extends Vector { * @return the data */ public <F extends NumberVector<? extends F, ?>> F toVector(Relation<? extends F> relation) { - return DatabaseUtil.assumeVectorField(relation).getFactory().newInstance(elements); + return DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(elements); } /** @@ -160,7 +155,7 @@ public class Centroid extends Vector { int n = mat.getColumnDimensionality(); for(int i = 0; i < n; i++) { // TODO: avoid constructing the vector objects? 
- c.put(mat.getColumnVector(i)); + c.put(mat.getCol(i)); } return c; } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java index ff3b8cfb..5209468f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java index 24ea740b..5d9d27a1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -245,7 +245,7 @@ public class CovarianceMatrix { * @return Mean vector */ public <F extends NumberVector<? extends F, ?>> F getMeanVector(Relation<? extends F> relation) { - return DatabaseUtil.assumeVectorField(relation).getFactory().newInstance(mean); + return DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(mean); } /** @@ -332,7 +332,7 @@ public class CovarianceMatrix { int n = mat.getColumnDimensionality(); for(int i = 0; i < n; i++) { // TODO: avoid constructing the vector objects? 
- c.put(mat.getColumnVector(i)); + c.put(mat.getCol(i)); } return c; } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java index 6b747dae..132393c6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java index 67d77089..48ce9c7b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -253,7 +253,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { double g = d[l]; double p = (d[l + 1] - g) / (2.0 * e[l]); - double r = MathUtil.hypotenuse(p, 1.0); + double r = MathUtil.fastHypot(p, 1.0); if(p < 0) { r = -r; } @@ -281,7 +281,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { s2 = s; g = c * e[i]; h = c * p; - r = MathUtil.hypotenuse(p, e[i]); + r = MathUtil.fastHypot(p, e[i]); e[i + 1] = s * r; s = e[i] / r; c = p / r; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java index 
5adafe10..08634279 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java @@ -1,10 +1,11 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,7 +24,6 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - /** * LU Decomposition. * <P> @@ -78,11 +78,21 @@ public class LUDecomposition implements java.io.Serializable { * @param A Rectangular matrix */ public LUDecomposition(Matrix A) { - // Use a "left-looking", dot-product, Crout/Doolittle algorithm. + this(A.getArrayCopy(), A.getRowDimensionality(), A.getColumnDimensionality()); + } - LU = A.getArrayCopy(); - m = A.getRowDimensionality(); - n = A.getColumnDimensionality(); + /** + * LU Decomposition + * + * @param LU Rectangular matrix + * @param m row dimensionality + * @param n column dimensionality + */ + public LUDecomposition(double[][] LU, int m, int n) { + this.LU = LU; + this.m = m; + this.n = n; + // Use a "left-looking", dot-product, Crout/Doolittle algorithm. piv = new int[m]; for(int i = 0; i < m; i++) { piv[i] = i; @@ -273,25 +283,57 @@ public class LUDecomposition implements java.io.Serializable { Matrix Xmat = B.getMatrix(piv, 0, nx - 1); double[][] X = Xmat.getArrayRef(); + solveInplace(X, nx); + return Xmat; + } + + /** + * Solve A*X = B + * + * @param B A Matrix with as many rows as A and any number of columns. + * @return X so that L*U*X = B(piv,:) + * @exception IllegalArgumentException Matrix row dimensions must agree. + * @exception RuntimeException Matrix is singular. 
+ */ + public double[][] solve(double[][] B) { + int mx = B.length; + int nx = B[0].length; + if(mx != m) { + throw new IllegalArgumentException("Matrix row dimensions must agree."); + } + if(!this.isNonsingular()) { + throw new RuntimeException("Matrix is singular."); + } + double[][] Xmat = new Matrix(B).getMatrix(piv, 0, nx - 1).getArrayRef(); + solveInplace(Xmat, nx); + return Xmat; + } + + /** + * Solve A*X = B + * + * @param B A Matrix with as many rows as A and any number of columns. + * @param nx Number of columns + */ + private void solveInplace(double[][] B, int nx) { // Solve L*Y = B(piv,:) for(int k = 0; k < n; k++) { for(int i = k + 1; i < n; i++) { for(int j = 0; j < nx; j++) { - X[i][j] -= X[k][j] * LU[i][k]; + B[i][j] -= B[k][j] * LU[i][k]; } } } // Solve U*X = Y; for(int k = n - 1; k >= 0; k--) { for(int j = 0; j < nx; j++) { - X[k][j] /= LU[k][k]; + B[k][j] /= LU[k][k]; } for(int i = 0; i < k; i++) { for(int j = 0; j < nx; j++) { - X[i][j] -= X[k][j] * LU[i][k]; + B[i][j] -= B[k][j] * LU[i][k]; } } } - return Xmat; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java index decec2b6..b6a53aa2 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java index fefe7e16..f64b1129 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; */ import java.io.BufferedReader; -import java.io.Serializable; import java.io.StreamTokenizer; import java.util.Arrays; import java.util.logging.Logger; @@ -50,12 +49,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil; * @apiviz.uses Vector * @apiviz.landmark */ -public class Matrix implements MatrixLike<Matrix>, Serializable { - /** - * Serial version - */ - private static final long serialVersionUID = 1L; - +public class Matrix { /** * A small number to handle numbers near 0 as 0. 
*/ @@ -278,7 +272,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * * @return a new matrix containing the same values as this matrix */ - @Override public final Matrix copy() { final Matrix X = new Matrix(elements.length, columndimension); for(int i = 0; i < elements.length; i++) { @@ -324,7 +317,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * * @return m, the number of rows. */ - @Override public final int getRowDimensionality() { return elements.length; } @@ -334,7 +326,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * * @return n, the number of columns. */ - @Override public final int getColumnDimensionality() { return columndimension; } @@ -347,7 +338,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @return A(i,j) * @throws ArrayIndexOutOfBoundsException on bounds error */ - @Override public final double get(final int i, final int j) { return elements[i][j]; } @@ -361,7 +351,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @return modified matrix * @throws ArrayIndexOutOfBoundsException on bounds error */ - @Override public final Matrix set(final int i, final int j, final double s) { elements[i][j] = s; return this; @@ -376,7 +365,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @return modified matrix * @throws ArrayIndexOutOfBoundsException on bounds error */ - @Override public final Matrix increment(final int i, final int j, final double s) { elements[i][j] += s; return this; @@ -597,22 +585,12 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { } /** - * Returns the <code>i</code>th row of this matrix. - * - * @param i the index of the row to be returned - * @return the <code>i</code>th row of this matrix - */ - public final Matrix getRow(final int i) { - return getMatrix(i, i, 0, columndimension - 1); - } - - /** * Returns the <code>i</code>th row of this matrix as vector. 
* * @param i the index of the row to be returned * @return the <code>i</code>th row of this matrix */ - public final Vector getRowVector(final int i) { + public final Vector getRow(final int i) { double[] row = elements[i].clone(); return new Vector(row); } @@ -620,26 +598,10 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { /** * Sets the <code>j</code>th row of this matrix to the specified vector. * - * @param j the index of the row to be set - * @param row the value of the row to be set - */ - public final void setRow(final int j, final Matrix row) { - if(row.columndimension != columndimension) { - throw new IllegalArgumentException("Matrix must consist of the same no of columns!"); - } - if(row.elements.length != 1) { - throw new IllegalArgumentException("Matrix must consist of one row!"); - } - setMatrix(elements.length - 1, 0, j, j, row); - } - - /** - * Sets the <code>j</code>th row of this matrix to the specified vector. - * * @param j the index of the column to be set * @param row the value of the column to be set */ - public final void setRowVector(final int j, final Vector row) { + public final void setRow(final int j, final Vector row) { if(row.elements.length != columndimension) { throw new IllegalArgumentException("Matrix must consist of the same no of columns!"); } @@ -649,23 +611,12 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { } /** - * Returns the <code>j</code>th column of this matrix. - * - * @param j the index of the column to be returned - * @return the <code>j</code>th column of this matrix - */ - public final Matrix getColumn(final int j) { - return getMatrix(0, elements.length - 1, j, j); - } - - /** * Returns the <code>j</code>th column of this matrix as vector. 
* * @param j the index of the column to be returned * @return the <code>j</code>th column of this matrix */ - @Override - public final Vector getColumnVector(final int j) { + public final Vector getCol(final int j) { final Vector v = new Vector(elements.length); for(int i = 0; i < elements.length; i++) { v.elements[i] = elements[i][j]; @@ -679,23 +630,7 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param j the index of the column to be set * @param column the value of the column to be set */ - public final void setColumn(final int j, final Matrix column) { - if(column.elements.length != elements.length) { - throw new IllegalArgumentException("Matrix must consist of the same no of rows!"); - } - if(column.columndimension != 1) { - throw new IllegalArgumentException("Matrix must consist of one column!"); - } - setMatrix(0, elements.length - 1, j, j, column); - } - - /** - * Sets the <code>j</code>th column of this matrix to the specified column. - * - * @param j the index of the column to be set - * @param column the value of the column to be set - */ - public final void setColumnVector(final int j, final Vector column) { + public final void setCol(final int j, final Vector column) { if(column.elements.length != elements.length) { throw new IllegalArgumentException("Matrix must consist of the same no of rows!"); } @@ -709,7 +644,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * * @return A<sup>T</sup> */ - @Override public final Matrix transpose() { final Matrix X = new Matrix(columndimension, elements.length); for(int i = 0; i < elements.length; i++) { @@ -726,7 +660,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param B another matrix * @return A + B in a new Matrix */ - @Override public final Matrix plus(final Matrix B) { return copy().plusEquals(B); } @@ -738,7 +671,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param s scalar * @return A + s * B in a new Matrix 
*/ - @Override public final Matrix plusTimes(final Matrix B, final double s) { return copy().plusTimesEquals(B, s); } @@ -749,7 +681,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param B another matrix * @return A + B in this Matrix */ - @Override public final Matrix plusEquals(final Matrix B) { checkMatrixDimensions(B); for(int i = 0; i < elements.length; i++) { @@ -767,7 +698,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param s Scalar * @return A + s * B in this Matrix */ - @Override public final Matrix plusTimesEquals(final Matrix B, final double s) { checkMatrixDimensions(B); for(int i = 0; i < elements.length; i++) { @@ -784,7 +714,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param B another matrix * @return A - B in a new Matrix */ - @Override public final Matrix minus(final Matrix B) { return copy().minusEquals(B); } @@ -796,7 +725,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param s Scalar * @return A - s * B in a new Matrix */ - @Override public final Matrix minusTimes(final Matrix B, final double s) { return copy().minusTimesEquals(B, s); } @@ -807,7 +735,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param B another matrix * @return A - B in this Matrix */ - @Override public final Matrix minusEquals(final Matrix B) { checkMatrixDimensions(B); for(int i = 0; i < elements.length; i++) { @@ -825,7 +752,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param s Scalar * @return A - s * B in this Matrix */ - @Override public final Matrix minusTimesEquals(final Matrix B, final double s) { checkMatrixDimensions(B); for(int i = 0; i < elements.length; i++) { @@ -842,7 +768,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @param s scalar * @return s*A */ - @Override public final Matrix times(final double s) { return copy().timesEquals(s); } @@ -853,7 +778,6 @@ public class 
Matrix implements MatrixLike<Matrix>, Serializable { * @param s scalar * @return replace A by s*A */ - @Override public final Matrix timesEquals(final double s) { for(int i = 0; i < elements.length; i++) { for(int j = 0; j < columndimension; j++) { @@ -1036,91 +960,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { } /** - * Returns the scalar product of the colA column of this and the colB column - * of B. - * - * @param colA The column of A to compute scalar product for - * @param B second Matrix - * @param colB The column of B to compute scalar product for - * @return double The scalar product of the first column of this and B - */ - public double scalarProduct(int colA, Matrix B, int colB) { - double scalarProduct = 0.0; - for(int row = 0; row < getRowDimensionality(); row++) { - double prod = elements[row][colA] * B.elements[row][colB]; - scalarProduct += prod; - } - return scalarProduct; - } - - /** - * Returns the scalar product of the colA column of this and the colB column - * of B. 
- * - * @param colA The column of A to compute scalar product for - * @param B Vector - * @return double The scalar product of the first column of this and B - */ - public double scalarProduct(int colA, Vector B) { - double scalarProduct = 0.0; - for(int row = 0; row < getRowDimensionality(); row++) { - double prod = elements[row][colA] * B.elements[row]; - scalarProduct += prod; - } - return scalarProduct; - } - - /** - * LU Decomposition - * - * @return LUDecomposition - * @see LUDecomposition - */ - public final LUDecomposition lu() { - return new LUDecomposition(this); - } - - /** - * QR Decomposition - * - * @return QRDecomposition - * @see QRDecomposition - */ - public final QRDecomposition qr() { - return new QRDecomposition(this); - } - - /** - * Cholesky Decomposition - * - * @return CholeskyDecomposition - * @see CholeskyDecomposition - */ - public final CholeskyDecomposition chol() { - return new CholeskyDecomposition(this); - } - - /** - * Singular Value Decomposition - * - * @return SingularValueDecomposition - * @see SingularValueDecomposition - */ - public final SingularValueDecomposition svd() { - return new SingularValueDecomposition(this); - } - - /** - * Eigenvalue Decomposition - * - * @return EigenvalueDecomposition - * @see EigenvalueDecomposition - */ - public final EigenvalueDecomposition eig() { - return new EigenvalueDecomposition(this); - } - - /** * Solve A*X = B * * @param B right hand side @@ -1131,16 +970,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { } /** - * Solve X*A = B, which is also A'*X' = B' - * - * @param B right hand side - * @return solution if A is square, least squares solution otherwise. - */ - public final Matrix solveTranspose(final Matrix B) { - return transpose().solve(B.transpose()); - } - - /** * Matrix inverse or pseudoinverse * * @return inverse(A) if A is square, pseudoinverse otherwise. 
@@ -1194,7 +1023,7 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * * @return maximum column sum. */ - public double norm1() { + public final double norm1() { double f = 0; for(int j = 0; j < columndimension; j++) { double s = 0; @@ -1220,7 +1049,7 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * * @return maximum row sum. */ - public double normInf() { + public final double normInf() { double f = 0; for(int i = 0; i < elements.length; i++) { double s = 0; @@ -1237,56 +1066,20 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * * @return sqrt of sum of squares of all elements. */ - public double normF() { + public final double normF() { double f = 0; for(int i = 0; i < elements.length; i++) { for(int j = 0; j < columndimension; j++) { - f = MathUtil.hypotenuse(f, elements[i][j]); + f = MathUtil.fastHypot(f, elements[i][j]); } } return f; } /** - * distanceCov returns distance of two Matrices A and B, i.e. the root of the - * sum of the squared distances A<sub>ij</sub>-B<sub>ij</sub>. - * - * @param B Matrix to compute distance from this (A) - * @return distance of Matrices - */ - // TODO: unused - remove / move into a MatrixDistance helper? - public final double distanceCov(final Matrix B) { - double distance = 0.0; - double distIJ; - int row; - for(int col = 0; col < columndimension; col++) { - for(row = 0; row < elements.length; row++) { - distIJ = elements[row][col] - B.elements[row][col]; - distance += (distIJ * distIJ); - } - } - distance = Math.sqrt(distance); - return distance; - } - - /** - * getDiagonal returns array of diagonal-elements. 
- * - * @return double[] the values on the diagonal of the Matrix - */ - public final double[] getDiagonal() { - int n = Math.min(columndimension, elements.length); - final double[] diagonal = new double[n]; - for(int i = 0; i < n; i++) { - diagonal[i] = elements[i][i]; - } - return diagonal; - } - - /** * Normalizes the columns of this matrix to length of 1.0. */ - public void normalizeColumns() { + public final void normalizeColumns() { for(int col = 0; col < columndimension; col++) { double norm = 0.0; for(int row = 0; row < elements.length; row++) { @@ -1592,10 +1385,10 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { for(int i = 0; i < result.columndimension; i++) { // FIXME: optimize - excess copying! if(i < columndimension) { - result.setColumn(i, getColumn(i)); + result.setCol(i, getCol(i)); } else { - result.setColumn(i, columns.getColumn(i - columndimension)); + result.setCol(i, columns.getCol(i - columndimension)); } } return result; @@ -1607,19 +1400,19 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { * @return the orthonormalized matrix */ public final Matrix orthonormalize() { - Matrix v = getColumn(0); + Matrix v = copy(); // FIXME: optimize - excess copying! 
for(int i = 1; i < columndimension; i++) { - final Matrix u_i = getColumn(i); - final Matrix sum = new Matrix(elements.length, 1); + final Vector u_i = getCol(i); + final Vector sum = new Vector(elements.length); for(int j = 0; j < i; j++) { - final Matrix v_j = v.getColumn(j); - double scalar = u_i.scalarProduct(0, v_j, 0) / v_j.scalarProduct(0, v_j, 0); - sum.plusEquals(v_j.times(scalar)); + final Vector v_j = v.getCol(j); + double scalar = u_i.transposeTimes(v_j) / v_j.transposeTimes(v_j); + sum.plusTimesEquals(v_j, scalar); } - final Matrix v_i = u_i.minus(sum); - v = v.appendColumns(v_i); + final Vector v_i = u_i.minus(sum); + v.setCol(i, v_i); } v.normalizeColumns(); @@ -1716,7 +1509,7 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { /** * Check if size(A) == size(B) */ - protected void checkMatrixDimensions(MatrixLike<?> B) { + protected void checkMatrixDimensions(Matrix B) { if(B.getRowDimensionality() != getRowDimensionality() || B.getColumnDimensionality() != getColumnDimensionality()) { throw new IllegalArgumentException("Matrix dimensions must agree."); } @@ -1807,15 +1600,6 @@ public class Matrix implements MatrixLike<Matrix>, Serializable { } /** - * Returns the dimensionality of this matrix as a string. - * - * @return the dimensionality of this matrix as a string - */ - public String dimensionInfo() { - return getRowDimensionality() + " x " + getColumnDimensionality(); - } - - /** * toString returns String-representation of Matrix. 
*/ @Override diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/MatrixLike.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/MatrixLike.java deleted file mode 100644 index ff1ec5ba..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/MatrixLike.java +++ /dev/null @@ -1,194 +0,0 @@ -package de.lmu.ifi.dbs.elki.math.linearalgebra; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2011 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - -/** - * Common Interface for Matrix and Vector objects, where M is the actual type. - * - * The type M guarantees type safety for many operations. - * - * @param M the actual type - * - * @apiviz.landmark - * - * @author Elke Achtert - * @author Erich Schubert - */ -public interface MatrixLike<M extends MatrixLike<M>> extends Cloneable { - /** - * Make a deep copy of a matrix. - * - * @return a new matrix containing the same values as this matrix - */ - public M copy(); - - /** - * Clone the Matrix object. - */ - public Object clone(); - - /** - * Returns the dimensionality of the rows of this matrix. - * - * @return m, the number of rows. 
- */ - public int getRowDimensionality(); - - /** - * Returns the dimensionality of the columns of this matrix. - * - * @return n, the number of columns. - */ - public int getColumnDimensionality(); - - /** - * Get a single element. - * - * @param i Row index. - * @param j Column index. - * @return A(i,j) - * @throws ArrayIndexOutOfBoundsException on bounds error - */ - public double get(int i, int j); - - /** - * Set a single element. - * - * @param i Row index. - * @param j Column index. - * @param s A(i,j). - * @throws ArrayIndexOutOfBoundsException on bounds error - */ - public M set(int i, int j, double s); - - /** - * Increments a single element. - * - * @param i the row index - * @param j the column index - * @param s the increment value: A(i,j) = A(i.j) + s. - * @throws ArrayIndexOutOfBoundsException on bounds error - */ - public M increment(int i, int j, double s); - - /** - * Returns the <code>i</code>th column of this matrix as vector. - * - * @param i the index of the column to be returned - * @return the <code>i</code>th column of this matrix - */ - public Vector getColumnVector(int i); - - /** - * Matrix transpose. 
- * - * @return A<sup>T</sup> - */ - public Matrix transpose(); - - /** - * C = A + B - * - * @param B another matrix - * @return A + B in a new Matrix - */ - public M plus(M B); - - /** - * C = A + s*B - * - * @param B another matrix - * @param s scalar - * @return A + s*B in a new Matrix - */ - public M plusTimes(M B, double s); - - /** - * A = A + B - * - * @param B another matrix - * @return A + B in this Matrix - */ - public M plusEquals(M B); - - /** - * C = A + s*B - * - * @param B another matrix - * @param s scalar - * @return A + s*B in this Matrix - */ - public M plusTimesEquals(M B, double s); - - /** - * C = A - B - * - * @param B another matrix - * @return A - B in a new Matrix - */ - public M minus(M B); - - /** - * C = A - s*B - * - * @param B another matrix - * @param s Scalar - * @return A - s*B in a new Matrix - */ - public M minusTimes(M B, double s); - - /** - * A = A - B - * - * @param B another matrix - * @return A - B in this Matrix - */ - public M minusEquals(M B); - - /** - * C = A - s*B - * - * @param B another matrix - * @param s Scalar - * @return A - s*B in a new Matrix - */ - public M minusTimesEquals(M B, double s); - - /** - * Multiply a matrix by a scalar, C = s*A - * - * @param s scalar - * @return s*A - */ - public M times(double s); - - /** - * Multiply a matrix by a scalar in place, A = s*A - * - * @param s scalar - * @return replace A by s*A - */ - public M timesEquals(double s); -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java index e54f75fe..02b5b424 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -43,11 +43,6 @@ import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; */ public class ProjectedCentroid extends Centroid { /** - * Serial version - */ - private static final long serialVersionUID = 1L; - - /** * The selected dimensions. */ private BitSet dims; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java index 040bd4b1..d8858657 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java index c053de0d..5b52d837 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing 
KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -74,13 +74,22 @@ public class QRDecomposition implements java.io.Serializable { * QR Decomposition, computed by Householder reflections. * * @param A Rectangular matrix - * */ public QRDecomposition(Matrix A) { - // Initialize. - QR = A.getArrayCopy(); - m = A.getRowDimensionality(); - n = A.getColumnDimensionality(); + this(A.getArrayRef(), A.getRowDimensionality(), A.getColumnDimensionality()); + } + + /** + * QR Decomposition, computed by Householder reflections. + * + * @param A Rectangular matrix + * @param m row dimensionality + * @param n column dimensionality + */ + public QRDecomposition(double[][] A, int m, int n) { + this.QR = new Matrix(A).getArrayCopy(); + this.m = QR.length; + this.n = QR[0].length; Rdiag = new double[n]; // Main loop. @@ -88,7 +97,7 @@ public class QRDecomposition implements java.io.Serializable { // Compute 2-norm of k-th column without under/overflow. double nrm = 0; for(int i = k; i < m; i++) { - nrm = MathUtil.hypotenuse(nrm, QR[i][k]); + nrm = MathUtil.fastHypot(nrm, QR[i][k]); } if(nrm != 0.0) { @@ -227,8 +236,38 @@ public class QRDecomposition implements java.io.Serializable { // Copy right hand side int nx = B.getColumnDimensionality(); - double[][] X = B.getArrayCopy(); + Matrix X = B.copy(); + + solveInplace(X.getArrayRef(), nx); + return X.getMatrix(0, n - 1, 0, nx - 1); + } + + /** + * Least squares solution of A*X = B + * + * @param B A Matrix with as many rows as A and any number of columns. + * @return X that minimizes the two norm of Q*R*X-B. + * @exception IllegalArgumentException Matrix row dimensions must agree. + * @exception RuntimeException Matrix is rank deficient. 
+ */ + public double[][] solve(double[][] B) { + int rows = B.length; + int cols = B[0].length; + if(rows != m) { + throw new IllegalArgumentException("Matrix row dimensions must agree."); + } + if(!this.isFullRank()) { + throw new RuntimeException("Matrix is rank deficient."); + } + + // Copy right hand side + Matrix X = new Matrix(B).copy(); + + solveInplace(X.getArrayRef(), cols); + return X.getMatrix(0, n - 1, 0, cols - 1).getArrayRef(); + } + private void solveInplace(double[][] X, int nx) { // Compute Y = transpose(Q)*B for(int k = 0; k < n; k++) { for(int j = 0; j < nx; j++) { @@ -253,6 +292,5 @@ public class QRDecomposition implements java.io.Serializable { } } } - return (new Matrix(X).getMatrix(0, n - 1, 0, nx - 1)); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java index 38d70443..183a8034 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -41,12 +41,7 @@ import de.lmu.ifi.dbs.elki.math.MathUtil; * * @apiviz.uses Matrix - - transforms */ -@SuppressWarnings("serial") -public class SingularValueDecomposition implements java.io.Serializable { - /* - * ------------------------ Class variables ------------------------ - */ - +public class SingularValueDecomposition { /** * Arrays for internal storage of U and V. * @@ -70,23 +65,26 @@ public class SingularValueDecomposition implements java.io.Serializable { */ private int m, n; - /* - * ------------------------ Constructor ------------------------ - */ - /** * Construct the singular value decomposition * * @param Arg Rectangular matrix */ - public SingularValueDecomposition(Matrix Arg) { + this(Arg.getArrayRef()); + } + /** + * Constructor. + * + * @param Arg Rectangular input matrix + */ + public SingularValueDecomposition(double[][] Arg) { + double[][] A = new Matrix(Arg).getArrayCopy(); + this.m = A.length; + this.n = A[0].length; // Derived from LINPACK code. // Initialize. 
- double[][] A = Arg.getArrayCopy(); - m = Arg.getRowDimensionality(); - n = Arg.getColumnDimensionality(); int nu = Math.min(m, n); s = new double[Math.min(m + 1, n)]; U = new double[m][nu]; @@ -103,13 +101,12 @@ public class SingularValueDecomposition implements java.io.Serializable { int nrt = Math.max(0, Math.min(n - 2, m)); for(int k = 0; k < Math.max(nct, nrt); k++) { if(k < nct) { - // Compute the transformation for the k-th column and // place the k-th diagonal in s[k]. // Compute 2-norm of k-th column without under/overflow. s[k] = 0; for(int i = k; i < m; i++) { - s[k] = MathUtil.hypotenuse(s[k], A[i][k]); + s[k] = MathUtil.fastHypot(s[k], A[i][k]); } if(s[k] != 0.0) { if(A[k][k] < 0.0) { @@ -124,9 +121,7 @@ public class SingularValueDecomposition implements java.io.Serializable { } for(int j = k + 1; j < n; j++) { if((k < nct) & (s[k] != 0.0)) { - // Apply the transformation. - double t = 0; for(int i = k; i < m; i++) { t += A[i][k] * A[i][j]; @@ -143,7 +138,6 @@ public class SingularValueDecomposition implements java.io.Serializable { e[j] = A[k][j]; } if(wantu & (k < nct)) { - // Place the transformation in U for subsequent back // multiplication. @@ -152,13 +146,12 @@ public class SingularValueDecomposition implements java.io.Serializable { } } if(k < nrt) { - // Compute the k-th row transformation and place the // k-th super-diagonal in e[k]. // Compute 2-norm without under/overflow. e[k] = 0; for(int i = k + 1; i < n; i++) { - e[k] = MathUtil.hypotenuse(e[k], e[i]); + e[k] = MathUtil.fastHypot(e[k], e[i]); } if(e[k] != 0.0) { if(e[k + 1] < 0.0) { @@ -171,9 +164,7 @@ public class SingularValueDecomposition implements java.io.Serializable { } e[k] = -e[k]; if((k + 1 < m) & (e[k] != 0.0)) { - // Apply the transformation. 
- for(int i = k + 1; i < m; i++) { work[i] = 0.0; } @@ -190,10 +181,8 @@ public class SingularValueDecomposition implements java.io.Serializable { } } if(wantv) { - // Place the transformation in V for subsequent // back multiplication. - for(int i = k + 1; i < n; i++) { V[i][k] = e[i]; } @@ -254,7 +243,6 @@ public class SingularValueDecomposition implements java.io.Serializable { } // If required, generate V. - if(wantv) { for(int k = n - 1; k >= 0; k--) { if((k < nrt) & (e[k] != 0.0)) { @@ -336,14 +324,13 @@ public class SingularValueDecomposition implements java.io.Serializable { // Perform the task indicated by kase. switch(kase){ - + // Deflate negligible s(p). - case 1: { double f = e[p - 2]; e[p - 2] = 0.0; for(int j = p - 2; j >= k; j--) { - double t = MathUtil.hypotenuse(s[j], f); + double t = MathUtil.fastHypot(s[j], f); double cs = s[j] / t; double sn = f / t; s[j] = t; @@ -363,12 +350,11 @@ public class SingularValueDecomposition implements java.io.Serializable { break; // Split at negligible s(k). - case 2: { double f = e[k - 1]; e[k - 1] = 0.0; for(int j = k; j < p; j++) { - double t = MathUtil.hypotenuse(s[j], f); + double t = MathUtil.fastHypot(s[j], f); double cs = s[j] / t; double sn = f / t; s[j] = t; @@ -386,11 +372,8 @@ public class SingularValueDecomposition implements java.io.Serializable { break; // Perform one qr step. - case 3: { - // Calculate the shift. - double scale = Math.max(Math.max(Math.max(Math.max(Math.abs(s[p - 1]), Math.abs(s[p - 2])), Math.abs(e[p - 2])), Math.abs(s[k])), Math.abs(e[k])); double sp = s[p - 1] / scale; double spm1 = s[p - 2] / scale; @@ -411,9 +394,8 @@ public class SingularValueDecomposition implements java.io.Serializable { double g = sk * ek; // Chase zeros. 
- for(int j = k; j < p - 1; j++) { - double t = MathUtil.hypotenuse(f, g); + double t = MathUtil.fastHypot(f, g); double cs = f / t; double sn = g / t; if(j != k) { @@ -430,7 +412,7 @@ public class SingularValueDecomposition implements java.io.Serializable { V[i][j] = t; } } - t = MathUtil.hypotenuse(f, g); + t = MathUtil.fastHypot(f, g); cs = f / t; sn = g / t; s[j] = t; @@ -452,11 +434,8 @@ public class SingularValueDecomposition implements java.io.Serializable { break; // Convergence. - case 4: { - // Make the singular values positive. - if(s[k] <= 0.0) { s[k] = (s[k] < 0.0 ? -s[k] : 0.0); if(wantv) { @@ -467,7 +446,6 @@ public class SingularValueDecomposition implements java.io.Serializable { } // Order the singular values. - while(k < pp) { if(s[k] >= s[k + 1]) { break; @@ -499,16 +477,11 @@ public class SingularValueDecomposition implements java.io.Serializable { } } - /* - * ------------------------ Public Methods ------------------------ - */ - /** * Return the left singular vectors * * @return U */ - public Matrix getU() { return new Matrix(U); } @@ -518,7 +491,6 @@ public class SingularValueDecomposition implements java.io.Serializable { * * @return V */ - public Matrix getV() { return new Matrix(V); } @@ -528,7 +500,6 @@ public class SingularValueDecomposition implements java.io.Serializable { * * @return diagonal of S. 
*/ - public double[] getSingularValues() { return s; } @@ -538,7 +509,6 @@ public class SingularValueDecomposition implements java.io.Serializable { * * @return S */ - public Matrix getS() { Matrix X = new Matrix(n, n); double[][] S = X.getArrayRef(); @@ -556,7 +526,6 @@ public class SingularValueDecomposition implements java.io.Serializable { * * @return max(S) */ - public double norm2() { return s[0]; } @@ -566,7 +535,6 @@ public class SingularValueDecomposition implements java.io.Serializable { * * @return max(S)/min(S) */ - public double cond() { return s[0] / s[Math.min(m, n) - 1]; } @@ -576,7 +544,6 @@ public class SingularValueDecomposition implements java.io.Serializable { * * @return Number of nonnegligible singular values. */ - public int rank() { double eps = Math.pow(2.0, -52.0); double tol = Math.max(m, n) * s[0] * eps; @@ -588,4 +555,4 @@ public class SingularValueDecomposition implements java.io.Serializable { } return r; } -} +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java index b0264c35..5fa023ca 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -65,7 +65,7 @@ public class SortedEigenPairs { this.eigenPairs = new EigenPair[eigenvalues.length]; for(int i = 0; i < eigenvalues.length; i++) { double e = java.lang.Math.abs(eigenvalues[i]); - Vector v = eigenvectors.getColumnVector(i); + Vector v = eigenvectors.getCol(i); eigenPairs[i] = new EigenPair(v, e); } @@ -124,7 +124,7 @@ public class SortedEigenPairs { Matrix eigenVectors = new Matrix(eigenPairs.length, eigenPairs.length); for(int i = 0; i < eigenPairs.length; i++) { EigenPair eigenPair = eigenPairs[i]; - eigenVectors.setColumnVector(i, eigenPair.getEigenvector()); + eigenVectors.setCol(i, eigenPair.getEigenvector()); } return eigenVectors; } @@ -139,7 +139,7 @@ public class SortedEigenPairs { Matrix eigenVectors = new Matrix(eigenPairs.length, n); for(int i = 0; i < n; i++) { EigenPair eigenPair = eigenPairs[i]; - eigenVectors.setColumnVector(i, eigenPair.getEigenvector()); + eigenVectors.setCol(i, eigenPair.getEigenvector()); } return eigenVectors; } @@ -154,7 +154,7 @@ public class SortedEigenPairs { Matrix eigenVectors = new Matrix(eigenPairs.length, n); for(int i = 0; i < n; i++) { EigenPair eigenPair = eigenPairs[eigenPairs.length - 1 - i]; - eigenVectors.setColumnVector(i, eigenPair.getEigenvector()); + eigenVectors.setCol(i, eigenPair.getEigenvector()); } return eigenVectors; } diff --git 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java index 3e8815e3..3cab3b51 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java new file mode 100644 index 00000000..97466b20 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java @@ -0,0 +1,1512 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.Arrays; + +/** + * Class providing basic vector mathematics, for low-level vectors stored as + * {@code double[]}. While this is less nice syntactically, it reduces memory + * usage and VM overhead. + * + * @author Erich Schubert + * + * @apiviz.landmark + */ +public final class VMath { + /** + * A small number to handle numbers near 0 as 0. + */ + public static final double DELTA = 1E-5; + + /** + * Fake constructor. Static class. + */ + private VMath() { + // Cannot be instantiated + } + + /** + * Returns a randomly created vector of length 1.0 + * + * @param dimensionality dimensionality + * @return Random vector of length 1.0 + */ + public final static double[] randomNormalizedVector(final int dimensionality) { + final double[] v = new double[dimensionality]; + for(int i = 0; i < dimensionality; i++) { + v[i] = Math.random(); + } + double norm = euclideanLength(v); + if(norm != 0) { + for(int row = 0; row < v.length; row++) { + v[row] /= norm; + } + return v; + } + else { + return randomNormalizedVector(dimensionality); + } + } + + /** + * Returns the ith unit vector of the specified dimensionality. + * + * @param dimensionality the dimensionality of the vector + * @param i the index + * @return the ith unit vector of the specified dimensionality + */ + public final static double[] unitVector(final int dimensionality, final int i) { + final double[] v = new double[dimensionality]; + v[i] = 1; + return v; + } + + /** + * Returns a copy of this vector. + * + * @param v original vector + * @return a copy of this vector + */ + public final static double[] copy(final double[] v) { + return Arrays.copyOf(v, v.length); + } + + /** + * Transpose vector to a matrix. + * + * @param v Vector + * @return Matrix + */ + public final static double[][] transpose(final double[] v) { + double[][] re = new double[v.length][1]; + for(int i = 0; i < v.length; i++) { + re[i][0] = v[i]; + } + return re; + } + + /** + * Computes v1 + v2 for vectors. 
+ * + * @param v1 first vector + * @param v2 second vector + * @return the sum v1 + v2 + */ + public final static double[] plus(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + final double[] result = new double[v1.length]; + for(int i = 0; i < result.length; i++) { + result[i] = v1[i] + v2[i]; + } + return result; + } + + /** + * Computes v1 + v2 * s2 + * + * @param v1 first vector + * @param v2 second vector + * @param s2 the scalar + * @return the result of v1 + v2 * s2 + */ + public final static double[] plusTimes(final double[] v1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + final double[] result = new double[v1.length]; + for(int i = 0; i < result.length; i++) { + result[i] = v1[i] + v2[i] * s2; + } + return result; + } + + /** + * Computes v1 * s1 + v2 + * + * @param v1 first vector + * @param s1 the scalar for v1 + * @param v2 second vector + * @return the result of v1 * s1 + v2 + */ + public final static double[] timesPlus(final double[] v1, final double s1, final double[] v2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + final double[] result = new double[v1.length]; + for(int i = 0; i < result.length; i++) { + result[i] = v1[i] * s1 + v2[i]; + } + return result; + } + + /** + * Computes v1 * s1 + v2 * s2 + * + * @param v1 first vector + * @param s1 the scalar for v1 + * @param v2 second vector + * @param s2 the scalar for v2 + * @return the result of v1 * s1 + v2 * s2 + */ + public final static double[] timesPlusTimes(final double[] v1, final double s1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + final double[] result = new double[v1.length]; + for(int i = 0; i < result.length; i++) { + result[i] = v1[i] * s1 + v2[i] * s2; + } + return result; + } + + /** + * Computes v1 = v1 + v2, overwriting v1 + * + * @param v1 first vector 
(overwritten) + * @param v2 second vector + * @return v1 = v1 + v2 + */ + public final static double[] plusEquals(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] += v2[i]; + } + return v1; + } + + /** + * Computes v1 = v1 + v2 * s2, overwriting v1 + * + * @param v1 first vector + * @param v2 another vector + * @param s2 scalar vor v2 + * @return v1 = v1 + v2 * s2 + */ + public final static double[] plusTimesEquals(final double[] v1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] += s2 * v2[i]; + } + return v1; + } + + /** + * Computes v1 = v1 * s1 + v2, overwriting v1 + * + * @param v1 first vector + * @param s1 scalar for v1 + * @param v2 another vector + * @return v1 = v1 * s1 + v2 + */ + public final static double[] timesPlusEquals(final double[] v1, final double s1, final double[] v2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] = v1[i] * s1 + v2[i]; + } + return v1; + } + + /** + * Computes v1 = v1 * s1 + v2 * s2, overwriting v1 + * + * @param v1 first vector + * @param s1 scalar for v1 + * @param v2 another vector + * @param s2 scalar for v2 + * @return v1 = v1 * s1 + v2 * s2 + */ + public final static double[] timesPlusTimesEquals(final double[] v1, final double s1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] = v1[i] * s1 + v2[i] * s2; + } + return v1; + } + + /** + * Computes v1 + d + * + * @param v1 vector to add to + * @param d value to add + * @return v1 + d + */ + public final static double[] plus(final double[] v1, final double d) { + final double[] result = new double[v1.length]; + for(int i = 0; i < result.length; i++) { + result[i] = v1[i] + d; + } + return 
result; + } + + /** + * Computes v1 = v1 + d, overwriting v1 + * + * @param v1 vector to add to + * @param d value to add + * @return Modified vector + */ + public final static double[] plusEquals(final double[] v1, final double d) { + for(int i = 0; i < v1.length; i++) { + v1[i] += d; + } + return v1; + } + + /** + * Computes v1 - v2 + * + * @param v1 first vector + * @param v2 the vector to be subtracted from this vector + * @return v1 - v2 + */ + public final static double[] minus(final double[] v1, final double[] v2) { + final double[] sub = new double[v1.length]; + for(int i = 0; i < v1.length; i++) { + sub[i] = v1[i] - v2[i]; + } + return sub; + } + + /** + * Computes v1 - v2 * s2 + * + * @param v1 first vector + * @param v2 the vector to be subtracted from this vector + * @param s2 the scaling factor for v2 + * @return v1 - v2 * s2 + */ + public final static double[] minusTimes(final double[] v1, final double[] v2, final double s2) { + final double[] sub = new double[v1.length]; + for(int i = 0; i < v1.length; i++) { + sub[i] = v1[i] - v2[i] * s2; + } + return sub; + } + + /** + * Computes v1 * s1 - v2 + * + * @param v1 first vector + * @param s1 the scaling factor for v1 + * @param v2 the vector to be subtracted from this vector + * @return v1 * s1 - v2 + */ + public final static double[] timesMinus(final double[] v1, final double s1, final double[] v2) { + final double[] sub = new double[v1.length]; + for(int i = 0; i < v1.length; i++) { + sub[i] = v1[i] * s1 - v2[i]; + } + return sub; + } + + /** + * Computes v1 * s1 - v2 * s2 + * + * @param v1 first vector + * @param s1 the scaling factor for v1 + * @param v2 the vector to be subtracted from this vector + * @param s2 the scaling factor for v2 + * @return v1 * s1 - v2 * s2 + */ + public final static double[] timesMinusTimes(final double[] v1, final double s1, final double[] v2, final double s2) { + final double[] sub = new double[v1.length]; + for(int i = 0; i < v1.length; i++) { + sub[i] = v1[i] * s1 - 
v2[i] * s2; + } + return sub; + } + + /** + * Computes v1 = v1 - v2, overwriting v1 + * + * @param v1 vector + * @param v2 another vector + * @return v1 = v1 - v2 + */ + public final static double[] minusEquals(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] -= v2[i]; + } + return v1; + } + + /** + * Computes v1 = v1 - v2 * s2, overwriting v1 + * + * @param v1 vector + * @param v2 another vector + * @param s2 scalar for v2 + * @return v1 = v1 - v2 * s2 + */ + public final static double[] minusTimesEquals(final double[] v1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] -= v2[i] * s2; + } + return v1; + } + + /** + * Computes v1 = v1 * s1 - v2, overwriting v1 + * + * @param v1 vector + * @param s1 scalar for v1 + * @param v2 another vector + * @return v1 = v1 * s1 - v2 + */ + public final static double[] timesMinusEquals(final double[] v1, final double s1, final double[] v2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] = v1[i] * s1 - v2[i]; + } + return v1; + } + + /** + * Computes v1 = v1 * s1 - v2 * s2, overwriting v1 + * + * @param v1 vector + * @param s1 scalar for v1 + * @param v2 another vector + * @param s2 Scalar + * @return v1 = v1 * s1 - v2 * s2 + */ + public final static double[] timesMinusTimesEquals(final double[] v1, final double s1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + for(int i = 0; i < v1.length; i++) { + v1[i] = v1[i] * s1 - v2[i] * s2; + } + return v1; + } + + /** + * Compute v1 - d + * + * @param v1 original vector + * @param d Value to subtract + * @return v1 - d + */ + public final static double[] minus(final double[] v1, final double d) { + final double[] result = new double[v1.length]; + 
for(int i = 0; i < v1.length; i++) { + result[i] = v1[i] - d; + } + return result; + } + + /** + * Computes v1 = v1 - d, overwriting v1 + * + * @param v1 original vector + * @param d Value to subtract + * @return v1 = v1 - d + */ + public final static double[] minusEquals(final double[] v1, final double d) { + for(int i = 0; i < v1.length; i++) { + v1[i] -= d; + } + return v1; + } + + /** + * Computes v1 * s1 + * + * @param v1 original vector + * @param s1 the scalar to be multiplied + * @return v1 * s1 + */ + public final static double[] times(final double[] v1, final double s1) { + final double[] v = new double[v1.length]; + for(int i = 0; i < v1.length; i++) { + v[i] = v1[i] * s1; + } + return v; + } + + /** + * Computes v1 = v1 * s1, overwritings v1 + * + * @param v1 original vector + * @param s scalar + * @return v1 = v1 * s1 + */ + public final static double[] timesEquals(final double[] v1, final double s) { + for(int i = 0; i < v1.length; i++) { + v1[i] *= s; + } + return v1; + } + + /** + * Matrix multiplication: v1 * m2 + * + * @param v1 vector + * @param m2 other matrix + * @return Matrix product, v1 * m2 + */ + public final static double[][] times(final double[] v1, final double[][] m2) { + assert (m2.length == 1) : "Matrix inner dimensions must agree."; + final int columndimension = m2[0].length; + final double[][] re = new double[v1.length][columndimension]; + for(int j = 0; j < columndimension; j++) { + for(int i = 0; i < v1.length; i++) { + re[i][j] = v1[i] * m2[0][j]; + } + } + return re; + } + + /** + * Linear algebraic matrix multiplication, v1<sup>T</sup> * m2 + * + * @param v1 vector + * @param m2 other matrix + * @return Matrix product, v1<sup>T</sup> * m2 + */ + public final static double[][] transposeTimes(final double[] v1, final double[][] m2) { + assert (m2.length == v1.length) : "Matrix inner dimensions must agree."; + final int columndimension = m2[0].length; + final double[][] re = new double[1][columndimension]; + for(int j = 0; j < 
columndimension; j++) { + double s = 0; + for(int k = 0; k < v1.length; k++) { + s += v1[k] * m2[k][j]; + } + re[0][j] = s; + } + return re; + } + + /** + * Linear algebraic matrix multiplication, v1<sup>T</sup> * v2 + * + * @param v1 vector + * @param v2 other vector + * @return Matrix product, v1<sup>T</sup> * v2 + */ + public final static double transposeTimes(final double[] v1, final double[] v2) { + assert (v2.length == v1.length) : "Matrix inner dimensions must agree."; + double s = 0; + for(int k = 0; k < v1.length; k++) { + s += v1[k] * v2[k]; + } + return s; + } + + /** + * Linear algebraic matrix multiplication, v1 * m2^T + * + * @param v1 vector + * @param m2 other matrix + * @return Matrix product, v1 * m2^T + */ + public final static double[][] timesTranspose(final double[] v1, final double[][] m2) { + assert (m2[0].length == 1) : "Matrix inner dimensions must agree."; + + final double[][] re = new double[v1.length][m2.length]; + for(int j = 0; j < m2.length; j++) { + for(int i = 0; i < v1.length; i++) { + re[i][j] = v1[i] * m2[j][0]; + } + } + return re; + } + + /** + * Linear algebraic matrix multiplication, v1 * v2^T + * + * @param v1 vector + * @param v2 other vector + * @return Matrix product, v1 * v2^T + */ + public final static double[][] timesTranspose(final double[] v1, final double[] v2) { + final double[][] re = new double[v1.length][v2.length]; + for(int j = 0; j < v2.length; j++) { + for(int i = 0; i < v1.length; i++) { + re[i][j] = v1[i] * v2[j]; + } + } + return re; + } + + /** + * Returns the scalar product of this vector and the specified vector v. + * + * This is the same as transposeTimes. 
+ * + * @param v1 vector + * @param v2 other vector + * @return double the scalar product of vectors v1 and v2 + */ + public final static double scalarProduct(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : "Vector dimensions must agree."; + double scalarProduct = 0.0; + for(int row = 0; row < v1.length; row++) { + scalarProduct += v1[row] * v2[row]; + } + return scalarProduct; + } + + /** + * Euclidean length of the vector + * + * @param v1 vector + * @return euclidean length of this vector + */ + public final static double euclideanLength(final double[] v1) { + double acc = 0.0; + for(int row = 0; row < v1.length; row++) { + final double v = v1[row]; + acc += v * v; + } + return Math.sqrt(acc); + } + + /** + * Normalizes v1 to the length of 1.0. + * + * @param v1 vector + * @return normalized copy of v1 + */ + public final static double[] normalize(final double[] v1) { + double norm = euclideanLength(v1); + double[] re = new double[v1.length]; + if(norm != 0) { + for(int row = 0; row < v1.length; row++) { + re[row] = v1[row] / norm; + } + } + return re; + } + + /** + * Normalizes v1 to the length of 1.0. + * + * @param v1 vector + * @return normalized v1 + */ + public final static double[] normalizeEquals(final double[] v1) { + double norm = euclideanLength(v1); + if(norm != 0) { + for(int row = 0; row < v1.length; row++) { + v1[row] /= norm; + } + } + return v1; + } + + /** + * Projects this row vector into the subspace formed by the specified matrix + * v. + * + * @param m2 the subspace matrix + * @return the projection of p into the subspace formed by v + */ + public final static double[] project(final double[] v1, final double[][] m2) { + assert (v1.length == m2.length) : "v1 and m2 differ in dimensionality!"; + final int columndimension = m2[0].length; + + double[] sum = new double[v1.length]; + for(int i = 0; i < columndimension; i++) { + // TODO: optimize - copy less. 
+ double[] v_i = getCol(m2, i); + plusTimesEquals(sum, v_i, scalarProduct(v1, v_i)); + } + return sum; + } + + /** + * Compute the hash code for the vector + * + * @param v1 elements + * @return hash code + */ + public final static int hashCode(final double[] v1) { + return Arrays.hashCode(v1); + } + + /** + * Compare for equality. + * + * @param v1 first vector + * @param v2 second vector + * @return comparison result + */ + public final static boolean equals(final double[] v1, final double[] v2) { + return Arrays.equals(v1, v2); + } + + /** + * Reset the Vector to 0. + * + * @param v1 vector + */ + public final static void clear(final double[] v1) { + Arrays.fill(v1, 0.0); + } + + /** + * Rotate vector by 90 degrees. + * + * @param v1 first vector + * @return modified v1, rotated by 90 degrees + */ + public final static double[] rotate90Equals(final double[] v1) { + assert (v1.length == 2) : "rotate90Equals is only valid for 2d vectors."; + double temp = v1[0]; + v1[0] = v1[1]; + v1[1] = -temp; + return v1; + } + + // *********** MATRIX operations + + /** + * Returns the unit matrix of the specified dimension. + * + * @param dim the dimensionality of the unit matrix + * @return the unit matrix of the specified dimension + */ + public final static double[][] unitMatrix(final int dim) { + final double[][] e = new double[dim][dim]; + for(int i = 0; i < dim; i++) { + e[i][i] = 1; + } + return e; + } + + /** + * Returns the zero matrix of the specified dimension. + * + * @param dim the dimensionality of the unit matrix + * @return the zero matrix of the specified dimension + */ + public final static double[][] zeroMatrix(final int dim) { + final double[][] z = new double[dim][dim]; + return z; + } + + /** + * Generate matrix with random elements + * + * @param m Number of rows. + * @param n Number of columns. + * @return An m-by-n matrix with uniformly distributed random elements. 
+ */ + public final static double[][] random(final int m, final int n) { + final double[][] A = new double[m][n]; + for(int i = 0; i < m; i++) { + for(int j = 0; j < n; j++) { + A[i][j] = Math.random(); + } + } + return A; + } + + /** + * Generate identity matrix + * + * @param m Number of rows. + * @param n Number of columns. + * @return An m-by-n matrix with ones on the diagonal and zeros elsewhere. + */ + public final static double[][] identity(final int m, final int n) { + final double[][] A = new double[m][n]; + for(int i = 0; i < Math.min(m, n); i++) { + A[i][i] = 1.0; + } + return A; + } + + /** + * Returns a quadratic Matrix consisting of zeros and of the given values on + * the diagonal. + * + * @param v1 the values on the diagonal + * @return the resulting matrix + */ + public final static double[][] diagonal(final double[] v1) { + final double[][] result = new double[v1.length][v1.length]; + for(int i = 0; i < v1.length; i++) { + result[i][i] = v1[i]; + } + return result; + } + + /** + * Make a deep copy of a matrix. + * + * @param m1 Input matrix + * @return a new matrix containing the same values as this matrix + */ + public static final double[][] copy(final double[][] m1) { + final int columndimension = m1[0].length; + final double[][] X = new double[m1.length][columndimension]; + for(int i = 0; i < m1.length; i++) { + System.arraycopy(m1[i], 0, X[i], 0, columndimension); + } + return X; + } + + /** + * Make a one-dimensional row packed copy of the internal array. + * + * @param m1 Input matrix + * @return Matrix elements packed in a one-dimensional array by rows. 
+ */ + public static final double[] rowPackedCopy(final double[][] m1) { + final int columndimension = m1[0].length; + double[] vals = new double[m1.length * columndimension]; + for(int i = 0; i < m1.length; i++) { + for(int j = 0; j < columndimension; j++) { + vals[i * columndimension + j] = m1[i][j]; + } + } + return vals; + } + + /** + * Make a one-dimensional column packed copy of the internal array. + * + * @param m1 Input matrix + * @return Matrix elements packed in a one-dimensional array by columns. + */ + public static final double[] columnPackedCopy(final double[][] m1) { + final int columndimension = m1[0].length; + final double[] vals = new double[m1.length * columndimension]; + for(int i = 0; i < m1.length; i++) { + for(int j = 0; j < columndimension; j++) { + vals[i + j * m1.length] = m1[i][j]; + } + } + return vals; + } + + /** + * Get a submatrix. + * + * @param m1 Input matrix + * @param r0 Initial row index + * @param r1 Final row index + * @param c0 Initial column index + * @param c1 Final column index + * @return m1(r0:r1,c0:c1) + */ + public static final double[][] getMatrix(final double[][] m1, final int r0, final int r1, final int c0, final int c1) { + final double[][] X = new double[r1 - r0 + 1][c1 - c0 + 1]; + for(int i = r0; i <= r1; i++) { + for(int j = c0; j <= c1; j++) { + X[i - r0][j - c0] = m1[i][j]; + } + } + return X; + } + + /** + * Get a submatrix. + * + * @param m1 Input matrix + * @param r Array of row indices. + * @param c Array of column indices. + * @return m1(r(:),c(:)) + */ + public static final double[][] getMatrix(final double[][] m1, final int[] r, final int[] c) { + final double[][] X = new double[r.length][c.length]; + for(int i = 0; i < r.length; i++) { + for(int j = 0; j < c.length; j++) { + X[i][j] = m1[r[i]][c[j]]; + } + } + return X; + } + + /** + * Get a submatrix. + * + * @param m1 Input matrix + * @param r Array of row indices. 
+ * @param c0 Initial column index + * @param c1 Final column index + * @return m1(r(:),c0:c1) + */ + public static final double[][] getMatrix(final double[][] m1, final int[] r, final int c0, final int c1) { + final double[][] X = new double[r.length][c1 - c0 + 1]; + for(int i = 0; i < r.length; i++) { + for(int j = c0; j <= c1; j++) { + X[i][j - c0] = m1[r[i]][j]; + } + } + return X; + } + + /** + * Get a submatrix. + * + * @param m1 Input matrix + * @param r0 Initial row index + * @param r1 Final row index + * @param c Array of column indices. + * @return m1(r0:r1,c(:)) + */ + public static final double[][] getMatrix(final double[][] m1, final int r0, final int r1, final int[] c) { + final double[][] X = new double[r1 - r0 + 1][c.length]; + for(int i = r0; i <= r1; i++) { + for(int j = 0; j < c.length; j++) { + X[i - r0][j] = m1[i][c[j]]; + } + } + return X; + } + + /** + * Set a submatrix. + * + * @param m1 Original matrix + * @param r0 Initial row index + * @param r1 Final row index + * @param c0 Initial column index + * @param c1 Final column index + * @param m2 New values for m1(r0:r1,c0:c1) + */ + public static final void setMatrix(final double[][] m1, final int r0, final int r1, final int c0, final int c1, final double[][] m2) { + for(int i = r0; i <= r1; i++) { + for(int j = c0; j <= c1; j++) { + m1[i][j] = m2[i - r0][j - c0]; + } + } + } + + /** + * Set a submatrix. + * + * @param m1 Original matrix + * @param r Array of row indices. + * @param c Array of column indices. + * @param m2 New values for m1(r(:),c(:)) + */ + public static final void setMatrix(final double[][] m1, final int[] r, final int[] c, final double[][] m2) { + for(int i = 0; i < r.length; i++) { + for(int j = 0; j < c.length; j++) { + m1[r[i]][c[j]] = m2[i][j]; + } + } + } + + /** + * Set a submatrix. + * + * @param m1 Input matrix + * @param r Array of row indices. 
+ * @param c0 Initial column index + * @param c1 Final column index + * @param m2 New values for m1(r(:),c0:c1) + */ + public static final void setMatrix(final double[][] m1, final int[] r, final int c0, final int c1, final double[][] m2) { + for(int i = 0; i < r.length; i++) { + for(int j = c0; j <= c1; j++) { + m1[r[i]][j] = m2[i][j - c0]; + } + } + } + + /** + * Set a submatrix. + * + * @param m1 Input matrix + * @param r0 Initial row index + * @param r1 Final row index + * @param c Array of column indices. + * @param m2 New values for m1(r0:r1,c(:)) + */ + public static final void setMatrix(final double[][] m1, final int r0, final int r1, final int[] c, final double[][] m2) { + for(int i = r0; i <= r1; i++) { + for(int j = 0; j < c.length; j++) { + m1[i][c[j]] = m2[i - r0][j]; + } + } + } + + /** + * Returns the <code>r</code>th row of this matrix as vector. + * + * @param m1 Input matrix + * @param r the index of the row to be returned + * @return the <code>r</code>th row of this matrix + */ + public static final double[] getRow(final double[][] m1, final int r) { + return m1[r].clone(); + } + + /** + * Sets the <code>r</code>th row of this matrix to the specified vector. + * + * @param m1 Original matrix + * @param r the index of the column to be set + * @param row the value of the column to be set + */ + public static final void setRow(final double[][] m1, final int r, final double[] row) { + final int columndimension = getColumnDimensionality(m1); + assert (row.length == columndimension) : "Matrix must consist of the same no of columns!"; + for(int i = 0; i < columndimension; i++) { + m1[r][i] = row[i]; + } + } + + /** + * Get a column from a matrix as vector. 
+ * + * @param m1 Matrix to extract the column from + * @param col Column number + * @return Column + */ + public final static double[] getCol(double[][] m1, int col) { + double[] ret = new double[m1.length]; + for(int i = 0; i < ret.length; i++) { + ret[i] = m1[i][col]; + } + return ret; + } + + /** + * Sets the <code>c</code>th column of this matrix to the specified column. + * + * @param m1 Input matrix + * @param c the index of the column to be set + * @param column the value of the column to be set + */ + public static final void setCol(final double[][] m1, final int c, final double[] column) { + assert (column.length == m1.length) : "Matrix must consist of the same no of rows!"; + for(int i = 0; i < m1.length; i++) { + m1[i][c] = column[i]; + } + } + + /** + * Matrix transpose + * + * @param m1 Input matrix + * @return m1<sup>T</sup> as copy + */ + public static final double[][] transpose(final double[][] m1) { + final int columndimension = getColumnDimensionality(m1); + final double[][] re = new double[columndimension][m1.length]; + for(int i = 0; i < m1.length; i++) { + for(int j = 0; j < columndimension; j++) { + re[j][i] = m1[i][j]; + } + } + return re; + } + + /** + * m3 = m1 + m2 + * + * @param m1 Input matrix + * @param m2 another matrix + * @return m1 + m1 in a new Matrix + */ + public static final double[][] plus(final double[][] m1, final double[][] m2) { + return plusEquals(copy(m1), m2); + } + + /** + * m3 = m1 + s2 * m2 + * + * @param m1 Input matrix + * @param m2 another matrix + * @param s2 scalar + * @return m1 + s2 * m2 in a new Matrix + */ + public static final double[][] plusTimes(final double[][] m1, final double[][] m2, final double s2) { + return plusTimesEquals(copy(m1), m2, s2); + } + + /** + * m1 = m1 + m2, overwriting m1 + * + * @param m1 input matrix + * @param m2 another matrix + * @return m1 = m1 + m2 + */ + public static final double[][] plusEquals(final double[][] m1, final double[][] m2) { + final int columndimension = 
getColumnDimensionality(m1); + assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; + for(int i = 0; i < m1.length; i++) { + for(int j = 0; j < columndimension; j++) { + m1[i][j] += m2[i][j]; + } + } + return m1; + } + + /** + * m1 = m1 + s2 * m2, overwriting m1 + * + * @param m1 input matrix + * @param m2 another matrix + * @param s2 scalar for s2 + * @return m1 = m1 + s2 * m2, overwriting m1 + */ + public static final double[][] plusTimesEquals(final double[][] m1, final double[][] m2, final double s2) { + final int columndimension = getColumnDimensionality(m1); + assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; + for(int i = 0; i < m1.length; i++) { + for(int j = 0; j < columndimension; j++) { + m1[i][j] += s2 * m2[i][j]; + } + } + return m1; + } + + /** + * m3 = m1 - m2 + * + * @param m1 Input matrix + * @param m2 another matrix + * @return m1 - m2 in a new matrix + */ + public static final double[][] minus(final double[][] m1, final double[][] m2) { + return minusEquals(copy(m1), m2); + } + + /** + * m3 = m1 - s2 * m2 + * + * @param m1 Input matrix + * @param m2 another matrix + * @param s2 Scalar + * @return m1 - s2 * m2 in a new Matrix + */ + public static final double[][] minusTimes(final double[][] m1, final double[][] m2, final double s2) { + return minusTimesEquals(copy(m1), m2, s2); + } + + /** + * m1 = m1 - m2, overwriting m1 + * + * @param m1 Input matrix + * @param m2 another matrix + * @return m1 - m2, overwriting m1 + */ + public static final double[][] minusEquals(final double[][] m1, final double[][] m2) { + final int columndimension = getColumnDimensionality(m1); + assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; + for(int i = 0; i < m1.length; i++) { + for(int 
j = 0; j < columndimension; j++) { + m1[i][j] -= m2[i][j]; + } + } + return m1; + } + + /** + * m1 = m1 - s2 * m2, overwriting m1 + * + * @param m1 Input matrix + * @param m2 another matrix + * @param s2 Scalar + * @return m1 = m1 - s2 * m2, overwriting m1 + */ + public static final double[][] minusTimesEquals(final double[][] m1, final double[][] m2, final double s2) { + assert (getRowDimensionality(m1) == getRowDimensionality(m2) && getColumnDimensionality(m1) == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; + for(int i = 0; i < m1.length; i++) { + final double[] row1 = m1[i]; + final double[] row2 = m2[i]; + for(int j = 0; j < row1.length; j++) { + row1[j] -= s2 * row2[j]; + } + } + return m1; + } + + /** + * Multiply a matrix by a scalar, m3 = s1*m1 + * + * @param m1 Input matrix + * @param s1 scalar + * @return s1*m1, in a new matrix + */ + public static final double[][] times(final double[][] m1, final double s1) { + return timesEquals(copy(m1), s1); + } + + /** + * Multiply a matrix by a scalar in place, m1 = s1 * m1 + * + * @param m1 Input matrix + * @param s1 scalar + * @return m1 = s1 * m1, overwriting m1 + */ + public static final double[][] timesEquals(final double[][] m1, final double s1) { + for(int i = 0; i < m1.length; i++) { + final double[] row = m1[i]; + for(int j = 0; j < row.length; j++) { + row[j] *= s1; + } + } + return m1; + } + + /** + * Linear algebraic matrix multiplication, m1 * m2 + * + * @param m1 Input matrix + * @param m2 another matrix + * @return Matrix product, m1 * m2 + */ + public static final double[][] times(final double[][] m1, final double[][] m2) { + final int columndimension = getColumnDimensionality(m1); + final int bcolumndimension = getColumnDimensionality(m2); + // Optimized implementation, exploiting the storage layout + assert (m2.length == columndimension) : "Matrix inner dimensions must agree: " + getRowDimensionality(m1) + "," + getColumnDimensionality(m1) + " * " + getRowDimensionality(m2) + "," 
+ getColumnDimensionality(m2); + final double[][] r2 = new double[m1.length][bcolumndimension]; + // Optimized ala Jama. jik order. + final double[] Bcolj = new double[columndimension]; + for(int j = 0; j < bcolumndimension; j++) { + // Make a linear copy of column j from B + // TODO: use column getter from B? + for(int k = 0; k < columndimension; k++) { + Bcolj[k] = m2[k][j]; + } + // multiply it with each row from A + for(int i = 0; i < m1.length; i++) { + final double[] Arowi = m1[i]; + double s = 0; + for(int k = 0; k < columndimension; k++) { + s += Arowi[k] * Bcolj[k]; + } + r2[i][j] = s; + } + } + return r2; + } + + /** + * Linear algebraic matrix multiplication, m1 * v2 + * + * @param m1 Input matrix + * @param v2 a vector + * @return Matrix product, m1 * v2 + */ + public static final double[] times(final double[][] m1, final double[] v2) { + assert (v2.length == getColumnDimensionality(m1)) : "Matrix inner dimensions must agree."; + final double[] re = new double[m1.length]; + // multiply it with each row from A + for(int i = 0; i < m1.length; i++) { + final double[] Arowi = m1[i]; + double s = 0; + for(int k = 0; k < Arowi.length; k++) { + s += Arowi[k] * v2[k]; + } + re[i] = s; + } + return re; + } + + /** + * Linear algebraic matrix multiplication, m1<sup>T</sup> * v2 + * + * @param m1 Input matrix + * @param v2 another matrix + * @return Matrix product, m1<sup>T</sup> * v2 + */ + public static final double[] transposeTimes(final double[][] m1, final double[] v2) { + final int columndimension = getColumnDimensionality(m1); + assert (v2.length == m1.length) : "Matrix inner dimensions must agree."; + final double[] re = new double[columndimension]; + // multiply it with each row from A + for(int i = 0; i < columndimension; i++) { + double s = 0; + for(int k = 0; k < m1.length; k++) { + s += m1[k][i] * v2[k]; + } + re[i] = s; + } + return re; + } + + /** + * Linear algebraic matrix multiplication, m1<sup>T</sup> * m2 + * + * @param m1 Input matrix + * 
@param m2 another matrix + * @return Matrix product, m1<sup>T</sup> * m2 + */ + public static final double[][] transposeTimes(final double[][] m1, final double[][] m2) { + final int coldim1 = getColumnDimensionality(m1); + final int coldim2 = getColumnDimensionality(m2); + assert (m2.length == m1.length) : "Matrix inner dimensions must agree."; + final double[][] re = new double[coldim1][coldim2]; + final double[] Bcolj = new double[m1.length]; + for(int j = 0; j < coldim2; j++) { + // Make a linear copy of column j from B + for(int k = 0; k < m1.length; k++) { + Bcolj[k] = m2[k][j]; + } + // multiply it with each row from A + for(int i = 0; i < coldim1; i++) { + double s = 0; + for(int k = 0; k < m1.length; k++) { + s += m1[k][i] * Bcolj[k]; + } + re[i][j] = s; + } + } + return re; + } + + /** + * Linear algebraic matrix multiplication, m1 * m2^T + * + * @param m1 Input matrix + * @param m2 another matrix + * @return Matrix product, m1 * m2^T + */ + public static final double[][] timesTranspose(final double[][] m1, final double[][] m2) { + assert (getColumnDimensionality(m2) == getColumnDimensionality(m1)) : "Matrix inner dimensions must agree."; + final double[][] re = new double[m1.length][m2.length]; + for(int j = 0; j < re.length; j++) { + final double[] Browj = m2[j]; + // multiply it with each row from A + for(int i = 0; i < m1.length; i++) { + final double[] Arowi = m1[i]; + double s = 0; + for(int k = 0; k < Browj.length; k++) { + s += Arowi[k] * Browj[k]; + } + re[i][j] = s; + } + } + return re; + } + + /** + * Linear algebraic matrix multiplication, m1^T * m2^T. 
Computed as (m2*m1)^T + * + * @param m1 Input matrix + * @param m2 another matrix + * @return Matrix product, m1^T * m2^T + */ + public static final double[][] transposeTimesTranspose(final double[][] m1, final double[][] m2) { + // Optimized implementation, exploiting the storage layout + assert (m1.length == getColumnDimensionality(m2)) : "Matrix inner dimensions must agree: " + getRowDimensionality(m1) + "," + getColumnDimensionality(m1) + " * " + getRowDimensionality(m2) + "," + getColumnDimensionality(m2); + final double[][] re = new double[getColumnDimensionality(m1)][m2.length]; + // Optimized ala Jama. jik order. + final double[] Acolj = new double[m1.length]; + for(int j = 0; j < re.length; j++) { + // Make a linear copy of column j from B + for(int k = 0; k < m1.length; k++) { + Acolj[k] = m1[k][j]; + } + final double[] Xrow = re[j]; + // multiply it with each row from A + for(int i = 0; i < m2.length; i++) { + final double[] Browi = m2[i]; + double s = 0; + for(int k = 0; k < m1.length; k++) { + s += Browi[k] * Acolj[k]; + } + Xrow[i] = s; + } + } + return re; + } + + /** + * getDiagonal returns array of diagonal-elements. + * + * @param m1 Input matrix + * @return values on the diagonal of the Matrix + */ + public final static double[] getDiagonal(final double[][] m1) { + final int dim = Math.min(getColumnDimensionality(m1), m1.length); + final double[] diagonal = new double[dim]; + for(int i = 0; i < dim; i++) { + diagonal[i] = m1[i][i]; + } + return diagonal; + } + + /** + * Normalizes the columns of this matrix to length of 1.0. 
+ * + * @param m1 Input matrix + */ + public final static void normalizeColumns(final double[][] m1) { + final int columndimension = getColumnDimensionality(m1); + for(int col = 0; col < columndimension; col++) { + double norm = 0.0; + for(int row = 0; row < m1.length; row++) { + norm = norm + (m1[row][col] * m1[row][col]); + } + norm = Math.sqrt(norm); + if(norm != 0) { + for(int row = 0; row < m1.length; row++) { + m1[row][col] /= norm; + } + } + else { + // TODO: else: throw an exception? + } + } + } + + /** + * Returns a matrix which consists of this matrix and the specified columns. + * + * @param m1 Input matrix + * @param m2 the columns to be appended + * @return the new matrix with the appended columns + */ + public static final double[][] appendColumns(final double[][] m1, final double[][] m2) { + final int columndimension = getColumnDimensionality(m1); + final int ccolumndimension = getColumnDimensionality(m2); + assert (m1.length == m2.length) : "m.getRowDimension() != column.getRowDimension()"; + + final int rcolumndimension = columndimension + ccolumndimension; + final double[][] result = new double[m1.length][rcolumndimension]; + for(int i = 0; i < rcolumndimension; i++) { + // FIXME: optimize - excess copying! + if(i < columndimension) { + setCol(result, i, getCol(m1, i)); + } + else { + setCol(result, i, getCol(m2, i - columndimension)); + } + } + return result; + } + + /** + * Returns an orthonormalization of this matrix. + * + * @param m1 Input matrix + * @return the orthonormalized matrix + */ + public static final double[][] orthonormalize(final double[][] m1) { + final int columndimension = getColumnDimensionality(m1); + double[][] v = copy(m1); + + // FIXME: optimize - excess copying! 
+ for(int i = 1; i < columndimension; i++) { + final double[] u_i = getCol(m1, i); + final double[] sum = new double[m1.length]; + for(int j = 0; j < i; j++) { + final double[] v_j = getCol(v, j); + double scalar = scalarProduct(u_i, v_j) / scalarProduct(v_j, v_j); + plusEquals(sum, times(v_j, scalar)); + } + final double[] v_i = minus(u_i, sum); + setCol(v, i, v_i); + } + + normalizeColumns(v); + return v; + } + + /** + * Compute hash code + * + * @param m1 Input matrix + * @return Hash code + */ + public static final int hashCode(final double[][] m1) { + return Arrays.hashCode(m1); + } + + /** + * Test for equality + * + * @param m1 Input matrix + * @param m2 Other matrix + * @return Equality + */ + public static final boolean equals(final double[][] m1, final double[][] m2) { + return Arrays.equals(m1, m2); + } + + /** + * Compare two matrices with a delta parameter to take numerical errors into + * account. + * + * @param m1 Input matrix + * @param m2 other matrix to compare with + * @param maxdelta maximum delta allowed + * @return true if delta smaller than maximum + */ + public static final boolean almostEquals(final double[][] m1, final double[][] m2, final double maxdelta) { + if(m1 == m2) { + return true; + } + if(m2 == null) { + return false; + } + if(m1.getClass() != m2.getClass()) { + return false; + } + if(m1.length != m2.length) { + return false; + } + final int columndimension = getColumnDimensionality(m1); + if(columndimension != getColumnDimensionality(m2)) { + return false; + } + for(int i = 0; i < m1.length; i++) { + for(int j = 0; j < columndimension; j++) { + if(Math.abs(m1[i][j] - m2[i][j]) > maxdelta) { + return false; + } + } + } + return true; + } + + /** + * Compare two matrices with a delta parameter to take numerical errors into + * account. 
/**
 * Compares two matrices entry by entry, tolerating numerical errors up to
 * the default delta.
 *
 * @param m1 Input matrix
 * @param m2 other matrix to compare with
 * @return almost equals with delta {@link #DELTA}
 */
public static final boolean almostEquals(final double[][] m1, final double[][] m2) {
  final boolean withinDefaultDelta = almostEquals(m1, m2, DELTA);
  return withinDefaultDelta;
}

/**
 * Returns the number of rows of the matrix.
 *
 * @param m1 Input matrix
 * @return the number of rows.
 */
public static final int getRowDimensionality(final double[][] m1) {
  final int rows = m1.length;
  return rows;
}

/**
 * Returns the number of columns of the matrix, read from its first row.
 *
 * @param m1 Input matrix
 * @return the number of columns.
 */
public static final int getColumnDimensionality(final double[][] m1) {
  final double[] firstRow = m1[0];
  return firstRow.length;
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java index 8cf3cf78..0a674d87 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,11 +23,12 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.io.Serializable; import java.util.Arrays; -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; /** * Provides a vector object that encapsulates an m x 1 - matrix object. @@ -36,12 +37,7 @@ import de.lmu.ifi.dbs.elki.utilities.FormatUtil; * * @apiviz.landmark */ -public class Vector implements MatrixLike<Vector>, Serializable { - /** - * Serial version - */ - private static final long serialVersionUID = 1L; - +public class Vector implements NumberVector<Vector, Double> { /** * Array for internal storage of elements. * @@ -73,13 +69,18 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param dimensionality dimensionality * @return the dimensionality of the vector */ - // FIXME: may also return null vector by chance. 
public static final Vector randomNormalizedVector(final int dimensionality) { final Vector v = new Vector(dimensionality); - for(int i = 0; i < dimensionality; i++) { - v.elements[i] = Math.random(); + double norm = 0; + while(norm <= 0) { + for(int i = 0; i < dimensionality; i++) { + v.elements[i] = Math.random(); + } + norm = v.euclideanLength(); + } + for(int row = 0; row < dimensionality; row++) { + v.elements[row] /= norm; } - v.normalize(); return v; } @@ -101,7 +102,6 @@ public class Vector implements MatrixLike<Vector>, Serializable { * * @return a copy of this vector */ - @Override public final Vector copy() { return new Vector(elements.clone()); } @@ -137,20 +137,11 @@ public class Vector implements MatrixLike<Vector>, Serializable { * * @return the dimensionality of this vector */ - public final int getDimensionality() { - return elements.length; - } - @Override - public final int getRowDimensionality() { + public final int getDimensionality() { return elements.length; } - @Override - public final int getColumnDimensionality() { - return 1; - } - /** * Returns the value at the specified row. * @@ -161,14 +152,6 @@ public class Vector implements MatrixLike<Vector>, Serializable { return elements[i]; } - @Override - public final double get(final int i, final int j) { - if(j != 0) { - throw new ArrayIndexOutOfBoundsException(); - } - return elements[i]; - } - /** * Sets the value at the specified row. 
* @@ -182,37 +165,6 @@ public class Vector implements MatrixLike<Vector>, Serializable { return this; } - @Override - public final Vector set(final int i, final int j, final double s) { - if(j != 0) { - throw new ArrayIndexOutOfBoundsException(); - } - elements[i] = s; - return this; - } - - @Override - public final Vector increment(final int i, final int j, final double s) { - if(j != 0) { - throw new ArrayIndexOutOfBoundsException(); - } - elements[i] += s; - return this; - } - - @Override - public final Vector getColumnVector(final int i) { - if(i != 0) { - throw new ArrayIndexOutOfBoundsException(); - } - return this; - } - - @Override - public final Matrix transpose() { - return new Matrix(this.elements, 1); - } - /** * Returns a new vector which is the result of this vector plus the specified * vector. @@ -220,9 +172,8 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param v the vector to be added * @return the resulting vector */ - @Override public final Vector plus(final Vector v) { - checkDimensions(v); + assert (this.elements.length == v.elements.length) : "Vector dimensions must agree."; final Vector result = new Vector(elements.length); for(int i = 0; i < elements.length; i++) { result.elements[i] = elements[i] + v.elements[i]; @@ -238,9 +189,8 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param s the scalar * @return the resulting vector */ - @Override public final Vector plusTimes(final Vector v, final double s) { - checkDimensions(v); + assert (this.elements.length == v.elements.length) : "Vector dimensions must agree."; final Vector result = new Vector(elements.length); for(int i = 0; i < elements.length; i++) { result.elements[i] = elements[i] + v.elements[i] * s; @@ -254,11 +204,10 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param B another matrix * @return A + B in this Matrix */ - @Override public final Vector plusEquals(final Vector B) { - checkDimensions(B); + assert 
(this.elements.length == B.elements.length) : "Vector dimensions must agree."; for(int i = 0; i < elements.length; i++) { - elements[i] += B.get(i, 0); + elements[i] += B.elements[i]; } return this; } @@ -270,11 +219,10 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param s Scalar * @return A + s * B in this Matrix */ - @Override public final Vector plusTimesEquals(final Vector B, final double s) { - checkDimensions(B); + assert (this.elements.length == B.elements.length) : "Vector dimensions must agree."; for(int i = 0; i < elements.length; i++) { - elements[i] += s * B.get(i, 0); + elements[i] += s * B.elements[i]; } return this; } @@ -298,7 +246,6 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param v the vector to be subtracted from this vector * @return this vector minus the specified vector v */ - @Override public final Vector minus(final Vector v) { final Vector sub = new Vector(elements.length); for(int i = 0; i < elements.length; i++) { @@ -314,7 +261,6 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param s the scaling factor * @return this vector minus the specified vector v */ - @Override public final Vector minusTimes(final Vector v, final double s) { final Vector sub = new Vector(elements.length); for(int i = 0; i < elements.length; i++) { @@ -329,11 +275,10 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param B another matrix * @return A - B in this Matrix */ - @Override public final Vector minusEquals(final Vector B) { - checkDimensions(B); + assert (this.elements.length == B.elements.length) : "Vector dimensions must agree."; for(int i = 0; i < elements.length; i++) { - elements[i] -= B.get(i, 0); + elements[i] -= B.elements[i]; } return this; } @@ -345,11 +290,10 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param s Scalar * @return A - s * B in this Matrix */ - @Override public final Vector minusTimesEquals(final Vector B, 
final double s) { - checkDimensions(B); + assert (this.elements.length == B.elements.length) : "Vector dimensions must agree."; for(int i = 0; i < elements.length; i++) { - elements[i] -= s * B.get(i, 0); + elements[i] -= s * B.elements[i]; } return this; } @@ -374,7 +318,6 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param s the scalar to be multiplied * @return the resulting vector */ - @Override public final Vector times(final double s) { final Vector v = new Vector(elements.length); for(int i = 0; i < elements.length; i++) { @@ -389,7 +332,6 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @param s scalar * @return replace A by s*A */ - @Override public final Vector timesEquals(final double s) { for(int i = 0; i < elements.length; i++) { elements[i] *= s; @@ -402,12 +344,9 @@ public class Vector implements MatrixLike<Vector>, Serializable { * * @param B another matrix * @return Matrix product, A * B - * @throws IllegalArgumentException Matrix inner dimensions must agree. */ public final Matrix times(final Matrix B) { - if(B.elements.length != 1) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); - } + assert (B.elements.length == 1) : "Matrix inner dimensions must agree."; final Matrix X = new Matrix(this.elements.length, B.columndimension); for(int j = 0; j < B.columndimension; j++) { for(int i = 0; i < this.elements.length; i++) { @@ -422,12 +361,9 @@ public class Vector implements MatrixLike<Vector>, Serializable { * * @param B another matrix * @return Matrix product, A<sup>T</sup> * B - * @throws IllegalArgumentException Matrix inner dimensions must agree. 
*/ public final Matrix transposeTimes(final Matrix B) { - if(B.elements.length != this.elements.length) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); - } + assert (B.elements.length == this.elements.length) : "Matrix inner dimensions must agree."; final Matrix X = new Matrix(1, B.columndimension); for(int j = 0; j < B.columndimension; j++) { // multiply it with each row from A @@ -441,16 +377,34 @@ public class Vector implements MatrixLike<Vector>, Serializable { } /** + * Linear algebraic matrix multiplication, a<sup>T</sup> * B * c + * + * @param B matrix + * @param c vector on the right + * @return Matrix product, a<sup>T</sup> * B + */ + public final double transposeTimesTimes(final Matrix B, final Vector c) { + assert (B.elements.length == this.elements.length) : "Matrix inner dimensions must agree."; + double sum = 0.0; + for(int j = 0; j < B.columndimension; j++) { + // multiply it with each row from A + double s = 0; + for(int k = 0; k < this.elements.length; k++) { + s += this.elements[k] * B.elements[k][j]; + } + sum += s * c.elements[j]; + } + return sum; + } + + /** * Linear algebraic matrix multiplication, A<sup>T</sup> * B * * @param B another vector * @return Matrix product, A<sup>T</sup> * B - * @throws IllegalArgumentException Matrix inner dimensions must agree. */ public final double transposeTimes(final Vector B) { - if(B.elements.length != this.elements.length) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); - } + assert (B.elements.length == this.elements.length) : "Matrix inner dimensions must agree."; double s = 0; for(int k = 0; k < this.elements.length; k++) { s += this.elements[k] * B.elements[k]; @@ -463,61 +417,32 @@ public class Vector implements MatrixLike<Vector>, Serializable { * * @param B another matrix * @return Matrix product, A * B^T - * @throws IllegalArgumentException Matrix inner dimensions must agree. 
*/ public final Matrix timesTranspose(final Matrix B) { - if(B.columndimension != 1) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); - } + assert (B.columndimension == 1) : "Matrix inner dimensions must agree."; final Matrix X = new Matrix(this.elements.length, B.elements.length); for(int j = 0; j < B.elements.length; j++) { for(int i = 0; i < this.elements.length; i++) { - X.elements[i][j] = elements[i] * B.elements[0][j]; + X.elements[i][j] = elements[i] * B.elements[j][0]; } } return X; } /** - * Returns the scalar product of this vector and the specified vector v. - * - * @param v the vector - * @return double the scalar product of this vector and v - */ - public final double scalarProduct(final Vector v) { - checkDimensions(v); - double scalarProduct = 0.0; - for(int row = 0; row < elements.length; row++) { - final double prod = elements[row] * v.elements[row]; - scalarProduct += prod; - } - return scalarProduct; - } - - /** - * Inverts every element of the vector. - * - * @return the resulting vector - */ - public final Vector inverseVector() { - final Vector inv = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { - inv.elements[i] = 1.0 / elements[i]; - } - return inv; - } - - /** - * Square roots every element of the vector. 
+ * Linear algebraic matrix multiplication, A * B^T * - * @return the resulting vector + * @param B another matrix + * @return Matrix product, A * B^T */ - public final Vector sqrtVector() { - final Vector sqrt = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { - sqrt.elements[i] = Math.sqrt(elements[i]); + public final Matrix timesTranspose(final Vector B) { + final Matrix X = new Matrix(this.elements.length, B.elements.length); + for(int j = 0; j < B.elements.length; j++) { + for(int i = 0; i < this.elements.length; i++) { + X.elements[i][j] = elements[i] * B.elements[j]; + } } - return sqrt; + return X; } /** @@ -526,27 +451,15 @@ public class Vector implements MatrixLike<Vector>, Serializable { * @return the length of this vector */ public final double euclideanLength() { - double sqlen = 0.0; + double acc = 0.0; for(int row = 0; row < elements.length; row++) { - sqlen += elements[row] * elements[row]; + final double v = elements[row]; + acc += v * v; } - return Math.sqrt(sqlen); + return Math.sqrt(acc); } /** - * Frobenius norm - * - * @return sqrt of sum of squares of all elements. - */ - public double normF() { - double f = 0; - for(int i = 0; i < elements.length; i++) { - f = MathUtil.hypotenuse(f, elements[i]); - } - return f; - } - - /** * Normalizes this vector to the length of 1.0. */ public final Vector normalize() { @@ -565,40 +478,21 @@ public class Vector implements MatrixLike<Vector>, Serializable { * * @param v the subspace matrix * @return the projection of p into the subspace formed by v - * @throws IllegalArgumentException if this matrix is no row vector, i.e. 
this - * matrix has more than one column or this matrix and v have different - * length of rows */ public final Vector projection(final Matrix v) { - if(elements.length != v.elements.length) { - throw new IllegalArgumentException("p and v differ in row dimensionality!"); - } + assert (elements.length == v.elements.length) : "p and v differ in row dimensionality!"; Vector sum = new Vector(elements.length); for(int i = 0; i < v.columndimension; i++) { - // TODO: optimize - copy less. - Vector v_i = v.getColumnVector(i); - sum.plusEquals(v_i.times(scalarProduct(v_i))); + // TODO: optimize - copy less? + Vector v_i = v.getCol(i); + sum.plusTimesEquals(v_i, this.transposeTimes(v_i)); } return sum; } - /** - * Check if this.getDimensionality() == v.getDimensionality(). - * - * @throws IllegalArgumentException if the dimensions do not agree - */ - private final void checkDimensions(final Vector v) { - if(this.elements.length != v.elements.length) { - throw new IllegalArgumentException("Vector dimensions must agree."); - } - } - @Override public int hashCode() { - final int PRIME = 31; - int result = 1; - result = PRIME * result + Arrays.hashCode(this.elements); - return result; + return Arrays.hashCode(this.elements); } @Override @@ -616,12 +510,7 @@ public class Vector implements MatrixLike<Vector>, Serializable { if(this.elements.length != other.elements.length) { return false; } - for(int i = 0; i < this.elements.length; i++) { - if(this.elements[i] != other.elements[i]) { - return false; - } - } - return true; + return Arrays.equals(this.elements, other.elements); } /** @@ -650,4 +539,95 @@ public class Vector implements MatrixLike<Vector>, Serializable { public void setZero() { Arrays.fill(elements, 0.0); } + + /** + * Rotate vector by 90 degrees. + * + * @return self, for operation chaining. 
+ */ + public Vector rotate90Equals() { + assert (elements.length == 2); + double temp = elements[0]; + elements[0] = elements[1]; + elements[1] = -temp; + return this; + } + + // ////// NumberVector API. A bit hackish. :-( + + @Override + public double getMin(int dimension) { + return elements[dimension - 1]; + } + + @Override + public double getMax(int dimension) { + return elements[dimension - 1]; + } + + @Override + public Double getValue(int dimension) { + return elements[dimension - 1]; + } + + @Override + public double doubleValue(int dimension) { + return elements[dimension - 1]; + } + + @Override + public float floatValue(int dimension) { + return (float) elements[dimension - 1]; + } + + @Override + public int intValue(int dimension) { + return (int) elements[dimension - 1]; + } + + @Override + public long longValue(int dimension) { + return (long) elements[dimension - 1]; + } + + @Override + public short shortValue(int dimension) { + return (short) elements[dimension - 1]; + } + + @Override + public byte byteValue(int dimension) { + return (byte) elements[dimension - 1]; + } + + @Override + public Vector getColumnVector() { + return copy(); + } + + @Override + public Vector newNumberVector(double[] values) { + return new Vector(values); + } + + @Override + public <A> Vector newNumberVector(A array, NumberArrayAdapter<?, A> adapter) { + double[] raw = new double[adapter.size(array)]; + for(int i = 0; i < raw.length; i++) { + raw[i] = adapter.getDouble(array, i); + } + return new Vector(raw); + } + + @Override + public <A> Vector newFeatureVector(A array, ArrayAdapter<Double, A> adapter) { + if(adapter instanceof NumberArrayAdapter) { + return newNumberVector(array, (NumberArrayAdapter<?, A>) adapter); + } + double[] raw = new double[adapter.size(array)]; + for(int i = 0; i < raw.length; i++) { + raw[i] = adapter.get(array, i); + } + return new Vector(raw); + } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java index 0cb67acc..b04c64d5 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java index 46245349..76f90138 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java index 327b92b4..42badb2f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 
Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java index 8b8253dc..d87e1208 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java index f2ad8c35..2c4143a8 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java index d38b9a7b..5c55dfd1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java @@ -43,7 +43,7 @@ can be found at <a href="http://math.nist.gov/javanumerics/jama/">http://math.ni This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität 
München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java index 197ad939..c14986bd 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -54,17 +54,17 @@ public abstract class AbstractCovarianceMatrixBuilder<V extends NumberVector<? e public abstract Matrix processIds(DBIDs ids, Relation<? extends V> database); @Override - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database, int k) { + public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database, int k) { ModifiableDBIDs ids = DBIDUtil.newArray(k); int have = 0; - for(Iterator<DistanceResultPair<D>> it = results.iterator(); it.hasNext() && have < k; have++) { + for(Iterator<? extends DistanceResultPair<D>> it = results.iterator(); it.hasNext() && have < k; have++) { ids.add(it.next().getDBID()); } return processIds(ids, database); } @Override - final public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) { + final public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? 
extends V> database) { return processQueryResults(results, database, results.size()); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java index 112a6df4..bc9486c5 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java index 7416abce..5098ffac 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -67,7 +67,7 @@ public interface CovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> * @param k the number of entries to process * @return Covariance Matrix */ - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database, int k); + public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? 
extends V> database, int k); /** * Compute Covariance Matrix for a QueryResult Collection @@ -78,5 +78,5 @@ public interface CovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> * @param database the database used * @return Covariance Matrix */ - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database); + public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database); }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java index 3c9d8603..553a111f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java index 9fdac000..a2c83249 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java index c4839109..08482b71 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme 
ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java index c1edb99a..e8d2b844 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java index 47dbb01a..29be965c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -86,7 +86,7 @@ public class NormalizingEigenPairFilter implements EigenPairFilter { */ private void normalizeEigenPair(final EigenPair eigenPair) { final Vector eigenvector = eigenPair.getEigenvector(); - final double scaling = 1.0 / Math.sqrt(eigenPair.getEigenvalue()) * eigenvector.normF(); + final double scaling = 1.0 / Math.sqrt(eigenPair.getEigenvalue()) * eigenvector.euclideanLength(); eigenvector.timesEquals(scaling); } }
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java
new file mode 100644
index 00000000..8b53dc43
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java
@@ -0,0 +1,259 @@
+package de.lmu.ifi.dbs.elki.math.linearalgebra.pca;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
+/**
+ * Performs a self-tuning local PCA based on the covariance matrices of given
+ * objects. At most the closest 'k' points are used in the calculation and a
+ * weight function is applied.
+ *
+ * The number of points actually used depends on when the strong eigenvectors
+ * exhibit the clearest correlation.
+ *
+ * @author Erich Schubert
+ * @param <V> vector type
+ */
+@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "A General Framework for Increasing the Robustness of PCA-based Correlation Clustering Algorithms", booktitle = "Proceedings of the 20th International Conference on Scientific and Statistical Database Management (SSDBM), Hong Kong, China, 2008", url = "http://dx.doi.org/10.1007/978-3-540-69497-7_27")
+public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>> extends PCAFilteredRunner<V> {
+  /**
+   * Constructor.
+   *
+   * @param covarianceMatrixBuilder builder used to compute the covariance matrices
+   * @param eigenPairFilter filter used to select the strong eigenpairs
+   * @param big forwarded unchanged to {@link PCAFilteredRunner}
+   * @param small forwarded unchanged to {@link PCAFilteredRunner}
+   */
+  public PCAFilteredAutotuningRunner(CovarianceMatrixBuilder<V> covarianceMatrixBuilder, EigenPairFilter eigenPairFilter, double big, double small) {
+    super(covarianceMatrixBuilder, eigenPairFilter, big, small);
+  }
+
+  /**
+   * Run the autotuning PCA on a set of objects. The objects are ranked by
+   * their Euclidean distance to the centroid of the set and then handed to
+   * {@link #processQueryResult}.
+   *
+   * @param ids IDs of the objects to process
+   * @param database relation to retrieve the vectors from
+   * @return result of the filtered PCA
+   */
+  @Override
+  public PCAFilteredResult processIds(DBIDs ids, Relation<? extends V> database) {
+    // Assume Euclidean distance. In the context of PCA, the neighborhood should
+    // be L2-spherical to be unbiased.
+    V center = DatabaseUtil.centroid(database, ids);
+    List<DoubleDistanceResultPair> dres = new ArrayList<DoubleDistanceResultPair>(ids.size());
+    for(DBID id : ids) {
+      final double dist = EuclideanDistanceFunction.STATIC.doubleDistance(center, database.get(id));
+      dres.add(new DoubleDistanceResultPair(dist, id));
+    }
+    Collections.sort(dres);
+    return processQueryResult(dres, database);
+  }
+
+  /**
+   * Run the autotuning PCA on a query result.
+   *
+   * For growing neighborhood sizes k, the covariance matrix of the k closest
+   * results is decomposed and the explained variance of its strong eigenpairs
+   * is recorded. Whenever 2 * smooth + 1 consecutive values of k agree on the
+   * number of strong eigenpairs, their explained variances are averaged; per
+   * number of strong eigenpairs the covariance matrix from the middle of the
+   * best such window is kept.
+   *
+   * @param results query results, expected to be sorted by distance
+   * @param database relation to retrieve the vectors from
+   * @return result of the filtered PCA
+   */
+  @Override
+  public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database) {
+    assertSortedByDistance(results);
+    final int dim = DatabaseUtil.dimensionality(database);
+
+    List<Matrix> best = new ArrayList<Matrix>(dim);
+    for(int i = 0; i < dim; i++) {
+      best.add(null);
+    }
+    double[] beststrength = new double[dim];
+    for(int i = 0; i < dim; i++) {
+      beststrength[i] = -1;
+    }
+    int[] bestk = new int[dim];
+    // 'history'
+    LinkedList<Matrix> prevM = new LinkedList<Matrix>();
+    LinkedList<Double> prevS = new LinkedList<Double>();
+    LinkedList<Integer> prevD = new LinkedList<Integer>();
+    // TODO: starting parameter shouldn't be hardcoded...
+    int smooth = 3;
+    int startk = 4;
+    if(startk > results.size() - 1) {
+      startk = results.size() - 1;
+    }
+    // TODO: add smoothing options, handle border cases better.
+    for(int k = startk; k < results.size(); k++) {
+      // Only the k closest results contribute to this covariance matrix.
+      Matrix covMat = covarianceMatrixBuilder.processQueryResults(results, database, k);
+      // sorted eigenpairs, eigenvectors, eigenvalues
+      EigenvalueDecomposition evd = new EigenvalueDecomposition(covMat);
+      SortedEigenPairs eigenPairs = new SortedEigenPairs(evd, false);
+      FilteredEigenPairs filteredEigenPairs = getEigenPairFilter().filter(eigenPairs);
+
+      // correlationDimension = #strong EV
+      int thisdim = filteredEigenPairs.countStrongEigenPairs();
+
+      // FIXME: handle the case of no strong EVs.
+      assert ((thisdim > 0) && (thisdim <= dim));
+      double thisexplain = computeExplainedVariance(filteredEigenPairs);
+
+      prevM.add(covMat);
+      prevS.add(thisexplain);
+      prevD.add(thisdim);
+      assert (prevS.size() == prevM.size());
+      assert (prevS.size() == prevD.size());
+
+      if(prevS.size() >= 2 * smooth + 1) {
+        // all the same dimension?
+        boolean samedim = true;
+        for(Iterator<Integer> it = prevD.iterator(); it.hasNext();) {
+          if(it.next().intValue() != thisdim) {
+            samedim = false;
+          }
+        }
+        if(samedim) {
+          // average their explain values
+          double avgexplain = 0.0;
+          for(Iterator<Double> it = prevS.iterator(); it.hasNext();) {
+            avgexplain += it.next().doubleValue();
+          }
+          avgexplain /= prevS.size();
+
+          if(avgexplain > beststrength[thisdim - 1]) {
+            beststrength[thisdim - 1] = avgexplain;
+            best.set(thisdim - 1, prevM.get(smooth));
+            bestk[thisdim - 1] = k - smooth;
+          }
+        }
+        prevM.removeFirst();
+        prevS.removeFirst();
+        prevD.removeFirst();
+        assert (prevS.size() == prevM.size());
+        assert (prevS.size() == prevD.size());
+      }
+    }
+    // Try all dimensions, lowest first.
+    for(int i = 0; i < dim; i++) {
+      if(beststrength[i] > 0.0) {
+        // If the best was the lowest or the biggest k, skip it!
+        if(bestk[i] == startk + smooth) {
+          continue;
+        }
+        if(bestk[i] == results.size() - smooth - 1) {
+          continue;
+        }
+        Matrix covMat = best.get(i);
+
+        // We stop at the lowest dimension that did the job for us.
+        // System.err.println("Auto-k: "+bestk[i]+" dim: "+(i+1));
+        return processCovarMatrix(covMat);
+      }
+    }
+    // NOTE: if we didn't get a 'maximum' anywhere, we end up with the data from
+    // the last run of the loop above. I.e. PCA on the full data set. That is
+    // intended.
+    return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database));
+  }
+
+  /**
+   * Compute the explained variance for a FilteredEigenPairs, i.e. the share of
+   * the total eigenvalue sum that belongs to the strong eigenpairs.
+   *
+   * @param filteredEigenPairs filtered eigenpairs to evaluate
+   * @return explained variance by the strong eigenvectors.
+   */
+  private double computeExplainedVariance(FilteredEigenPairs filteredEigenPairs) {
+    double strongsum = 0.0;
+    double weaksum = 0.0;
+    for(EigenPair ep : filteredEigenPairs.getStrongEigenPairs()) {
+      strongsum += ep.getEigenvalue();
+    }
+    for(EigenPair ep : filteredEigenPairs.getWeakEigenPairs()) {
+      weaksum += ep.getEigenvalue();
+    }
+    return strongsum / (strongsum + weaksum);
+  }
+
+  /**
+   * Check that the results are sorted by distance. The results are not
+   * modified; a warning is printed to stderr for every entry that is closer
+   * than its predecessor.
+   *
+   * @param results query results to check
+   */
+  private <D extends NumberDistance<?, ?>> void assertSortedByDistance(Collection<? extends DistanceResultPair<D>> results) {
+    // TODO: sort results instead?
+    double dist = -1.0;
+    for(Iterator<? extends DistanceResultPair<D>> it = results.iterator(); it.hasNext();) {
+      double qr = it.next().getDistance().doubleValue();
+      if(qr < dist) {
+        System.err.println("WARNING: results not sorted by distance!");
+      }
+      dist = qr;
+    }
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends PCAFilteredRunner.Parameterizer<V> {
+    @Override
+    protected PCAFilteredAutotuningRunner<V> makeInstance() {
+      return new PCAFilteredAutotuningRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small);
+    }
+  }
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java index 30453f0c..4aa626a9 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -118,7 +118,7 @@ public class PCAFilteredResult extends PCAResult implements ProjectionResult { for(Iterator<EigenPair> it = strongEigenPairs.iterator(); it.hasNext(); i++) { EigenPair eigenPair = it.next(); strongEigenvalues[i] = eigenPair.getEigenvalue(); - strongEigenvectors.setColumnVector(i, eigenPair.getEigenvector()); + strongEigenvectors.setCol(i, eigenPair.getEigenvector()); sumStrongEigenvalues += strongEigenvalues[i]; } } @@ -131,7 +131,7 @@ public class PCAFilteredResult extends PCAResult implements ProjectionResult { for(Iterator<EigenPair> it = weakEigenPairs.iterator(); it.hasNext(); i++) { EigenPair eigenPair = it.next(); weakEigenvalues[i] = eigenPair.getEigenvalue(); - weakEigenvectors.setColumnVector(i, eigenPair.getEigenvector()); + weakEigenvectors.setCol(i, eigenPair.getEigenvector()); sumWeakEigenvalues += weakEigenvalues[i]; } } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java index 799502fa..2391446d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: 
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -141,7 +141,7 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P * @return PCA result */ @Override - public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) { + public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database) { return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); } @@ -153,7 +153,7 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P @Override public PCAFilteredResult processCovarMatrix(Matrix covarMatrix) { // TODO: add support for a different implementation to do EVD? 
- EigenvalueDecomposition evd = covarMatrix.eig(); + EigenvalueDecomposition evd = new EigenvalueDecomposition(covarMatrix); return processEVD(evd); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java index 26497645..6969a3a3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java index 21b4304f..661fa5c5 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -113,7 +113,7 @@ public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parame * @param database the database used * @return PCA result */ - public <D extends NumberDistance<?, ?>> PCAResult processQueryResult(Collection<DistanceResultPair<D>> results, Relation<? extends V> database) { + public <D extends NumberDistance<?, ?>> PCAResult processQueryResult(Collection<? extends DistanceResultPair<D>> results, Relation<? 
extends V> database) { return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); } @@ -125,7 +125,7 @@ public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parame */ public PCAResult processCovarMatrix(Matrix covarMatrix) { // TODO: add support for a different implementation to do EVD? - EigenvalueDecomposition evd = covarMatrix.eig(); + EigenvalueDecomposition evd = new EigenvalueDecomposition(covarMatrix); return processEVD(evd); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java index b0065eae..321c12cc 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java index f43ac424..f66a1e96 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java index 5109b010..59b2b750 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java index d09b7312..94636553 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java index 393b26ec..2c88d490 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI 
Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java index 57d21ec1..fbca039d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java index 547737fe..db5e8702 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -158,7 +158,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, * @return Covariance Matrix */ @Override - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<DistanceResultPair<D>> results, Relation<? extends V> database, int k) { + public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? 
extends V> database, int k) { final int dim = DatabaseUtil.dimensionality(database); final CovarianceMatrix cmat = new CovarianceMatrix(dim); @@ -172,7 +172,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, double stddev = 0.0; { int i = 0; - for(Iterator<DistanceResultPair<D>> it = results.iterator(); it.hasNext() && i < k; i++) { + for(Iterator<? extends DistanceResultPair<D>> it = results.iterator(); it.hasNext() && i < k; i++) { DistanceResultPair<D> res = it.next(); final double dist; if(res instanceof DoubleDistanceResultPair) { @@ -194,7 +194,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, // calculate weighted PCA int i = 0; - for(Iterator<DistanceResultPair<D>> it = results.iterator(); it.hasNext() && i < k; i++) { + for(Iterator<? extends DistanceResultPair<D>> it = results.iterator(); it.hasNext() && i < k; i++) { DistanceResultPair<? extends NumberDistance<?, ?>> res = it.next(); final double dist; if(res instanceof DoubleDistanceResultPair) { diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java index 94b0f5a5..f08a016c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java index b31d15ea..ef089980 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java +++ 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java index 71096f05..656406c3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; */ import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; /** * Gaussian Error Function Weight function, scaled using stddev. This probably @@ -42,6 +43,6 @@ public final class ErfcStddevWeight implements WeightFunction { if(stddev <= 0) { return 1; } - return MathUtil.erfc(MathUtil.SQRTHALF * distance / stddev); + return NormalDistribution.erfc(MathUtil.SQRTHALF * distance / stddev); } -} +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java index 1a6cb5b6..6a8783ed 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,7 +23,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; /** * Gaussian Error Function Weight function, scaled such that the result it 0.1 @@ -48,6 +48,6 @@ public final class ErfcWeight implements WeightFunction { double relativedistance = distance / max; // the scaling was picked such that getWeight(a,a,0) is 0.1 // since erfc(1.1630871536766736) == 1.0 - return MathUtil.erfc(1.1630871536766736 * relativedistance); + return NormalDistribution.erfc(1.1630871536766736 * relativedistance); } } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java index 979ccaf5..e6cb9951 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: 
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java index 96077581..0792e764 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java index 33ff90c5..f73d4a7d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java index 00eeb8b1..05c3389f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java 
+++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java index f618c08b..d5843cf1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java index da1c0a61..740ec8ca 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java index ee17b25b..c4ae9a97 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java index 241c2673..38318133 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java index 7d9b41ad..372dc016 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is 
part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java index 9755c963..49cb1aca 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java index c3102350..79c9f8a5 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java index 268a3e89..649a8fb7 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java +++ 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/package-info.java b/src/de/lmu/ifi/dbs/elki/math/package-info.java index dabeba0e..e01c3e50 100644 --- a/src/de/lmu/ifi/dbs/elki/math/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/LinearScale.java b/src/de/lmu/ifi/dbs/elki/math/scales/LinearScale.java new file mode 100644 index 00000000..32015853 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/scales/LinearScale.java @@ -0,0 +1,245 @@ +package de.lmu.ifi.dbs.elki.math.scales; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
/*
 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

import java.text.NumberFormat;
import java.util.Locale;

/**
 * Class to handle a linear scale for an axis.
 * 
 * The computed scales are rounded to be on decimal borders, choosing an
 * appropriate resolution to have between 4 and 31 major tics (3 to 30
 * intervals). Future versions might use major/minor tics to get even nicer
 * values.
 * 
 * @author Erich Schubert
 * 
 */

// TODO: iterator over sensible tics (major/minor)
// TODO: interface for logarithmic scales
// TODO: magic to choose appropriate linear/log scales based on data
// distribution.
public class LinearScale {
  /**
   * At 31 scale steps, decrease resolution. Shared constant, so it is kept
   * static instead of being recomputed per instance.
   */
  private static final double ZOOMFACTOR = Math.log10(31);

  /**
   * min value of the scale
   */
  private double min;

  /**
   * max value of the scale
   */
  private double max;

  /**
   * Scale resolution
   */
  private double res;

  /**
   * Scale resolution in log10.
   */
  private int log10res;

  /**
   * Scale delta := max - min
   */
  private double delta;

  /**
   * Constructor. Computes a scale covering the range of min-max with between 3
   * and 30 intervals, rounded to the appropriate number of digits. The
   * arguments are swapped if given in the wrong order; an (almost) empty range
   * is widened so that the scale never has a zero extent.
   * 
   * @param min actual minimum in the data
   * @param max actual maximum in the data
   */
  public LinearScale(double min, double max) {
    if(max < min) {
      double tmp = max;
      max = min;
      min = tmp;
    }
    this.delta = max - min;
    if(this.delta <= Double.MIN_NORMAL) {
      this.delta = 1.0;
    }
    log10res = (int) Math.ceil(Math.log10(this.delta) - ZOOMFACTOR);
    res = Math.pow(10, log10res);

    // round min and max according to the resolution counters
    this.min = Math.floor(min / res) * res;
    this.max = Math.ceil(max / res) * res;
    if(this.min == this.max) {
      this.max = this.min + res;
    }
    // Update delta (note: updated min, max!)
    this.delta = this.max - this.min;
    if(this.delta <= Double.MIN_NORMAL) {
      this.delta = 1.0;
    }

    // LoggingUtil.warning(min+"~"+this.min+" "+max+"~"+this.max+" % "+this.res+" "+this.delta);
  }

  /**
   * Get minimum value (scale, not data).
   * 
   * @return min
   */
  public double getMin() {
    return min;
  }

  /**
   * Get maximum value (scale, not data).
   * 
   * @return max
   */
  public double getMax() {
    return max;
  }

  /**
   * Get resolution (scale interval size)
   * 
   * @return scale interval size
   */
  public double getRes() {
    return res;
  }

  /**
   * Get the decimal exponent of the resolution, i.e. the resolution is
   * {@code 10^getLog10Res()}.
   * 
   * @return scale interval size in logarithmic form
   */
  public double getLog10Res() {
    return log10res;
  }

  /**
   * Convert a value to its scale position
   * 
   * @param val data value
   * @return scale position in the interval [0:1]
   */
  public double getScaled(double val) {
    return (val - min) / delta;
  }

  /**
   * Convert a scale position to the actual value
   * 
   * @param val scale position in the interval [0:1]
   * @return value on the original scale
   */
  public double getUnscaled(double val) {
    return val * delta + min;
  }

  /**
   * Convert a relative value to its scale position
   * 
   * @param val relative data value
   * @return relative scale position in the interval [0:1]
   */
  public double getRelativeScaled(double val) {
    return val / delta;
  }

  /**
   * Convert a relative scale position to the actual value
   * 
   * @param val relative scale position in the interval [0:1]
   * @return relative value on the original scale
   */
  public double getRelativeUnscaled(double val) {
    return val * delta;
  }

  /**
   * Convert a value to its scale position
   * 
   * @param val data value
   * @param smin target scale minimum
   * @param smax target scale maximum
   * @return scale position in the interval [smin:smax]
   */
  public double getScaled(double val, double smin, double smax) {
    return getScaled(val) * (smax - smin) + smin;
  }

  /**
   * Convert a scale position to the actual value. This is the inverse of
   * {@link #getScaled(double, double, double)}: the position is first mapped
   * from [smin:smax] back to [0:1] and then to the data range. The result is
   * not finite when {@code smin == smax}.
   * 
   * @param val scale position in the interval [smin:smax]
   * @param smin target scale minimum
   * @param smax target scale maximum
   * @return value on the original scale
   */
  public double getUnscaled(double val, double smin, double smax) {
    return getUnscaled((val - smin) / (smax - smin));
  }

  /**
   * Convert a relative value to its scale position
   * 
   * @param val relative data value
   * @param smin target scale minimum
   * @param smax target scale maximum
   * @return relative scale position in the interval [smin:smax]
   */
  public double getRelativeScaled(double val, double smin, double smax) {
    // Parameter order matches the documentation and the sibling methods; with
    // the names swapped the result had the wrong sign.
    return getRelativeScaled(val) * (smax - smin);
  }

  /**
   * Convert a relative scale position to the actual value. This is the inverse
   * of {@link #getRelativeScaled(double, double, double)}. The result is not
   * finite when {@code smin == smax}.
   * 
   * @param val relative scale position in the interval [smin:smax]
   * @param smin target scale minimum
   * @param smax target scale maximum
   * @return relative value on the original scale
   */
  public double getRelativeUnscaled(double val, double smin, double smax) {
    return getRelativeUnscaled(val / (smax - smin));
  }

  /**
   * Format value according to the scales resolution (i.e. appropriate number of
   * digits)
   * 
   * @param val Value
   * @return formatted number
   */
  public String formatValue(double val) {
    NumberFormat fmt = NumberFormat.getInstance(Locale.US);
    // Resolutions of 1 and coarser need no fraction digits at all.
    fmt.setMaximumFractionDigits(Math.max(0, -log10res));
    return fmt.format(val);
  }
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java new file mode 100644 index 00000000..ffaaffc7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java @@ -0,0 +1,73 @@ +package de.lmu.ifi.dbs.elki.math.scales; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; + +/** + * Scales helper class. + * Currently, this will just compute a linear scale for each axis. + * It is planned to add functionality to include some analysis to + * be able to automatically choose log scales when appropriate. + * + * @author Erich Schubert + * + * @apiviz.has LinearScale oneway - - computes + */ +public class Scales { + /** + * Compute a linear scale for each dimension. 
+ * + * @param <O> vector type + * @param db Database + * @return Scales, indexed starting with 0 (like Vector, not database objects!) + */ + public static <O extends NumberVector<?,? extends Number>> LinearScale[] calcScales(Relation<O> db) { + if (db == null) { + throw new AbortException("No database was given to Scales.calcScales."); + } + int dim = DatabaseUtil.dimensionality(db); + DoubleMinMax minmax[] = DoubleMinMax.newArray(dim); + LinearScale scales[] = new LinearScale[dim]; + + // analyze data + for(DBID objId : db.iterDBIDs()) { + O v = db.get(objId); + for(int d = 0; d < dim; d++) { + minmax[d].put(v.doubleValue(d+1)); + } + } + + // generate scales + for(int d = 0; d < dim; d++) { + scales[d] = new LinearScale(minmax[d].getMin(), minmax[d].getMax()); + } + return scales; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/package-info.java b/src/de/lmu/ifi/dbs/elki/math/scales/package-info.java new file mode 100644 index 00000000..1047bb0a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/scales/package-info.java @@ -0,0 +1,27 @@ +/** + * <p>Scales handling for plotting.</p> + * + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2012 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.math.scales;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java new file mode 100644 index 00000000..942fe64b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java @@ -0,0 +1,144 @@ +package de.lmu.ifi.dbs.elki.math.spacefillingcurves; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; + +/** + * Abstract base class for spatial sorters, offering shared functionality. + * + * @author Erich Schubert + */ +public abstract class AbstractSpatialSorter implements SpatialSorter { + /** + * Constructor. 
+ */ + public AbstractSpatialSorter() { + super(); + } + + @Override + public <T extends SpatialComparable> void sort(List<T> objs) { + double[] mms = computeMinMax(objs); + sort(objs, 0, objs.size(), mms); + } + + /** + * "Pivotize" the list, such that all elements before the given position are + * less than, all elements after the position are larger than the threshold + * value in the given dimension. (desc inverts the sorting!) + * + * Only the elments in the interval <tt>[start: end[</tt> are sorted! + * + * @param objs List of objects + * @param start Start of sorting range + * @param end End of sorting range + * @param dim Dimension to sort by + * @param threshold Threshold value + * @param desc Inversion flag + * @return Pivot position + */ + protected <T extends SpatialComparable> int pivotizeList1D(List<T> objs, int start, int end, int dim, double threshold, boolean desc) { + threshold = 2 * threshold; // faster + int s = start, e = end; + while(s < e) { + if(!desc) { + double sminmax = getMinPlusMaxObject(objs, s, dim); + while((sminmax < threshold) && s + 1 <= e && s + 1 < end) { + s++; + sminmax = getMinPlusMaxObject(objs, s, dim); + } + double eminmax = getMinPlusMaxObject(objs, e - 1, dim); + while((eminmax >= threshold) && s < e - 1 && start < e - 1) { + e--; + eminmax = getMinPlusMaxObject(objs, e - 1, dim); + } + } + else { + double sminmax = getMinPlusMaxObject(objs, s, dim); + while((sminmax > threshold) && s + 1 <= e && s + 1 < end) { + s++; + sminmax = getMinPlusMaxObject(objs, s, dim); + } + double eminmax = getMinPlusMaxObject(objs, e - 1, dim); + while((eminmax <= threshold) && s < e - 1 && start < e - 1) { + e--; + eminmax = getMinPlusMaxObject(objs, e - 1, dim); + } + } + if(s >= e) { + assert (s == e); + break; + } + // Swap + objs.set(s, objs.set(e - 1, objs.get(s))); + s++; + e--; + } + return e; + } + + /** + * Compute getMin(dim) + getMax(dim) for the spatial object + * + * @param objs Objects + * @param s index + * @param dim 
Dimensionality + * @return Min+Max + */ + private double getMinPlusMaxObject(List<? extends SpatialComparable> objs, int s, int dim) { + SpatialComparable sobj = objs.get(s); + return sobj.getMin(dim) + sobj.getMax(dim); + } + + /** + * Compute the minimum and maximum for each dimension. + * + * @param objs Objects + * @return Array of min, max pairs (length = 2 * dim) + */ + public static <T extends SpatialComparable> double[] computeMinMax(List<T> objs) { + final int dim = objs.get(0).getDimensionality(); + // Compute min and max for each dimension: + double[] mm = new double[dim * 2]; + { + for(int d = 0; d < dim; d++) { + mm[d * 2] = Double.POSITIVE_INFINITY; + mm[d * 2 + 1] = Double.NEGATIVE_INFINITY; + } + for(SpatialComparable obj : objs) { + for(int d = 0; d < dim; d++) { + mm[2 * d] = Math.min(mm[2 * d], obj.getMin(d + 1)); + mm[2 * d + 1] = Math.max(mm[2 * d + 1], obj.getMax(d + 1)); + } + } + for(int d = 0; d < dim; d++) { + assert (mm[2 * d] <= mm[2 * d + 1]); + } + } + return mm; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java new file mode 100644 index 00000000..0b45022c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java @@ -0,0 +1,112 @@ +package de.lmu.ifi.dbs.elki.math.spacefillingcurves; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Comparator; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Spatially sort the data set by repetitive binary splitting, circulating + * through the dimensions. This is essentially the bulk-loading proposed for the + * k-d-tree, as it will produce a perfectly balanced k-d-tree. The resulting + * order is the sequence in which objects would then be stored in the k-d-tree. 
+ * + * Note that when using this for bulk-loading an R-tree, the result will + * <em>not</em> be a k-d-tree, not even remotely similar, as the splits are not + * preserved. + * + * Reference (for the bulk-loading): + * <p> + * J. L. Bentley<br/> + * Multidimensional binary search trees used for associative searching<br/> + * Communications of the ACM, Vol. 18 Issue 9, Sept. 1975 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "J. L. Bentley", title = "Multidimensional binary search trees used for associative searching", booktitle = "Communications of the ACM, Vol. 18 Issue 9, Sept. 1975", url = "http://dx.doi.org/10.1145/361002.361007") +public class BinarySplitSpatialSorter extends AbstractSpatialSorter { + /** + * Constructor. + */ + public BinarySplitSpatialSorter() { + super(); + } + + @Override + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { + final int dims = objs.get(0).getDimensionality(); + binarySplitSort(objs, start, end, 1, dims, new DimC()); + } + + /** + * Sort the array using a binary split in dimension curdim, then recurse with + * the next dimension. + * + * @param objs List of objects + * @param start Interval start + * @param end Interval end (exclusive) + * @param curdim Current dimension + * @param dims Number of dimensions + * @param comp Comparator to use + */ + private <T extends SpatialComparable> void binarySplitSort(List<T> objs, final int start, final int end, int curdim, final int dims, DimC comp) { + final int mid = start + ((end - start) >>> 1); + // Make invariant + comp.dim = curdim; + QuickSelect.quickSelect(objs, comp, start, end, mid); + // Recurse + final int nextdim = (curdim % dims) + 1; + if(start < mid - 1) { + binarySplitSort(objs, start, mid, nextdim, dims, comp); + } + if(mid + 2 < end) { + binarySplitSort(objs, mid + 1, end, nextdim, dims, comp); + } + } + + /** + * Comparator that uses only a particular dimension. 
+ * + * This comparator is meant to be reused, and the dimension to be changed, to + * reduce the number of objects allocated. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private static class DimC implements Comparator<SpatialComparable> { + public int dim = -1; + + @Override + public int compare(SpatialComparable o1, SpatialComparable o2) { + double m1 = o1.getMax(dim) + o1.getMin(dim); + double m2 = o2.getMax(dim) + o2.getMin(dim); + return Double.compare(m1, m2); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java new file mode 100644 index 00000000..82e41337 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java @@ -0,0 +1,346 @@ +package de.lmu.ifi.dbs.elki.math.spacefillingcurves; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Sort object along the Hilbert Space Filling curve by mapping them to their + * Hilbert numbers and sorting them. + * + * Objects are mapped using 31 bits per dimension. + * + * Reference: + * <p> + * D. 
Hilbert<br /> + * Über die stetige Abbildung einer Linie auf ein Flächenstück<br /> + * In: Mathematische Annalen, 38(3) + * </p> + * + * @author Erich Schubert + * + * @apiviz.uses HilbertRef + */ +@Reference(authors = "D. Hilbert", title = "Über die stetige Abbildung einer Linie auf ein Flächenstück", booktitle = "Mathematische Annalen, 38(3)") +public class HilbertSpatialSorter extends AbstractSpatialSorter { + /** + * Constructor. + */ + public HilbertSpatialSorter() { + super(); + } + + @Override + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { + final int dim = minmax.length >> 1; + List<HilbertRef<T>> tmp = new ArrayList<HilbertRef<T>>(end - start); + int[] buf = new int[dim]; + for(int i = start; i < end; i++) { + T v = objs.get(i); + // Convert into integers + for(int d = 0; d < dim; d++) { + double val = (v.getMin(d + 1) + v.getMax(d + 1)) / 2; + val = Integer.MAX_VALUE * ((val - minmax[2 * d]) / (minmax[2 * d + 1] - minmax[2 * d])); + buf[d] = (int) val; + } + tmp.add(new HilbertRef<T>(v, coordinatesToHilbert(buf, Integer.SIZE - 1, 1))); + } + // Sort and copy back + Collections.sort(tmp); + for(int i = start; i < end; i++) { + objs.set(i, tmp.get(i - start).vec); + } + } + + /** + * Object used in spatial sorting, combining the spatial object and the object + * ID. + * + * @author Erich Schubert + */ + private static class HilbertRef<T extends SpatialComparable> implements Comparable<HilbertRef<T>> { + /** + * The referenced object + */ + protected T vec; + + /** + * Hilbert representation + */ + protected long[] bits; + + /** + * Constructor. + * + * @param vec + */ + protected HilbertRef(T vec, long[] bits) { + super(); + this.vec = vec; + this.bits = bits; + } + + @Override + public int compareTo(HilbertRef<T> o) { + return BitsUtil.compare(this.bits, o.bits); + } + } + + /** + * Interleave one long per dimension (using the "bitsperdim" highest bits) to + * a hilbert address. 
+ * + * @param coords Original coordinates + * @param bitsperdim Number of bits to use. + * @return Hilbert address + */ + public static long[] coordinatesToHilbert(long[] coords, int bitsperdim, int offset) { + final int numdim = coords.length; + final int numbits = numdim * bitsperdim; + final long[] output = BitsUtil.zero(numbits); + + int rotation = 0; + long[] refl = BitsUtil.zero(numdim); + for(int i = 0; i < bitsperdim; i++) { + final long[] hist = interleaveBits(coords, i + offset); + // System.err.println(BitsUtil.toString(hist, + // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); + final long[] bits = BitsUtil.copy(hist); + BitsUtil.xorI(bits, refl); + BitsUtil.cycleRightI(bits, rotation, numdim); + final int nextrot = (rotation + BitsUtil.numberOfTrailingZerosSigned(bits) + 2) % numdim; + BitsUtil.invgrayI(bits); + BitsUtil.orI(output, bits, numbits - (i + 1) * numdim); + // System.err.println(BitsUtil.toString(output, + // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); + refl = hist; + BitsUtil.flipI(refl, rotation); + if(!BitsUtil.get(bits, 0)) { + BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); + } + rotation = nextrot; + } + + return output; + } + + /** + * Interleave one int per dimension (using the "bitsperdim" highest bits) to + * a hilbert address. + * + * @param coords Original coordinates + * @param bitsperdim Number of bits to use. 
+ * @return Hilbert address + */ + public static long[] coordinatesToHilbert(int[] coords, int bitsperdim, int offset) { + final int numdim = coords.length; + final int numbits = numdim * bitsperdim; + final long[] output = BitsUtil.zero(numbits); + + int rotation = 0; + long[] refl = BitsUtil.zero(numdim); + for(int i = 0; i < bitsperdim; i++) { + final long[] hist = interleaveBits(coords, i + offset); + // System.err.println(BitsUtil.toString(hist, + // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); + final long[] bits = BitsUtil.copy(hist); + BitsUtil.xorI(bits, refl); + BitsUtil.cycleRightI(bits, rotation, numdim); + final int nextrot = (rotation + BitsUtil.numberOfTrailingZerosSigned(bits) + 2) % numdim; + BitsUtil.invgrayI(bits); + BitsUtil.orI(output, bits, numbits - (i + 1) * numdim); + // System.err.println(BitsUtil.toString(output, + // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); + refl = hist; + BitsUtil.flipI(refl, rotation); + if(!BitsUtil.get(bits, 0)) { + BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); + } + rotation = nextrot; + } + + return output; + } + + /** + * Interleave one short per dimension (using the "bitsperdim" highest bits) to + * a hilbert address. + * + * @param coords Original coordinates + * @param bitsperdim Number of bits to use. 
+ * @return Hilbert address + */ + public static long[] coordinatesToHilbert(short[] coords, int bitsperdim, int offset) { + final int numdim = coords.length; + final int numbits = numdim * bitsperdim; + final long[] output = BitsUtil.zero(numbits); + + int rotation = 0; + long[] refl = BitsUtil.zero(numdim); + for(int i = 0; i < bitsperdim; i++) { + final long[] hist = interleaveBits(coords, i + offset); + // System.err.println(BitsUtil.toString(hist, + // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); + final long[] bits = BitsUtil.copy(hist); + BitsUtil.xorI(bits, refl); + BitsUtil.cycleRightI(bits, rotation, numdim); + final int nextrot = (rotation + BitsUtil.numberOfTrailingZerosSigned(bits) + 2) % numdim; + BitsUtil.invgrayI(bits); + BitsUtil.orI(output, bits, numbits - (i + 1) * numdim); + // System.err.println(BitsUtil.toString(output, + // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); + refl = hist; + BitsUtil.flipI(refl, rotation); + if(!BitsUtil.get(bits, 0)) { + BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); + } + rotation = nextrot; + } + + return output; + } + + /** + * Interleave one byte per dimension (using the "bitsperdim" highest bits) to + * a hilbert address. + * + * @param coords Original coordinates + * @param bitsperdim Number of bits to use. 
+ * @return Hilbert address + */ + public static long[] coordinatesToHilbert(byte[] coords, int bitsperdim, int offset) { + final int numdim = coords.length; + final int numbits = numdim * bitsperdim; + final long[] output = BitsUtil.zero(numbits); + + int rotation = 0; + long[] refl = BitsUtil.zero(numdim); + for(int i = 0; i < bitsperdim; i++) { + final long[] hist = interleaveBits(coords, i + offset); + // System.err.println(BitsUtil.toString(hist, + // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); + final long[] bits = BitsUtil.copy(hist); + BitsUtil.xorI(bits, refl); + BitsUtil.cycleRightI(bits, rotation, numdim); + final int nextrot = (rotation + BitsUtil.numberOfTrailingZerosSigned(bits) + 2) % numdim; + BitsUtil.invgrayI(bits); + BitsUtil.orI(output, bits, numbits - (i + 1) * numdim); + // System.err.println(BitsUtil.toString(output, + // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); + refl = hist; + BitsUtil.flipI(refl, rotation); + if(!BitsUtil.get(bits, 0)) { + BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); + } + rotation = nextrot; + } + + return output; + } + + /** + * Select the "iter" highest bit from each dimension. + * + * @param coords Input coordinates + * @param iter Bit position (from highest position) + * @return One bit per dimension + */ + public static long[] interleaveBits(long[] coords, int iter) { + final int numdim = coords.length; + final long[] bitset = BitsUtil.zero(numdim); + // convert longValues into zValues + final long mask = 1L << 63 - iter; + for(int dim = 0; dim < numdim; dim++) { + if((coords[dim] & mask) != 0) { + BitsUtil.setI(bitset, dim); + } + } + return bitset; + } + + /** + * Select the "iter" highest bit from each dimension. 
+ * + * @param coords Input coordinates + * @param iter Bit position (from highest position) + * @return One bit per dimension + */ + public static long[] interleaveBits(int[] coords, int iter) { + final int numdim = coords.length; + final long[] bitset = BitsUtil.zero(numdim); + // convert longValues into zValues + final long mask = 1L << 31 - iter; + for(int dim = 0; dim < numdim; dim++) { + if((coords[dim] & mask) != 0) { + BitsUtil.setI(bitset, dim); + } + } + return bitset; + } + + /** + * Select the "iter" highest bit from each dimension. + * + * @param coords Input coordinates + * @param iter Bit position (from highest position) + * @return One bit per dimension + */ + public static long[] interleaveBits(short[] coords, int iter) { + final int numdim = coords.length; + final long[] bitset = BitsUtil.zero(numdim); + // convert longValues into zValues + final long mask = 1L << 15 - iter; + for(int dim = 0; dim < numdim; dim++) { + if((coords[dim] & mask) != 0) { + BitsUtil.setI(bitset, dim); + } + } + return bitset; + } + + /** + * Select the "iter" highest bit from each dimension. + * + * @param coords Input coordinates + * @param iter Bit position (from highest position) + * @return One bit per dimension + */ + public static long[] interleaveBits(byte[] coords, int iter) { + final int numdim = coords.length; + final long[] bitset = BitsUtil.zero(numdim); + // convert longValues into zValues + final long mask = 1L << 7 - iter; + for(int dim = 0; dim < numdim; dim++) { + if((coords[dim] & mask) != 0) { + BitsUtil.setI(bitset, dim); + } + } + return bitset; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java new file mode 100644 index 00000000..50cf1946 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java @@ -0,0 +1,149 @@ +package de.lmu.ifi.dbs.elki.math.spacefillingcurves; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.BitSet; +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Bulk-load an R-tree index by presorting the objects with their position on + * the Peano curve. 
+ * + * The basic shape of this space-filling curve looks like this: + * + * <pre> + * 3---4 9 + * | | | + * 2 5 8 + * | | | + * 1 6---7 + * </pre> + * + * Which then expands to the next level as: + * + * <pre> + * +-+ +-+ +-+ +-+ E + * | | | | | | | | | + * | +-+ +-+ | | +-+ + * | | | + * | +-+ +-+ | | +-+ + * | | | | | | | | | + * +-+ | | +-+ +-+ | + * | | | + * +-+ | | +-+ +-+ | + * | | | | | | | | | + * S +-+ +-+ +-+ +-+ + * </pre> + * + * and so on. + * + * Reference: + * <p> + * G. Peano<br /> + * Sur une courbe, qui remplit toute une aire plane<br /> + * Mathematische Annalen, 36(1) + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "G. Peano", title = "Sur une courbe, qui remplit toute une aire plane", booktitle = "Mathematische Annalen, 36(1)") +public class PeanoSpatialSorter extends AbstractSpatialSorter { + /** + * Constructor. + */ + public PeanoSpatialSorter() { + super(); + } + + @Override + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { + peanoSort(objs, start, end, minmax, 0, new BitSet(), false); + } + + /** + * Sort by Peano curve. + * + * @param objs Objects + * @param start Start index + * @param end End + * @param mms Minmax values + * @param dim Dimension + * @param bits Bit set for inversions + * @param desc Current ordering + */ + protected <T extends SpatialComparable> void peanoSort(List<T> objs, int start, int end, double[] mms, int dim, BitSet bits, boolean desc) { + // Find the splitting points. 
+ final double min = mms[2 * dim], max = mms[2 * dim + 1]; + final double tfirst = (min + min + max) / 3.; + final double tsecond = (min + max + max) / 3.; + // Safeguard against duplicate points: + if(max - tsecond < 1E-10 || tsecond - tfirst < 1E-10 || tfirst - min < 1E-10) { + boolean ok = false; + for(int d = 0; d < mms.length; d += 2) { + if(mms[d + 1] - mms[d] >= 1E-10) { + ok = true; + break; + } + } + if(!ok) { + return; + } + } + final boolean inv = bits.get(dim) ^ desc; + // Split the data set into three parts + int fsplit, ssplit; + if(!inv) { + fsplit = pivotizeList1D(objs, start, end, dim + 1, tfirst, false); + ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim + 1, tsecond, false) : fsplit; + } + else { + fsplit = pivotizeList1D(objs, start, end, dim + 1, tsecond, true); + ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim + 1, tfirst, true) : fsplit; + } + int nextdim = (dim + 1) % objs.get(0).getDimensionality(); + // Do we need to update the min/max values? + if(start < fsplit - 1) { + mms[2 * dim] = !inv ? min : tsecond; + mms[2 * dim + 1] = !inv ? tfirst : max; + peanoSort(objs, start, fsplit, mms, nextdim, bits, desc); + } + if(fsplit < ssplit - 1) { + bits.flip(dim); // set (all but dim: we also flip "desc") + mms[2 * dim] = tfirst; + mms[2 * dim + 1] = tsecond; + peanoSort(objs, fsplit, ssplit, mms, nextdim, bits, !desc); + bits.flip(dim); + } + if(ssplit < end - 1) { + mms[2 * dim] = !inv ? tsecond : min; + mms[2 * dim + 1] = !inv ? max : tfirst; + peanoSort(objs, ssplit, end, mms, nextdim, bits, desc); + } + // Restore ranges + mms[2 * dim] = min; + mms[2 * dim + 1] = max; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java new file mode 100644 index 00000000..2473dff5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java @@ -0,0 +1,54 @@ +package de.lmu.ifi.dbs.elki.math.spacefillingcurves; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; + +/** + * Interface for spatial sorting - ZCurves, Peano curves, Hilbert curves, ... + * + * @author Erich Schubert + */ +public interface SpatialSorter { + /** + * Partitions the specified feature vectors + * + * @param <T> actual type we sort + * @param objs the spatial objects to be sorted + */ + public <T extends SpatialComparable> void sort(List<T> objs); + + /** + * Sort part of the list (start to end). + * + * @param <T> actual type we sort + * @param objs the spatial objects to be sorted + * @param start First index to sort (e.g. 0) + * @param end End of range (e.g. 
<code>size()</code>) + * @param minmax Array with dim pairs of (min, max) of value ranges + */ + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java index 69dd8ff6..38f45aef 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurve.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java new file mode 100644 index 00000000..b8fc63bd --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java @@ -0,0 +1,91 @@ +package de.lmu.ifi.dbs.elki.math.spacefillingcurves; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.List; + +import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; + +/** + * Class to sort the data set by their Z-index, without doing a full + * materialization of the Z indexes. + * + * @author Erich Schubert + */ +public class ZCurveSpatialSorter extends AbstractSpatialSorter { + private static final double STOPVAL = 1E-10; + + @Override + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { + zSort(objs, start, end, minmax, 0); + } + + /** + * The actual Z sorting function + * + * @param objs Objects to sort + * @param start Start + * @param end End + * @param mms Min-Max value ranges + * @param dim Current dimension + */ + protected <T extends SpatialComparable> void zSort(List<T> objs, int start, int end, double[] mms, int dim) { + // Find the splitting points. + final double min = mms[2 * dim], max = mms[2 * dim + 1]; + double spos = (min + max) / 2.; + // Safeguard against duplicate points: + if(max - spos < STOPVAL || spos - min < STOPVAL) { + boolean ok = false; + for(int d = 0; d < mms.length; d += 2) { + if(mms[d + 1] - mms[d] >= STOPVAL) { + // LoggingUtil.warning("No: " + (mms[d + 1] - mms[d])); + ok = true; + break; + } + } + if(!ok) { + return; + } + } + int split = pivotizeList1D(objs, start, end, dim + 1, spos, false); + assert (start <= split && split <= end); + int nextdim = (dim + 1) % objs.get(0).getDimensionality(); + // LoggingUtil.warning("dim: " + dim + " min: " + min + " split: " + spos + + // " max:" + max + " " + start + " < " + split + " < " + end); + if(start < split - 1) { + mms[2 * dim] = min; + mms[2 * dim + 1] = spos; + zSort(objs, start, split, mms, nextdim); + } + if(split < end - 1) { + mms[2 * dim] = spos; + mms[2 * dim + 1] = max; + zSort(objs, split, end, mms, nextdim); + } + // Restore ranges + mms[2 * 
dim] = min; + mms[2 * dim + 1] = max; + // FIXME: implement completely and test. + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java index c3bf1766..9fb9494a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java index 0b48a69e..ed9d8d58 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java index 7adf8ee1..744a9108 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java index 398e6927..3bb0e1f6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java index 4c9100fd..29718fcb 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java index f28986f0..231b2071 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java 
b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java index 90f80135..0e674146 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -139,14 +139,10 @@ public class MultipleLinearRegression { @Override public String toString() { StringBuffer msg = new StringBuffer(); - msg.append("\nx = "); - msg.append(FormatUtil.format(x, 9, 4)); - msg.append("\ny = "); - msg.append(FormatUtil.format(y, 9, 4)); - msg.append("\nb = "); - msg.append(FormatUtil.format(b, 9, 4)); - msg.append("\ne = "); - msg.append(FormatUtil.format(e, 9, 4)); + msg.append("x = ").append(FormatUtil.format(x, 9, 4)); + msg.append("\ny = ").append(FormatUtil.format(y, 9, 4)); + msg.append("\nb = ").append(FormatUtil.format(b, 9, 4)); + msg.append("\ne = ").append(FormatUtil.format(e, 9, 4)); msg.append("error variance = ").append(FormatUtil.format(variance, 4)); return msg.toString(); } @@ -203,7 +199,7 @@ public class MultipleLinearRegression { * @return the estimation of y */ public double estimateY(Matrix x) { - return x.times(b).get(0, 0); + return x.times(b).get(0); } /** diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java index 1b785f75..b1bfe31b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 
2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -62,7 +62,7 @@ public class PolynomialRegression extends MultipleLinearRegression { } private static Matrix xMatrix(Vector x, int p) { - int n = x.getRowDimensionality(); + int n = x.getDimensionality(); Matrix result = new Matrix(n, p + 1); for(int i = 0; i < n; i++) { @@ -79,7 +79,7 @@ public class PolynomialRegression extends MultipleLinearRegression { * @return the adapted coefficient of determination */ public double adaptedCoefficientOfDetermination() { - int n = getEstimatedResiduals().getRowDimensionality(); + int n = getEstimatedResiduals().getDimensionality(); return 1.0 - ((n - 1.0) / (n * 1.0 - p)) * (1 - coefficientOfDetermination()); } diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/QuickSelect.java b/src/de/lmu/ifi/dbs/elki/math/statistics/QuickSelect.java deleted file mode 100644 index efacd395..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/QuickSelect.java +++ /dev/null @@ -1,231 +0,0 @@ -package de.lmu.ifi.dbs.elki.math.statistics; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2011 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. 
If not, see <http://www.gnu.org/licenses/>. - */ - - -/** - * QuickSelect computes ("selects") the element at a given rank and can be used - * to compute Medians and arbitrary quantiles by computing the appropriate rank. - * - * This algorithm is essentially an incomplete QuickSort that only descends into - * that part of the data that we are interested in, and also attributed to - * Charles Antony Richard Hoare - * - * @author Erich Schubert - */ -public class QuickSelect { - /** - * For small arrays, use a simpler method: - */ - private static final int SMALL = 10; - - /** - * QuickSelect is essentially quicksort, except that we only "sort" that half - * of the array that we are interested in. - * - * Note: the array is <b>modified</b> by this. - * - * @param data Data to process - * @param rank Rank position that we are interested in (integer!) - * @return Value at the given rank - */ - public static double quickSelect(double[] data, int rank) { - quickSelect(data, 0, data.length - 1, rank); - return data[rank]; - } - - /** - * Compute the median of an array efficiently using the QuickSelect method. - * - * Note: the array is <b>modified</b> by this. - * - * @param data Data to process - * @return Median value - */ - public static double median(double[] data) { - return median(data, 0, data.length - 1); - } - - /** - * Compute the median of an array efficiently using the QuickSelect method. - * - * Note: the array is <b>modified</b> by this. - * - * @param data Data to process - * @param begin Begin of valid values - * @param end End of valid values (inclusive!) - * @return Median value - */ - public static double median(double[] data, int begin, int end) { - final int length = (end + 1) - begin; - assert (length > 0); - // Integer division is "floor" since we are non-negative. 
- final int left = begin + (length - 1) / 2; - quickSelect(data, begin, end, left); - if(length % 2 == 1) { - return data[left]; - } - else { - quickSelect(data, begin, end, left + 1); - return data[left] + (data[left + 1] - data[left]) / 2; - } - } - - /** - * Compute the median of an array efficiently using the QuickSelect method. - * - * Note: the array is <b>modified</b> by this. - * - * @param data Data to process - * @param quant Quantile to compute - * @return Value at quantile - */ - public static double quantile(double[] data, double quant) { - return quantile(data, 0, data.length - 1, quant); - } - - /** - * Compute the median of an array efficiently using the QuickSelect method. - * - * Note: the array is <b>modified</b> by this. - * - * @param data Data to process - * @param begin Begin of valid values - * @param end End of valid values (inclusive!) - * @param quant Quantile to compute - * @return Value at quantile - */ - public static double quantile(double[] data, int begin, int end, double quant) { - final int length = (end + 1) - begin; - assert (length > 0) : "Quantile on empty set?"; - // Integer division is "floor" since we are non-negative. - final double dleft = begin + (length - 1) * quant; - final int ileft = (int) Math.floor(dleft); - final double err = dleft - ileft; - - quickSelect(data, begin, end, ileft); - if(err <= Double.MIN_NORMAL) { - return data[ileft]; - } - else { - quickSelect(data, begin, end, ileft + 1); - // Mix: - double mix = data[ileft] + (data[ileft + 1] - data[ileft]) * err; - return mix; - } - } - - /** - * QuickSelect is essentially quicksort, except that we only "sort" that half - * of the array that we are interested in. 
- * - * @param data Data to process - * @param start Interval start - * @param end Interval end (inclusive) - * @param rank rank position we are interested in (starting at 0) - */ - public static void quickSelect(double[] data, int start, int end, int rank) { - // Optimization for small arrays - // This also ensures a minimum size below - if(start + SMALL > end) { - insertionSort(data, start, end); - return; - } - - // Pick pivot from three candidates: start, middle, end - // Since we compare them, we can also just "bubble sort" them. - final int middle = (start + end) / 2; - if(data[start] > data[middle]) { - swap(data, start, middle); - } - if(data[start] > data[end]) { - swap(data, start, end); - } - if(data[middle] > data[end]) { - swap(data, middle, end); - } - // TODO: use more candidates for larger arrays? - - final double pivot = data[middle]; - // Move middle element out of the way, just before end - // (Since we already know that "end" is bigger) - swap(data, middle, end - 1); - - // Begin partitioning - int i = start + 1, j = end - 2; - // This is classic quicksort stuff - while(true) { - while(data[i] <= pivot && i <= j) { - i++; - } - while(data[j] >= pivot && j >= i) { - j--; - } - if(i >= j) { - break; - } - swap(data, i, j); - } - - // Move pivot (former middle element) back into the appropriate place - swap(data, i, end - 1); - - // In contrast to quicksort, we only need to recurse into the half we are - // interested in. - if(rank < i) { - quickSelect(data, start, i - 1, rank); - } - else if(rank > i) { - quickSelect(data, i + 1, end, rank); - } - } - - /** - * Sort a small array using repetitive insertion sort. 
- * - * @param data Data to sort - * @param start Interval start - * @param end Interval end - */ - private static void insertionSort(double[] data, int start, int end) { - for(int i = start + 1; i <= end; i++) { - for(int j = i; j > start && data[j - 1] > data[j]; j--) { - swap(data, j, j - 1); - } - } - } - - /** - * The usual swap method. - * - * @param data Array - * @param a First index - * @param b Second index - */ - private static final void swap(double[] data, int a, int b) { - double tmp = data[a]; - data[a] = data[b]; - data[b] = tmp; - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java index e9ad4a00..a8f45f9d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,10 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.HashMap; -import java.util.Map; +import gnu.trove.map.TDoubleDoubleMap; +import gnu.trove.map.TIntObjectMap; +import gnu.trove.map.hash.TDoubleDoubleHashMap; +import gnu.trove.map.hash.TIntObjectHashMap; /** * Tabelarizes the values for student distribution. @@ -125,7 +127,7 @@ public class StudentDistribution { /** * Holds the t-values. 
*/ - private static Map<Integer, Map<Double, Double>> tValues = new HashMap<Integer, Map<Double, Double>>(); + private static TIntObjectMap<TDoubleDoubleMap> tValues = new TIntObjectHashMap<TDoubleDoubleMap>(); static { put(31, new double[] { 0.2533, 0.8416, 1.2816, 1.6449, 1.96, 2.3263, 2.5758, 3.0903, 3.2906 }); @@ -142,7 +144,7 @@ public class StudentDistribution { if(n > 30) { n = 31; } - Map<Double, Double> map = tValues.get(n); + TDoubleDoubleMap map = tValues.get(n); if(map == null) { throw new IllegalArgumentException("t-values for n=" + n + " not yet tabularized!"); } @@ -162,7 +164,7 @@ public class StudentDistribution { * @param values the t-values */ private static void put(int n, double[] values) { - Map<Double, Double> map = new HashMap<Double, Double>(); + TDoubleDoubleMap map = new TDoubleDoubleHashMap(); map.put(_6000, values[0]); map.put(_8000, values[1]); map.put(_9000, values[2]); diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java index 7beb17c3..e3c23b2a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java index 2f000736..8d85528f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java @@ -4,7 +4,7 @@ package 
de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java new file mode 100644 index 00000000..c561c4cd --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java @@ -0,0 +1,92 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Chi distribution. + * + * @author Erich Schubert + */ +public class ChiDistribution implements Distribution { + /** + * Degrees of freedom. Usually integer. + */ + private double dof; + + /** + * Chi squared distribution (for random generation) + */ + private ChiSquaredDistribution chisq; + + /** + * Constructor. + * + * @param dof Degrees of freedom. Usually integer. 
+ */ + public ChiDistribution(double dof) { + super(); + this.dof = dof; + this.chisq = new ChiSquaredDistribution(dof); + } + + @Override + public double nextRandom() { + return Math.sqrt(chisq.nextRandom()); + } + + @Override + public double pdf(double val) { + return pdf(val, dof); + } + + /** + * PDF function + * + * @param val Value + * @param dof Degrees of freedom + * @return Pdf value + */ + public static double pdf(double val, double dof) { + if(val < 0) { + return 0.0; + } + return Math.sqrt(ChiSquaredDistribution.pdf(val, dof)); + } + + @Override + public double cdf(double val) { + return cdf(val, dof); + } + + /** + * Cumulative density function. + * + * @param val Value + * @param dof Degrees of freedom. + * @return CDF value + */ + public static double cdf(double val, double dof) { + return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2); + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java new file mode 100644 index 00000000..0ab39c78 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java @@ -0,0 +1,72 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Chi-Squared distribution (a specialization of the Gamma distribution). + * + * @author Erich Schubert + */ +public class ChiSquaredDistribution extends GammaDistribution { + /** + * Constructor. + * + * @param dof Degrees of freedom. + */ + public ChiSquaredDistribution(double dof) { + super(.5 * dof, 2.0); + } + + /** + * The CDF, static version. + * + * @param val Value + * @param dof Degrees of freedom. 
+ * @return cdf value + */ + public static double cdf(double val, double dof) { + return regularizedGammaP(.5 * dof, .5 * val); + } + + /** + * Chi-Squared distribution PDF (with 0.0 for x < 0) + * + * @param x query value + * @param dof Degrees of freedom. + * @return probability density + */ + public static double pdf(double x, double dof) { + if(x <= 0) { + return 0.0; + } + if(x == 0) { + return 0.0; + } + final double k = dof / 2; + if(k == 1.0) { + return Math.exp(-x * 2.0) * 2.0; + } + return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java new file mode 100644 index 00000000..1f36dd4a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java @@ -0,0 +1,61 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Pseudo distribution, that has a unique constant value. + * + * @author Erich Schubert + */ +public class ConstantDistribution implements Distribution { + /** + * The constant + */ + final double c; + + /** + * Constructor. + * + * @param c Constant + */ + public ConstantDistribution(double c) { + super(); + this.c = c; + } + + @Override + public double nextRandom() { + return c; + } + + @Override + public double pdf(double val) { + return (val == c) ? 1 : 0; + } + + @Override + public double cdf(double val) { + return (val >= c) ? 1.0 : 0.0; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java new file mode 100644 index 00000000..290e6434 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java @@ -0,0 +1,63 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Interface for a simple distribution generator with a PDF, i.e. it can also + * compute a density + * + * @author Erich Schubert + */ +public interface Distribution { + /** + * Generate a new random value + * + * @return new random value + */ + public double nextRandom(); + + /** + * Return the density of an existing value + * + * @param val existing value + * @return distribution density + */ + public double pdf(double val); + + /** + * Return the cumulative density function at the given value. 
+ * + * @param val existing value + * @return cumulative density + */ + public double cdf(double val); + + /** + * Describe the distribution + * + * @return description + */ + @Override + public String toString(); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java new file mode 100644 index 00000000..6830f25a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java @@ -0,0 +1,470 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Gamma Distribution, with random generation and density functions. + * + * @author Erich Schubert + */ +public class GammaDistribution implements Distribution { + /** + * LANCZOS-Coefficients for Gamma approximation. + * + * These are said to have higher precision than those in "Numerical Recipes". 
+ * They probably come from + * + * Paul Godfrey: http://my.fit.edu/~gabdo/gamma.txt + */ + static final double[] LANCZOS = { 0.99999999999999709182, 57.156235665862923517, -59.597960355475491248, 14.136097974741747174, -0.49191381609762019978, .33994649984811888699e-4, .46523628927048575665e-4, -.98374475304879564677e-4, .15808870322491248884e-3, -.21026444172410488319e-3, .21743961811521264320e-3, -.16431810653676389022e-3, .84418223983852743293e-4, -.26190838401581408670e-4, .36899182659531622704e-5, }; + + /** + * Numerical precision to use + */ + static final double NUM_PRECISION = 1E-15; + + /** + * Alpha == k + */ + private final double k; + + /** + * Theta == 1 / Beta + */ + private final double theta; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @param random Random generator + */ + public GammaDistribution(double k, double theta, Random random) { + super(); + if(k <= 0.0 || theta <= 0.0) { + throw new IllegalArgumentException("Invalid parameters for Gamma distribution."); + } + + this.k = k; + this.theta = theta; + this.random = random; + } + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + */ + public GammaDistribution(double k, double theta) { + this(k, theta, new Random()); + } + + @Override + public double pdf(double val) { + return pdf(val, k, theta); + } + + @Override + public double cdf(double val) { + return cdf(val, k, theta); + } + + @Override + public double nextRandom() { + return nextRandom(k, theta, random); + } + + /** + * Simple toString explaining the distribution parameters. + * + * Used in producing a model description. 
+ */ + @Override + public String toString() { + return "Gamma Distribution (k=" + k + ", theta=" + theta + ")"; + } + + /** + * @return the value of k + */ + public double getK() { + return k; + } + + /** + * @return the standard deviation + */ + public double getTheta() { + return theta; + } + + /** + * The CDF, static version. + * + * @param val Value + * @param k Shape k + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @return cdf value + */ + public static double cdf(double val, double k, double theta) { + return regularizedGammaP(k, val / theta); + } + + /** + * Gamma distribution PDF (with 0.0 for x < 0) + * + * @param x query value + * @param k Alpha + * @param theta Theta = 1 / Beta + * @return probability density + */ + public static double pdf(double x, double k, double theta) { + if(x < 0) { + return 0.0; + } + if(x == 0) { + if(k == 1.0) { + return theta; + } + else { + return 0.0; + } + } + if(k == 1.0) { + return Math.exp(-x * theta) * theta; + } + + return Math.exp((k - 1.0) * Math.log(x * theta) - x * theta - logGamma(k)) * theta; + } + + /** + * Compute logGamma. + * + * Based loosely on "Numerical Recpies" and the work of Paul Godfrey at + * http://my.fit.edu/~gabdo/gamma.txt + * + * TODO: find out which approximation really is the best... + * + * @param x Parameter x + * @return log(Γ(x)) + */ + public static double logGamma(final double x) { + if(Double.isNaN(x) || (x <= 0.0)) { + return Double.NaN; + } + double g = 607.0 / 128.0; + double tmp = x + g + .5; + tmp = (x + 0.5) * Math.log(tmp) - tmp; + double ser = LANCZOS[0]; + for(int i = LANCZOS.length - 1; i > 0; --i) { + ser += LANCZOS[i] / (x + i); + } + return tmp + Math.log(MathUtil.SQRTTWOPI * ser / x); + } + + /** + * Returns the regularized gamma function P(a, x). + * + * Includes the quadrature way of computing. + * + * TODO: find "the" most accurate version of this. We seem to agree with + * others for the first 10+ digits, but diverge a bit later than that. 
+ * + * @param a Parameter a + * @param x Parameter x + * @return Gamma value + */ + public static double regularizedGammaP(final double a, final double x) { + // Special cases + if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + return Double.NaN; + } + if(x == 0.0) { + return 0.0; + } + if(x >= a + 1) { + // Expected to converge faster + return 1.0 - regularizedGammaQ(a, x); + } + // Loosely following "Numerical Recipes" + double del = 1.0 / a; + double sum = del; + for(int n = 1; n < Integer.MAX_VALUE; n++) { + // compute next element in the series + del *= x / (a + n); + sum = sum + del; + if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) { + break; + } + } + if(Double.isInfinite(sum)) { + return 1.0; + } + return Math.exp(-x + (a * Math.log(x)) - logGamma(a)) * sum; + } + + /** + * Returns the regularized gamma function Q(a, x) = 1 - P(a, x). + * + * Includes the continued fraction way of computing, based loosely on the book + * "Numerical Recipes"; but probably not with the exactly same precision, + * since we reimplemented this in our coding style, not literally. + * + * TODO: find "the" most accurate version of this. We seem to agree with + * others for the first 10+ digits, but diverge a bit later than that. + * + * @param a parameter a + * @param x parameter x + * @return Result + */ + public static double regularizedGammaQ(final double a, final double x) { + if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + return Double.NaN; + } + if(x == 0.0) { + return 1.0; + } + if(x < a + 1.0) { + // Expected to converge faster + return 1.0 - regularizedGammaP(a, x); + } + // Compute using continued fraction approach. 
+ final double FPMIN = Double.MIN_VALUE / NUM_PRECISION; + double b = x + 1 - a; + double c = 1.0 / FPMIN; + double d = 1.0 / b; + double fac = d; + for(int i = 1; i < Integer.MAX_VALUE; i++) { + double an = i * (a - i); + b += 2; + d = an * d + b; + if(Math.abs(d) < FPMIN) { + d = FPMIN; + } + c = b + an / c; + if(Math.abs(c) < FPMIN) { + c = FPMIN; + } + d = 1 / d; + double del = d * c; + fac *= del; + if(Math.abs(del - 1.0) <= NUM_PRECISION) { + break; + } + } + return fac * Math.exp(-x + a * Math.log(x) - logGamma(a)); + } + + /** + * Generate a random value with the generators parameters. + * + * Along the lines of + * + * - J.H. Ahrens, U. Dieter (1974): Computer methods for sampling from gamma, + * beta, Poisson and binomial distributions, Computing 12, 223-246. + * + * - J.H. Ahrens, U. Dieter (1982): Generating gamma variates by a modified + * rejection technique, Communications of the ACM 25, 47-54. + * + * @param k K parameter + * @param theta Theta parameter + * @param random Random generator + */ + public static double nextRandom(double k, double theta, Random random) { + /* Constants */ + final double q1 = 0.0416666664, q2 = 0.0208333723, q3 = 0.0079849875; + final double q4 = 0.0015746717, q5 = -0.0003349403, q6 = 0.0003340332; + final double q7 = 0.0006053049, q8 = -0.0004701849, q9 = 0.0001710320; + final double a1 = 0.333333333, a2 = -0.249999949, a3 = 0.199999867; + final double a4 = -0.166677482, a5 = 0.142873973, a6 = -0.124385581; + final double a7 = 0.110368310, a8 = -0.112750886, a9 = 0.104089866; + final double e1 = 1.000000000, e2 = 0.499999994, e3 = 0.166666848; + final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826; + final double e7 = 0.000247453; + + if(k < 1.0) { // Base case, for small k + final double b = 1.0 + 0.36788794412 * k; // Step 1 + while(true) { + final double p = b * random.nextDouble(); + if(p <= 1.0) { // when gds <= 1 + final double gds = Math.exp(Math.log(p) / k); + if(Math.log(random.nextDouble()) <= 
-gds) { + return (gds / theta); + } + } + else { // when gds > 1 + final double gds = -Math.log((b - p) / k); + if(Math.log(random.nextDouble()) <= ((k - 1.0) * Math.log(gds))) { + return (gds / theta); + } + } + } + } + else { + // Step 1. Preparations + final double ss, s, d; + if(k != -1.0) { + ss = k - 0.5; + s = Math.sqrt(ss); + d = 5.656854249 - 12.0 * s; + } + else { + // For k == -1.0: + ss = 0.0; + s = 0.0; + d = 0.0; + } + // Random vector of maximum length 1 + final double v1, /* v2, */v12; + { // Temporary values - candidate + double tv1, tv2, tv12; + do { + tv1 = 2.0 * random.nextDouble() - 1.0; + tv2 = 2.0 * random.nextDouble() - 1.0; + tv12 = tv1 * tv1 + tv2 * tv2; + } + while(tv12 > 1.0); + v1 = tv1; + /* v2 = tv2; */ + v12 = tv12; + } + + // double b = 0.0, c = 0.0; + // double si = 0.0, q0 = 0.0; + final double b, c, si, q0; + + // Simpler accept cases & parameter computation + { + final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12); + final double x = s + 0.5 * t; + final double gds = x * x; + if(t >= 0.0) { + return (gds / theta); // Immediate acceptance + } + + // Random uniform + final double un = random.nextDouble(); + // Squeeze acceptance + if(d * un <= t * t * t) { + return (gds / theta); + } + + if(k != -1.0) { // Step 4. 
Set-up for hat case + final double r = 1.0 / k; + q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r; + if(k > 3.686) { + if(k > 13.022) { + b = 1.77; + si = 0.75; + c = 0.1515 / s; + } + else { + b = 1.654 + 0.0076 * ss; + si = 1.68 / s + 0.275; + c = 0.062 / s + 0.024; + } + } + else { + b = 0.463 + s - 0.178 * ss; + si = 1.235; + c = 0.195 / s - 0.079 + 0.016 * s; + } + } + else { + // For k == -1.0: + b = 0.0; + c = 0.0; + si = 0.0; + q0 = 0.0; + } + // Compute v and q + if(x > 0.0) { + final double v = t / (s + s); + final double q; + if(Math.abs(v) > 0.25) { + q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v); + } + else { + q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; + } + // Quotient acceptance: + if(Math.log(1.0 - un) <= q) { + return (gds / theta); + } + } + } + + // Double exponential deviate t + while(true) { + double e, u, sign_u, t; + // Retry until t is sufficiently large + do { + e = -Math.log(random.nextDouble()); + u = random.nextDouble(); + u = u + u - 1.0; + sign_u = (u > 0) ? 1.0 : -1.0; + t = b + (e * si) * sign_u; + } + while(t <= -0.71874483771719); + + // New v(t) and q(t) + final double v = t / (s + s); + final double q; + if(Math.abs(v) > 0.25) { + q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v); + } + else { + q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; + } + if(q <= 0.0) { + continue; // retry + } + // Compute w(t) + final double w; + if(q > 0.5) { + w = Math.exp(q) - 1.0; + } + else { + w = ((((((e7 * q + e6) * q + e5) * q + e4) * q + e3) * q + e2) * q + e1) * q; + } + // Hat acceptance + if(c * u * sign_u <= w * Math.exp(e - 0.5 * t * t)) { + final double x = s + 0.5 * t; + return (x * x / theta); + } + } + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java new file mode 100644 index 00000000..919cc2e3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java @@ -0,0 +1,337 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Gaussian distribution aka normal distribution + * + * @author Erich Schubert + */ +public class NormalDistribution implements Distribution { + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_A[] = { 1.85777706184603153e-1, 3.16112374387056560e+0, 1.13864154151050156E+2, 3.77485237685302021e+2, 3.20937758913846947e+3 }; + + /** + * Coefficients for erf approximation. 
+ * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_B[] = { 1.00000000000000000e00, 2.36012909523441209e01, 2.44024637934444173e02, 1.28261652607737228e03, 2.84423683343917062e03 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_C[] = { 2.15311535474403846e-8, 5.64188496988670089e-1, 8.88314979438837594e00, 6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02, 1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725E03 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_D[] = { 1.00000000000000000e00, 1.57449261107098347e01, 1.17693950891312499e02, 5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03, 4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03 }; + + /** + * Coefficients for erf approximation. + * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_P[] = { 1.63153871373020978e-2, 3.05326634961232344e-1, 3.60344899949804439e-1, 1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4 }; + + /** + * Coefficients for erf approximation. 
+ * + * Loosely based on http://www.netlib.org/specfun/erf + */ + static final double ERFAPP_Q[] = { 1.00000000000000000e00, 2.56852019228982242e00, 1.87295284992346047e00, 5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3 }; + + /** + * Treshold for switching nethods for erfinv approximation + */ + static final double P_LOW = 0.02425D; + + /** + * Treshold for switching nethods for erfinv approximation + */ + static final double P_HIGH = 1.0D - P_LOW; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_A[] = { -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 }; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_B[] = { -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 }; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_C[] = { -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 }; + + /** + * Coefficients for erfinv approximation, rational version + */ + static final double ERFINV_D[] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 }; + + /** + * Mean value for the generator + */ + private double mean; + + /** + * Standard deviation + */ + private double stddev; + + /** + * The random generator. 
+ */ + private Random random; + + /** + * Constructor for Gaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + * @param random Random generator + */ + public NormalDistribution(double mean, double stddev, Random random) { + super(); + this.mean = mean; + this.stddev = stddev; + this.random = random; + } + + /** + * Constructor for Gaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + */ + public NormalDistribution(double mean, double stddev) { + this(mean, stddev, new Random()); + } + + @Override + public double pdf(double val) { + return pdf(val, mean, stddev); + } + + @Override + public double cdf(double val) { + return cdf(val, mean, stddev); + } + + @Override + public double nextRandom() { + return mean + random.nextGaussian() * stddev; + } + + @Override + public String toString() { + return "Normal Distribution (mean="+mean+", stddev="+stddev+")"; + } + + /** + * @return the mean + */ + public double getMean() { + return mean; + } + + /** + * @return the standard deviation + */ + public double getStddev() { + return stddev; + } + + /** + * Complementary error function for Gaussian distributions = Normal + * distributions. + * + * Numerical approximation using taylor series. Implementation loosely based + * on http://www.netlib.org/specfun/erf + * + * @param x parameter value + * @return erfc(x) + */ + public static double erfc(double x) { + if(Double.isNaN(x)) { + return Double.NaN; + } + if(Double.isInfinite(x)) { + return (x < 0.0) ? 
2 : 0; + } + + double result = Double.NaN; + double absx = Math.abs(x); + // First approximation interval + if(absx < 0.46875) { + double z = x * x; + result = 1 - x * ((((ERFAPP_A[0] * z + ERFAPP_A[1]) * z + ERFAPP_A[2]) * z + ERFAPP_A[3]) * z + ERFAPP_A[4]) / ((((ERFAPP_B[0] * z + ERFAPP_B[1]) * z + ERFAPP_B[2]) * z + ERFAPP_B[3]) * z + ERFAPP_B[4]); + } + // Second approximation interval + else if(absx < 4.0) { + double z = absx; + result = ((((((((ERFAPP_C[0] * z + ERFAPP_C[1]) * z + ERFAPP_C[2]) * z + ERFAPP_C[3]) * z + ERFAPP_C[4]) * z + ERFAPP_C[5]) * z + ERFAPP_C[6]) * z + ERFAPP_C[7]) * z + ERFAPP_C[8]) / ((((((((ERFAPP_D[0] * z + ERFAPP_D[1]) * z + ERFAPP_D[2]) * z + ERFAPP_D[3]) * z + ERFAPP_D[4]) * z + ERFAPP_D[5]) * z + ERFAPP_D[6]) * z + ERFAPP_D[7]) * z + ERFAPP_D[8]); + double rounded = Math.round(result * 16.0) / 16.0; + double del = (absx - rounded) * (absx + rounded); + result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; + if(x < 0.0) { + result = 2.0 - result; + } + } + // Third approximation interval + else { + double z = 1.0 / (absx * absx); + result = z * (((((ERFAPP_P[0] * z + ERFAPP_P[1]) * z + ERFAPP_P[2]) * z + ERFAPP_P[3]) * z + ERFAPP_P[4]) * z + ERFAPP_P[5]) / (((((ERFAPP_Q[0] * z + ERFAPP_Q[1]) * z + ERFAPP_Q[2]) * z + ERFAPP_Q[3]) * z + ERFAPP_Q[4]) * z + ERFAPP_Q[5]); + result = (MathUtil.ONE_BY_SQRTPI - result) / absx; + double rounded = Math.round(result * 16.0) / 16.0; + double del = (absx - rounded) * (absx + rounded); + result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; + if(x < 0.0) { + result = 2.0 - result; + } + } + return result; + } + + /** + * Error function for Gaussian distributions = Normal distributions. + * + * Numerical approximation using taylor series. Implementation loosely based + * on http://www.netlib.org/specfun/erf + * + * @param x parameter value + * @return erf(x) + */ + public static double erf(double x) { + return 1 - erfc(x); + } + + /** + * Inverse error function. 
+ * + * @param x parameter value + * @return erfinv(x) + */ + public static double erfinv(double x) { + return standardNormalProbit(0.5 * (x + 1)) / MathUtil.SQRT2; + } + + /** + * Approximate the inverse error function for normal distributions. + * + * Largely based on: + * <p> + * http://www.math.uio.no/~jacklam/notes/invnorm/index.html <br> + * by Peter John Acklam + * </p> + * + * @param d Quantile. Must be in [0:1], obviously. + * @return Inverse erf. + */ + public static double standardNormalProbit(double d) { + if(d == 0) { + return Double.NEGATIVE_INFINITY; + } + else if(d == 1) { + return Double.POSITIVE_INFINITY; + } + else if(Double.isNaN(d) || d < 0 || d > 1) { + return Double.NaN; + } + else if(d < P_LOW) { + // Rational approximation for lower region: + double q = Math.sqrt(-2 * Math.log(d)); + return (((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); + } + else if(P_HIGH < d) { + // Rational approximation for upper region: + double q = Math.sqrt(-2 * Math.log(1 - d)); + return -(((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); + } + else { + // Rational approximation for central region: + double q = d - 0.5D; + double r = q * q; + return (((((ERFINV_A[0] * r + ERFINV_A[1]) * r + ERFINV_A[2]) * r + ERFINV_A[3]) * r + ERFINV_A[4]) * r + ERFINV_A[5]) * q / (((((ERFINV_B[0] * r + ERFINV_B[1]) * r + ERFINV_B[2]) * r + ERFINV_B[3]) * r + ERFINV_B[4]) * r + 1); + } + } + + /** + * Probability density function of the normal distribution. + * + * <pre> + * 1/(SQRT(2*pi*sigma^2)) * e^(-(x-mu)^2/2sigma^2) + * </pre> + * + * @param x The value. + * @param mu The mean. + * @param sigma The standard deviation. + * @return PDF of the given normal distribution at x. 
+ */ + public static double pdf(double x, double mu, double sigma) { + final double x_mu = x - mu; + final double sigmasq = sigma * sigma; + return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-1 * x_mu * x_mu / 2 / sigmasq); + } + + /** + * Cumulative probability density function (CDF) of a normal distribution. + * + * @param x value to evaluate CDF at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The CDF of the normal given distribution at x. + */ + public static double cdf(double x, double mu, double sigma) { + return (1 + erf(x / Math.sqrt(2))) / 2; + } + + /** + * Inverse cumulative probability density function (probit) of a normal + * distribution. + * + * @param x value to evaluate probit function at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The probit of the normal given distribution at x. + */ + public static double probit(double x, double mu, double sigma) { + return mu + sigma * standardNormalProbit(x); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java new file mode 100644 index 00000000..9571cfd3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java @@ -0,0 +1,132 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Uniform distribution. + * + * @author Erich Schubert + */ +public class UniformDistribution implements Distribution { + /** + * Minimum + */ + private double min; + + /** + * Maximum + */ + private double max; + + /** + * Len := max - min + */ + private double len; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for a uniform distribution on the interval [min, max[ + * + * @param min Minimum value + * @param max Maximum value + * @param random Random generator + */ + public UniformDistribution(double min, double max, Random random) { + super(); + // Swap parameters if they were given incorrectly. + if(min > max) { + double tmp = min; + min = max; + max = tmp; + } + this.min = min; + this.max = max; + this.len = max - min; + this.random = random; + } + + /** + * Constructor for a uniform distribution on the interval [min, max[ + * + * @param min Minimum value + * @param max Maximum value + */ + public UniformDistribution(double min, double max) { + this(min, max, new Random()); + } + + @Override + public double pdf(double val) { + if(val < min || val >= max) { + return 0.0; + } + return 1.0 / len; + } + + @Override + public double cdf(double val) { + if(val < min) { + return 0.0; + } + if(val > max) { + return 1.0; + } + return (val - min) / len; + } + + @Override + public double nextRandom() { + return min + random.nextDouble() * len; + } + + /** + * Simple toString explaining the distribution parameters. + * + * Used in describing cluster models. 
+ */ + @Override + public String toString() { + return "Uniform Distribution (min=" + min + ", max=" + max + ")"; + } + + /** + * @return the minimum value + */ + public double getMin() { + return min; + } + + /** + * @return the maximum value + */ + public double getMax() { + return max; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java new file mode 100644 index 00000000..ed9c0e88 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Standard distributions, with random generation functionalities.</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.math.statistics.distribution;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java index bd1195f3..142524e6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |