diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math')
235 files changed, 14193 insertions, 1652 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java b/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java index 56814649..1dfdc75d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -68,8 +68,12 @@ public class DoubleMinMax extends DoubleDoublePair { * @param data New value */ public void put(double data) { - this.first = Math.min(this.first, data); - this.second = Math.max(this.second, data); + if (data < first) { + first = data; + } + if (data > second) { + second = data; + } } /** diff --git a/src/de/lmu/ifi/dbs/elki/math/GeoUtil.java b/src/de/lmu/ifi/dbs/elki/math/GeoUtil.java deleted file mode 100644 index 226d8112..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/GeoUtil.java +++ /dev/null @@ -1,670 +0,0 @@ -package de.lmu.ifi.dbs.elki.math; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. 
If not, see <http://www.gnu.org/licenses/>. - */ - -import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; - -/** - * Class with utility functions for geographic computations. - * - * The majority of formulas are adapted from: - * <p> - * Ed Williams<br /> - * Aviation Formulary<br /> - * Online: http://williams.best.vwh.net/avform.htm - * </p> - * - * TODO: add ellipsoid version of Vinentry formula. - * - * @author Erich Schubert - * @author Niels Dörre - */ -@Reference(authors = "Ed Williams", title = "Aviation Formulary", booktitle = "", url = "http://williams.best.vwh.net/avform.htm") -public final class GeoUtil { - /** - * Earth radius approximation in km. - */ - public static final double EARTH_RADIUS = 6371.009; // km. - - /** - * Radius of the WGS84 Ellipsoid in km. - */ - public static final double WGS84_RADIUS = 6378.137; // km - - /** - * Flattening of the WGS84 Ellipsoid. - */ - public static final double WGS84_FLATTENING = 0.00335281066474748; - - /** - * Eccentricity squared of the WGS84 Ellipsoid - */ - public static final double WGS84_ECCENTRICITY_SQUARED = 2 * WGS84_FLATTENING - (WGS84_FLATTENING * WGS84_FLATTENING); - - /** - * Dummy constructor. Do not instantiate. - */ - private GeoUtil() { - // Use static methods. Do not intantiate - } - - /** - * Compute the approximate on-earth-surface distance of two points using the - * Haversine formula - * - * Complexity: 5 trigonometric functions, 2 sqrt. - * - * Reference: - * <p> - * R. W. Sinnott,<br/> - * Virtues of the Haversine<br /> - * Sky and telescope, 68-2, 1984 - * </p> - * - * @param lat1 Latitude of first point in degree - * @param lon1 Longitude of first point in degree - * @param lat2 Latitude of second point in degree - * @param lon2 Longitude of second point in degree - * @return Distance in km (approximately) - */ - @Reference(authors = "Sinnott, R. 
W.", title = "Virtues of the Haversine", booktitle = "Sky and telescope, 68-2, 1984") - public static double haversineFormulaDeg(double lat1, double lon1, double lat2, double lon2) { - // Convert to radians: - lat1 = MathUtil.deg2rad(lat1); - lat2 = MathUtil.deg2rad(lat2); - lon1 = MathUtil.deg2rad(lon1); - lon2 = MathUtil.deg2rad(lon2); - return haversineFormulaRad(lat1, lon1, lat2, lon2); - } - - /** - * Compute the approximate on-earth-surface distance of two points using the - * Haversine formula - * - * Complexity: 5 trigonometric functions, 2 sqrt. - * - * Reference: - * <p> - * R. W. Sinnott,<br/> - * Virtues of the Haversine<br /> - * Sky and telescope, 68-2, 1984 - * </p> - * - * @param lat1 Latitude of first point in degree - * @param lon1 Longitude of first point in degree - * @param lat2 Latitude of second point in degree - * @param lon2 Longitude of second point in degree - * @return Distance in km (approximately) - */ - @Reference(authors = "Sinnott, R. W.", title = "Virtues of the Haversine", booktitle = "Sky and telescope, 68-2, 1984") - public static double haversineFormulaRad(double lat1, double lon1, double lat2, double lon2) { - // Haversine formula, higher precision at < 1 meters but maybe issues at - // antipodal points. - final double slat = Math.sin((lat1 - lat2) * .5); - final double slon = Math.sin((lon1 - lon2) * .5); - final double a = slat * slat + slon * slon * Math.cos(lat1) * Math.cos(lat2); - final double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); - return EARTH_RADIUS * c; - } - - /** - * Compute the approximate on-earth-surface distance of two points. - * - * Uses Vincenty's Formula for the spherical case, which does not require - * iterations. - * - * Complexity: 7 trigonometric functions, 1 sqrt. - * - * Reference: - * <p> - * T. 
Vincenty<br /> - * Direct and inverse solutions of geodesics on the ellipsoid with application - * of nested equations<br /> - * Survey review 23 176, 1975 - * </p> - * - * @param lat1 Latitude of first point in degree - * @param lon1 Longitude of first point in degree - * @param lat2 Latitude of second point in degree - * @param lon2 Longitude of second point in degree - * @return Distance in km (approximately) - */ - @Reference(authors = "T. Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") - public static double sphericalVincentyFormulaDeg(double lat1, double lon1, double lat2, double lon2) { - // Work in radians - lat1 = MathUtil.deg2rad(lat1); - lat2 = MathUtil.deg2rad(lat2); - lon1 = MathUtil.deg2rad(lon1); - lon2 = MathUtil.deg2rad(lon2); - return sphericalVincentyFormulaRad(lat1, lon1, lat2, lon2); - } - - /** - * Compute the approximate on-earth-surface distance of two points. - * - * Uses Vincenty's Formula for the spherical case, which does not require - * iterations. - * - * Complexity: 7 trigonometric functions, 1 sqrt. - * - * Reference: - * <p> - * T. Vincenty<br /> - * Direct and inverse solutions of geodesics on the ellipsoid with application - * of nested equations<br /> - * Survey review 23 176, 1975 - * </p> - * - * @param lat1 Latitude of first point in degree - * @param lon1 Longitude of first point in degree - * @param lat2 Latitude of second point in degree - * @param lon2 Longitude of second point in degree - * @return Distance in km (approximately) - */ - @Reference(authors = "T. 
Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") - public static double sphericalVincentyFormulaRad(double lat1, double lon1, double lat2, double lon2) { - // Delta - final double dlon = lon1 - lon2; - - // Spherical special case of Vincenty's formula - no iterations needed - final double slat1 = Math.sin(lat1); - final double slat2 = Math.sin(lat2); - final double slond = Math.sin(dlon * .5); - final double clat1 = Math.cos(lat1); - final double clat2 = Math.cos(lat2); - final double clond = Math.cos(dlon * .5); - final double a = clat2 * slond; - final double b = (clat1 * slat2) - (slat1 * clat2 * clond); - final double d = Math.atan2(Math.sqrt(a * a + b * b), slat1 * slat2 + clat1 * clat2 * clond); - return EARTH_RADIUS * d; - } - - /** - * Compute the cross-track distance. - * - * @param lat1 Latitude of starting point. - * @param lon1 Longitude of starting point. - * @param lat2 Latitude of destination point. - * @param lon2 Longitude of destination point. - * @param latQ Latitude of query point. - * @param lonQ Longitude of query point. - * @param dist1Q Distance from starting point to query point in km. - * @return Cross-track distance in km. May be negative - this gives the side. - */ - public static double crossTrackDistanceDeg(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q) { - // Convert to radians. - lat1 = MathUtil.deg2rad(lat1); - latQ = MathUtil.deg2rad(latQ); - lat2 = MathUtil.deg2rad(lat2); - lon1 = MathUtil.deg2rad(lon1); - lonQ = MathUtil.deg2rad(lonQ); - lon2 = MathUtil.deg2rad(lon2); - return crossTrackDistanceRad(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q); - } - - /** - * Compute the cross-track distance. - * - * @param lat1 Latitude of starting point. - * @param lon1 Longitude of starting point. 
- * @param lat2 Latitude of destination point. - * @param lon2 Longitude of destination point. - * @param latQ Latitude of query point. - * @param lonQ Longitude of query point. - * @param dist1Q Distance from starting point to query point in km. - * @return Cross-track distance in km. May be negative - this gives the side. - */ - public static double crossTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q) { - final double dlon12 = lon2 - lon1; - final double dlon1Q = lonQ - lon1; - - // Compute trigonometric functions only once. - final double slat1 = Math.sin(lat1); - final double slatQ = Math.sin(latQ); - final double slat2 = Math.sin(lat2); - final double clat1 = Math.cos(lat1); - final double clatQ = Math.cos(latQ); - final double clat2 = Math.cos(lat2); - - // Compute the course - final double crs12, crs1Q; - { - // y = sin(dlon) * cos(lat2) - double yE = Math.sin(dlon12) * clat2; - double yQ = Math.sin(dlon1Q) * clatQ; - - // x = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dlon) - double xE = clat1 * slat2 - slat1 * clat2 * Math.cos(dlon12); - double xQ = clat1 * slatQ - slat1 * clatQ * Math.cos(dlon1Q); - - crs12 = Math.atan2(yE, xE); - crs1Q = Math.atan2(yQ, xQ); - } - - // Calculate cross-track distance - return EARTH_RADIUS * Math.asin(Math.sin(dist1Q / EARTH_RADIUS) * Math.sin(crs1Q - crs12)); - } - - /** - * Compute the cross-track distance. - * - * XTD = asin(sin(dist_1Q)*sin(crs_1Q-crs_12)) - * - * @param lat1 Latitude of starting point. - * @param lon1 Longitude of starting point. - * @param lat2 Latitude of destination point. - * @param lon2 Longitude of destination point. - * @param latQ Latitude of query point. - * @param lonQ Longitude of query point. - * @return Cross-track distance in km. May be negative - this gives the side. - */ - public static double crossTrackDistance(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { - // Convert to radians. 
- lat1 = MathUtil.deg2rad(lat1); - latQ = MathUtil.deg2rad(latQ); - lat2 = MathUtil.deg2rad(lat2); - lon1 = MathUtil.deg2rad(lon1); - lonQ = MathUtil.deg2rad(lonQ); - lon2 = MathUtil.deg2rad(lon2); - return crossTrackDistanceRad(lat1, lon1, lat2, lon2, latQ, lonQ); - } - - /** - * Compute the cross-track distance. - * - * XTD = asin(sin(dist_SQ)*sin(crs_SQ-crs_SE)) - * - * @param lat1 Latitude of starting point. - * @param lon1 Longitude of starting point. - * @param lat2 Latitude of destination point. - * @param lon2 Longitude of destination point. - * @param latQ Latitude of query point. - * @param lonQ Longitude of query point. - * @return Cross-track distance in km. May be negative - this gives the side. - */ - public static double crossTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { - final double dlon12 = lon2 - lon1; - final double dlon1Q = lonQ - lon1; - - // Compute trigonometric functions only once. - final double clat1 = Math.cos(lat1); - final double clatQ = Math.cos(latQ); - final double clat2 = Math.cos(lat2); - final double slat1 = Math.sin(lat1); - final double slatQ = Math.sin(latQ); - final double slat2 = Math.sin(lat2); - - // Haversine formula, higher precision at < 1 meters but maybe issues at - // antipodal points - we do not yet multiply with the radius! 
- double angDist1Q; - { - final double slat = Math.sin((latQ - lat1) * .5); - final double slon = Math.sin(dlon1Q * .5); - final double a = slat * slat + slon * slon * clat1 * clatQ; - angDist1Q = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); - } - - // Compute the course - final double crs12, crs1Q; - { - // y = sin(dlon) * cos(lat2) - double yE = Math.sin(dlon12) * clat2; - double yQ = Math.sin(dlon1Q) * clatQ; - - // x = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dlon) - double xE = clat1 * slat2 - slat1 * clat2 * Math.cos(dlon12); - double xQ = clat1 * slatQ - slat1 * clatQ * Math.cos(dlon1Q); - - crs12 = Math.atan2(yE, xE); - crs1Q = Math.atan2(yQ, xQ); - } - - // Calculate cross-track distance - return EARTH_RADIUS * Math.asin(Math.sin(angDist1Q) * Math.sin(crs1Q - crs12)); - } - - /** - * The along track distance, is the distance from S to Q along the track S to - * E. - * - * ATD=acos(cos(dist_1Q)/cos(XTD)) - * - * FIXME: can we get a proper sign into this? - * - * @param lat1 Latitude of starting point. - * @param lon1 Longitude of starting point. - * @param lat2 Latitude of destination point. - * @param lon2 Longitude of destination point. - * @param latQ Latitude of query point. - * @param lonQ Longitude of query point. - * @return Along-track distance in km. May be negative - this gives the side. - */ - public static double alongTrackDistance(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { - double dist1Q = haversineFormulaDeg(lat1, lon1, latQ, lonQ); - double ctd = crossTrackDistanceDeg(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q); - return alongTrackDistance(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q, ctd); - } - - /** - * The along track distance, is the distance from S to Q along the track S to - * E. - * - * ATD=acos(cos(dist_SQ)/cos(XTD)) - * - * FIXME: can we get a proper sign into this? - * - * @param lat1 Latitude of starting point. - * @param lon1 Longitude of starting point. 
- * @param lat2 Latitude of destination point. - * @param lon2 Longitude of destination point. - * @param latQ Latitude of query point. - * @param lonQ Longitude of query point. - * @param dist1Q Distance S to Q - * @param ctd Cross-track-distance - * @return Along-track distance in km. May be negative - this gives the side. - */ - public static double alongTrackDistance(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q, double ctd) { - // TODO: optimize the sign computation! - int sign = Math.abs(bearing(lat1, lon1, lat2, lon2) - bearing(lat1, lon1, latQ, lonQ)) < MathUtil.HALFPI ? +1 : -1; - return sign * EARTH_RADIUS * Math.acos(Math.cos(dist1Q / EARTH_RADIUS) / Math.cos(ctd / EARTH_RADIUS)); - // TODO: for short distances, use this instead? - // asin(sqrt( (sin(dist_1Q))^2 - (sin(XTD))^2 )/cos(XTD)) - } - - /** - * Point to rectangle minimum distance. - * - * Complexity: - * <ul> - * <li>Trivial cases (on longitude slice): no trigonometric functions.</li> - * <li>Cross-track case: 10+2 trig</li> - * <li>Corner case: 10+3 trig, 2 sqrt</li> - * </ul> - * - * @param plat Latitude of query point. - * @param plng Longitude of query point. - * @param rminlat Min latitude of rectangle. - * @param rminlng Min longitude of rectangle. - * @param rmaxlat Max latitude of rectangle. - * @param rmaxlng Max longitude of rectangle. - * @return Distance - */ - public static double latlngMinDistDeg(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { - // Convert to radians. - plat = MathUtil.deg2rad(plat); - plng = MathUtil.deg2rad(plng); - rminlat = MathUtil.deg2rad(rminlat); - rminlng = MathUtil.deg2rad(rminlng); - rmaxlat = MathUtil.deg2rad(rmaxlat); - rmaxlng = MathUtil.deg2rad(rmaxlng); - - return latlngMinDistRad(plat, plng, rminlat, rminlng, rmaxlat, rmaxlng); - } - - /** - * Point to rectangle minimum distance. 
- * - * Complexity: - * <ul> - * <li>Trivial cases (on longitude slice): no trigonometric functions.</li> - * <li>Cross-track case: 10+2 trig</li> - * <li>Corner case: 10+3 trig, 2 sqrt</li> - * </ul> - * - * @param plat Latitude of query point. - * @param plng Longitude of query point. - * @param rminlat Min latitude of rectangle. - * @param rminlng Min longitude of rectangle. - * @param rmaxlat Max latitude of rectangle. - * @param rmaxlng Max longitude of rectangle. - * @return Distance - */ - public static double latlngMinDistRad(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { - // FIXME: handle rectangles crossing the +-180 deg boundary correctly! - - // Degenerate rectangles: - if ((rminlat >= rmaxlat) && (rminlng >= rmaxlng)) { - return haversineFormulaRad(rminlat, rminlng, plat, plng); - } - - // The simplest case is when the query point is in the same "slice": - if (rminlng <= plng && plng <= rmaxlng) { - // Inside rectangle: - if (rminlat <= plat && plat <= rmaxlat) { - return 0; - } - // South: - if (plat < rminlat) { - return EARTH_RADIUS * (rminlat - plat); - } else { - // plat > rmaxlat - return EARTH_RADIUS * (plat - rmaxlat); - } - } - - // Determine whether going east or west is shorter. - double lngE = rminlng - plng; - lngE += (lngE < 0) ? MathUtil.TWOPI : 0; - double lngW = rmaxlng - plng; - lngW -= (lngW > 0) ? 
MathUtil.TWOPI : 0; - - // Compute sine and cosine values we will certainly need below: - final double slatQ = Math.sin(plat); - final double clatQ = Math.cos(plat); - final double slatN = Math.sin(rmaxlat); - final double clatN = Math.cos(rmaxlat); - final double slatS = Math.sin(rminlat); - final double clatS = Math.cos(rminlat); - - // East, to min edge: - if (lngE <= -lngW) { - final double slngD = Math.sin(lngE); - final double clngD = Math.cos(lngE); - - // Bearing to south - // Math.atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD); - // Bearing from south - final double bs = Math.atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD); - // Bearing to north - // Math.atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD); - // Bearing from north - final double bn = Math.atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD); - if (bs < MathUtil.HALFPI) { - if (bn > MathUtil.HALFPI) { - // Radians from south pole = abs(ATD) - final double radFromS = -MathUtil.HALFPI - plat; - - // Cross-track-distance to longitude line. - return EARTH_RADIUS * Math.asin(Math.sin(radFromS) * -slngD); - } - } - if (bs - MathUtil.HALFPI < MathUtil.HALFPI - bn) { - // Haversine to north corner. - final double slatN2 = Math.sin((plat - rmaxlat) * .5); - final double slon = Math.sin(lngE * .5); - final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN; - final double distN = 2 * Math.atan2(Math.sqrt(aN), Math.sqrt(1 - aN)); - return EARTH_RADIUS * distN; - } else { - // Haversine to south corner. 
- final double slatS2 = Math.sin((plat - rminlat) * .5); - final double slon = Math.sin(lngE * .5); - final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS; - final double distS = 2 * Math.atan2(Math.sqrt(aS), Math.sqrt(1 - aS)); - return EARTH_RADIUS * distS; - } - } else { // West, to max edge - final double slngD = Math.sin(lngW); - final double clngD = Math.cos(lngW); - - // Bearing to south - // Math.atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD); - // Bearing from south - final double bs = Math.atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD); - // Bearing to north - // Math.atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD); - // Bearing from north - final double bn = Math.atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD); - if (bs > -MathUtil.HALFPI) { - if (bn < -MathUtil.HALFPI) { - // Radians from south = abs(ATD) = distance from pole - final double radFromS = -MathUtil.HALFPI - plat; - // Cross-track-distance to longitude line. - return EARTH_RADIUS * Math.asin(Math.sin(radFromS) * slngD); - } - } - if (-MathUtil.HALFPI - bs < bn + MathUtil.HALFPI) { - // Haversine to north corner. - final double slatN2 = Math.sin((plat - rmaxlat) * .5); - final double slon = Math.sin(lngW * .5); - final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN; - final double distN = 2 * Math.atan2(Math.sqrt(aN), Math.sqrt(1 - aN)); - return EARTH_RADIUS * distN; - } else { - // Haversine to south corner. - final double slatS2 = Math.sin((plat - rminlat) * .5); - final double slon = Math.sin(lngW * .5); - final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS; - final double distS = 2 * Math.atan2(Math.sqrt(aS), Math.sqrt(1 - aS)); - return EARTH_RADIUS * distS; - } - } - } - - /** - * Compute the bearing from start to end. 
- * - * @param latS Start latitude, in degree - * @param lngS Start longitude, in degree - * @param latE End latitude, in degree - * @param lngE End longitude, in degree - * @return Bearing in radians - */ - public static double bearing(double latS, double lngS, double latE, double lngE) { - latS = MathUtil.deg2rad(latS); - latE = MathUtil.deg2rad(latE); - lngS = MathUtil.deg2rad(lngS); - lngE = MathUtil.deg2rad(lngE); - final double slatS = Math.sin(latS); - final double clatS = Math.cos(latS); - final double slatE = Math.sin(latE); - final double clatE = Math.cos(latE); - return Math.atan2(-Math.sin(lngS - lngE) * clatE, clatS * slatE - slatS * clatE * Math.cos(lngS - lngE)); - } - - /** - * Map a latitude,longitude pair to 3D X-Y-Z coordinates, using athe WGS84 - * ellipsoid. - * - * The coordinate system is chosen such that the earth rotates around the Z - * axis. - * - * @param lat Latitude in degree - * @param lng Longitude in degree - * @return Coordinate triple - */ - public static double[] latLngDegToXZYWGS84(double lat, double lng) { - // Switch to radians: - lat = Math.toRadians(lat); - lng = Math.toRadians(lng); - // Sine and cosines: - final double clat = Math.cos(lat), slat = Math.sin(lat); - final double clng = Math.cos(lng), slng = Math.sin(lng); - - // Eccentricity squared - final double v = WGS84_RADIUS / (Math.sqrt(1 - WGS84_ECCENTRICITY_SQUARED * slat * slat)); - - return new double[] { v * clat * clng, v * clat * slng, (1 - WGS84_ECCENTRICITY_SQUARED) * v * slat }; - } - - /** - * Convert Latitude-Longitude pair to X-Y-Z coordinates using a spherical - * approximation of the earth. - * - * The coordinate system is chosen such that the earth rotates around the Z - * axis. - * - * @param lat Latitude in degree - * @param lng Longitude in degree - * @return Coordinate triple - */ - public static double[] latLngDegToXZY(double lat, double lng) { - // Map to radians. 
- lat = MathUtil.rad2deg(lat); - lng = MathUtil.rad2deg(lng); - // Sine and cosines: - final double clat = Math.cos(lat), slat = Math.sin(lat); - final double clng = Math.cos(lng), slng = Math.sin(lng); - return new double[] { EARTH_RADIUS * clat * clng, EARTH_RADIUS * clat * slng, EARTH_RADIUS * slat }; - } - - /** - * Convert a 3D coordinate pair to the corresponding longitude. - * - * Only x and y are required - z gives the latitude. - * - * @param x X value - * @param y Y value - * @return Latitude - */ - public static double xyzToLatDegWGS84(double x, double y, double z) { - final double p = Math.sqrt(x * x + y * y); - double lat = Math.atan2(z, p * (1 - WGS84_ECCENTRICITY_SQUARED)); - - // Iteratively improving the lat value - // TODO: instead of a fixed number of iterations, check for convergence. - for (int i = 0; i < 10; i++) { - final double slat = Math.sin(lat); - final double v = WGS84_RADIUS / (Math.sqrt(1 - WGS84_ECCENTRICITY_SQUARED * slat * slat)); - lat = Math.atan2(z + WGS84_ECCENTRICITY_SQUARED * v * slat, p); - } - - return MathUtil.rad2deg(lat); - } - - /** - * Convert a 3D coordinate pair to the corresponding latitude. - * - * Only the z coordinate is required. - * - * @param z Z value - * @return Latitude - */ - public static double xyzToLatDeg(double z) { - return MathUtil.rad2deg(Math.asin(z / EARTH_RADIUS)); - } - - /** - * Convert a 3D coordinate pair to the corresponding longitude. - * - * Only x and y are required - z gives the latitude. 
- * - * @param x X value - * @param y Y value - * @return Latitude - */ - public static double xyzToLngDeg(double x, double y) { - return MathUtil.rad2deg(Math.atan2(y, x)); - } -} diff --git a/src/de/lmu/ifi/dbs/elki/math/IntegerMinMax.java b/src/de/lmu/ifi/dbs/elki/math/IntegerMinMax.java index 4c89240b..5804f057 100644 --- a/src/de/lmu/ifi/dbs/elki/math/IntegerMinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/IntegerMinMax.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java index aaa19f0d..27de942e 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java +++ b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -43,12 +43,32 @@ public final class MathUtil { /** * Two times Pi. */ - public static final double TWOPI = 2 * Math.PI; + public static final double TWOPI = 2. * Math.PI; /** * Half the value of Pi. */ - public static final double HALFPI = 0.5 * Math.PI; + public static final double HALFPI = .5 * Math.PI; + + /** + * One quarter of Pi. + */ + public static final double QUARTERPI = .25 * Math.PI; + + /** + * 1.5 times Pi. + */ + public static final double ONEHALFPI = 1.5 * Math.PI; + + /** + * Pi squared + */ + public static final double PISQUARE = Math.PI * Math.PI; + + /** + * Square root of Pi. + */ + public static final double SQRTPI = Math.sqrt(Math.PI); /** * Square root of two times Pi. 
@@ -56,14 +76,19 @@ public final class MathUtil { public static final double SQRTTWOPI = Math.sqrt(TWOPI); /** + * Constant for sqrt(pi/2) + */ + public static final double SQRTHALFPI = Math.sqrt(HALFPI); + + /** * Square root of 2. */ - public static final double SQRT2 = Math.sqrt(2); + public static final double SQRT2 = Math.sqrt(2.); /** * Square root of 5. */ - public static final double SQRT5 = Math.sqrt(5); + public static final double SQRT5 = Math.sqrt(5.); /** * Square root of 0.5 == 1 / sqrt(2). @@ -73,17 +98,27 @@ public final class MathUtil { /** * Precomputed value of 1 / sqrt(pi). */ - public static final double ONE_BY_SQRTPI = 1 / Math.sqrt(Math.PI); + public static final double ONE_BY_SQRTPI = 1. / SQRTPI; + + /** + * Precomputed value of 1 / sqrt(2 * pi). + */ + public static final double ONE_BY_SQRTTWOPI = 1. / SQRTTWOPI; /** * Logarithm of 2 to the basis e, for logarithm conversion. */ - public static final double LOG2 = Math.log(2); + public static final double LOG2 = Math.log(2.); + + /** + * Logarithm of 3 to the basis e, for logarithm conversion. + */ + public static final double LOG3 = Math.log(3.); /** * Natural logarithm of 10. */ - public static final double LOG10 = Math.log(10); + public static final double LOG10 = Math.log(10.); /** * Math.log(Math.PI). @@ -101,6 +136,11 @@ public final class MathUtil { public static final double LOGSQRTTWOPI = Math.log(SQRTTWOPI); /** + * Log(log(2)) + */ + public static final double LOGLOG2 = Math.log(LOG2); + + /** * Constant for degrees to radians. */ public static final double DEG2RAD = Math.PI / 180.0; @@ -557,6 +597,61 @@ public final class MathUtil { } /** + * Normalize an angle to [0:2pi[ + * + * @param x Input angle + * @return Normalized angle + */ + public static double normAngle(double x) { + x %= TWOPI; + if (x > 0) { + return x; + } else { + return x + TWOPI; + } + } + + /** + * <b>Fast</b> way of computing cos(x) from x and sin(x). 
+ * + * @param angle Input angle x + * @param sin Sine of x. + * @return Cosine of x + */ + public static double sinToCos(double angle, double sin) { + // Numerics of the formula below aren't too good. + if ((-1e-5 < sin && sin < 1e-5) || sin > 0.99999 || sin < -0.99999) { + return Math.cos(angle); + } + angle = normAngle(angle); + if (angle < HALFPI || angle > ONEHALFPI) { + return Math.sqrt(1 - sin * sin); + } else { + return -Math.sqrt(1 - sin * sin); + } + } + + /** + * <b>Fast</b> way of computing sin(x) from x and cos(x). + * + * @param angle Input angle x + * @param cos Cosine of x. + * @return Sine of x + */ + public static double cosToSin(double angle, double cos) { + // Numerics of the formula below aren't too good. + if ((-1e-5 < cos && cos < 1e-5) || cos > 0.99999 || cos < -0.99999) { + return Math.sin(angle); + } + angle = normAngle(angle); + if (angle < Math.PI) { + return Math.sqrt(1 - cos * cos); + } else { + return -Math.sqrt(1 - cos * cos); + } + } + + /** * Find the next power of 2. * * Classic bit operation, for signed 32-bit. Valid for positive integers only @@ -745,4 +840,14 @@ public final class MathUtil { return Double.longBitsToDouble(bits); } } + + /** + * More stable than {@code Math.log(1 - Math.exp(x))} + * + * @param x Value + * @return log(1-exp(x)) + */ + public static double log1mexp(double x) { + return (x > -LOG2) ? 
Math.log(-Math.expm1(x)) : Math.log1p(-Math.exp(x)); + } } diff --git a/src/de/lmu/ifi/dbs/elki/math/Mean.java b/src/de/lmu/ifi/dbs/elki/math/Mean.java index 56f73f8e..5b943a2f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/Mean.java +++ b/src/de/lmu/ifi/dbs/elki/math/Mean.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -53,14 +53,14 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; @Reference(authors = "B. P. Welford", title = "Note on a method for calculating corrected sums of squares and products", booktitle = "Technometrics 4(3)") public class Mean { /** - * Mean of values + * Mean of values - first moment. */ - protected double mean = 0.0; + protected double m1 = 0.0; /** - * Weight sum (number of samples) + * Weight sum (number of samples). */ - protected double wsum = 0; + protected double n = 0; /** * Empty constructor @@ -75,8 +75,8 @@ public class Mean { * @param other other instance to copy data from. 
*/ public Mean(Mean other) { - this.mean = other.mean; - this.wsum = other.wsum; + this.m1 = other.m1; + this.n = other.n; } /** @@ -85,9 +85,9 @@ public class Mean { * @param val Value */ public void put(double val) { - wsum += 1.0; - final double delta = val - mean; - mean += delta / wsum; + n += 1.0; + final double delta = val - m1; + m1 += delta / n; } /** @@ -100,11 +100,11 @@ public class Mean { * @param weight weight */ public void put(double val, double weight) { - final double nwsum = weight + wsum; - final double delta = val - mean; + final double nwsum = weight + n; + final double delta = val - m1; final double rval = delta * weight / nwsum; - mean += rval; - wsum = nwsum; + m1 += rval; + n = nwsum; } /** @@ -113,12 +113,12 @@ public class Mean { * @param other Data to join with */ public void put(Mean other) { - final double nwsum = other.wsum + this.wsum; + final double nwsum = other.n + this.n; // this.mean += rval; // This supposedly is more numerically stable: - this.mean = (this.wsum * this.mean + other.wsum * other.mean) / nwsum; - this.wsum = nwsum; + this.m1 = (this.n * this.m1 + other.n * other.m1) / nwsum; + this.n = nwsum; } /** @@ -127,7 +127,7 @@ public class Mean { * @return number of data points */ public double getCount() { - return wsum; + return n; } /** @@ -136,7 +136,7 @@ public class Mean { * @return mean */ public double getMean() { - return mean; + return m1; } /** @@ -162,7 +162,7 @@ public class Mean { * Reset the value. */ public void reset() { - mean = 0; - wsum = 0; + m1 = 0; + n = 0; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java b/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java index 0210b0fb..7723daa4 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java +++ b/src/de/lmu/ifi/dbs/elki/math/MeanVariance.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -62,7 +62,7 @@ public class MeanVariance extends Mean { /** * nVariance */ - protected double nvar = 0.0; + protected double m2 = 0.0; /** * Empty constructor @@ -77,9 +77,9 @@ public class MeanVariance extends Mean { * @param other other instance to copy data from. */ public MeanVariance(MeanVariance other) { - this.mean = other.mean; - this.nvar = other.nvar; - this.wsum = other.wsum; + this.m1 = other.m1; + this.m2 = other.m2; + this.n = other.n; } /** @@ -89,11 +89,11 @@ public class MeanVariance extends Mean { */ @Override public void put(double val) { - wsum += 1.0; - final double delta = val - mean; - mean += delta / wsum; + n += 1.0; + final double delta = val - m1; + m1 += delta / n; // The next line needs the *new* mean! - nvar += delta * (val - mean); + m2 += delta * (val - m1); } /** @@ -106,14 +106,15 @@ public class MeanVariance extends Mean { * @param weight weight */ @Override + @Reference(authors = "D.H.D. 
West", title = "Updating Mean and Variance Estimates: An Improved Method", booktitle = "Communications of the ACM, Volume 22 Issue 9") public void put(double val, double weight) { - final double nwsum = weight + wsum; - final double delta = val - mean; + final double nwsum = weight + n; + final double delta = val - m1; final double rval = delta * weight / nwsum; - mean += rval; + m1 += rval; // Use old and new weight sum here: - nvar += wsum * delta * rval; - wsum = nwsum; + m2 += n * delta * rval; + n = nwsum; } /** @@ -123,18 +124,17 @@ public class MeanVariance extends Mean { */ @Override public void put(Mean other) { - if(other instanceof MeanVariance) { - final double nwsum = other.wsum + this.wsum; - final double delta = other.mean - this.mean; - final double rval = delta * other.wsum / nwsum; + if (other instanceof MeanVariance) { + final double nwsum = other.n + this.n; + final double delta = other.m1 - this.m1; + final double rval = delta * other.n / nwsum; // this.mean += rval; // This supposedly is more numerically stable: - this.mean = (this.wsum * this.mean + other.wsum * other.mean) / nwsum; - this.nvar += ((MeanVariance) other).nvar + delta * this.wsum * rval; - this.wsum = nwsum; - } - else { + this.m1 = (this.n * this.m1 + other.n * other.m1) / nwsum; + this.m2 += ((MeanVariance) other).m2 + delta * this.n * rval; + this.n = nwsum; + } else { throw new AbortException("I cannot combine Mean and MeanVariance to a MeanVariance."); } } @@ -146,7 +146,7 @@ public class MeanVariance extends Mean { */ @Override public double getCount() { - return wsum; + return n; } /** @@ -156,7 +156,7 @@ public class MeanVariance extends Mean { */ @Override public double getMean() { - return mean; + return m1; } /** @@ -167,7 +167,7 @@ public class MeanVariance extends Mean { * @return variance */ public double getNaiveVariance() { - return nvar / wsum; + return m2 / n; } /** @@ -176,8 +176,10 @@ public class MeanVariance extends Mean { * @return sample variance */ 
public double getSampleVariance() { - assert (wsum > 1) : "Cannot compute a reasonable sample variance with weight <= 1.0!"; - return nvar / (wsum - 1); + if (!(n > 1.)) { + throw new ArithmeticException("Cannot compute a reasonable sample variance with weight <= 1.0!"); + } + return m2 / (n - 1); } /** @@ -230,7 +232,7 @@ public class MeanVariance extends Mean { */ public static MeanVariance[] newArray(int dimensionality) { MeanVariance[] arr = new MeanVariance[dimensionality]; - for(int i = 0; i < dimensionality; i++) { + for (int i = 0; i < dimensionality; i++) { arr[i] = new MeanVariance(); } return arr; @@ -244,6 +246,6 @@ public class MeanVariance extends Mean { @Override public void reset() { super.reset(); - nvar = 0; + m2 = 0; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java b/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java index 4ea76002..027a302d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java @@ -1,12 +1,10 @@ package de.lmu.ifi.dbs.elki.math; -import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; - /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +22,9 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; + /** * Class collecting mean, variance, minimum and maximum statistics. 
* @@ -61,25 +62,32 @@ public class MeanVarianceMinMax extends MeanVariance { @Override public void put(double val) { super.put(val); - min = Math.min(min, val); - max = Math.max(max, val); + if (val < min) { + min = val; + } + if (val > max) { + max = val; + } } @Override public void put(double val, double weight) { super.put(val, weight); - min = Math.min(min, val); - max = Math.max(max, val); + if (val < min) { + min = val; + } + if (val > max) { + max = val; + } } @Override public void put(Mean other) { - if(other instanceof MeanVarianceMinMax) { + if (other instanceof MeanVarianceMinMax) { super.put(other); min = Math.min(min, ((MeanVarianceMinMax) other).min); max = Math.max(max, ((MeanVarianceMinMax) other).max); - } - else { + } else { throw new AbortException("Cannot aggregate into a minmax statistic: " + other.getClass()); } } @@ -101,14 +109,14 @@ public class MeanVarianceMinMax extends MeanVariance { public double getMax() { return this.max; } - + /** * Get the current minimum and maximum. * * @return current minimum and maximum */ - public DoubleMinMax getDoubleMinMax(){ - return new DoubleMinMax(this.min,this.max); + public DoubleMinMax getDoubleMinMax() { + return new DoubleMinMax(this.min, this.max); } /** @@ -119,7 +127,7 @@ public class MeanVarianceMinMax extends MeanVariance { public double getDiff() { return this.getMax() - this.getMin(); } - + /** * Create and initialize a new array of MeanVarianceMinMax * @@ -128,7 +136,7 @@ public class MeanVarianceMinMax extends MeanVariance { */ public static MeanVarianceMinMax[] newArray(int dimensionality) { MeanVarianceMinMax[] arr = new MeanVarianceMinMax[dimensionality]; - for(int i = 0; i < dimensionality; i++) { + for (int i = 0; i < dimensionality; i++) { arr[i] = new MeanVarianceMinMax(); } return arr; @@ -145,4 +153,4 @@ public class MeanVarianceMinMax extends MeanVariance { min = Double.POSITIVE_INFINITY; max = Double.NEGATIVE_INFINITY; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/MinMax.java b/src/de/lmu/ifi/dbs/elki/math/MinMax.java index 98b66173..d3ee45bd 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/MinMax.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -178,7 +178,7 @@ public class MinMax<T extends Comparable<? super T>> extends Pair<T, T> { Class<MinMax<N>> mmcls = ClassGenericsUtil.uglyCastIntoSubclass(MinMax.class); MinMax<N>[] mms = ClassGenericsUtil.newArrayOfNull(size, mmcls); for(int i = 0; i < size; i++) { - mms[i] = new MinMax<N>(); + mms[i] = new MinMax<>(); } return mms; } diff --git a/src/de/lmu/ifi/dbs/elki/math/PearsonCorrelation.java b/src/de/lmu/ifi/dbs/elki/math/PearsonCorrelation.java index 175f128d..4cd9c732 100644 --- a/src/de/lmu/ifi/dbs/elki/math/PearsonCorrelation.java +++ b/src/de/lmu/ifi/dbs/elki/math/PearsonCorrelation.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/Primes.java b/src/de/lmu/ifi/dbs/elki/math/Primes.java index e9ac0463..5755df1a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/Primes.java +++ b/src/de/lmu/ifi/dbs/elki/math/Primes.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team 
diff --git a/src/de/lmu/ifi/dbs/elki/math/SinCosTable.java b/src/de/lmu/ifi/dbs/elki/math/SinCosTable.java index 16b89772..6cabb172 100644 --- a/src/de/lmu/ifi/dbs/elki/math/SinCosTable.java +++ b/src/de/lmu/ifi/dbs/elki/math/SinCosTable.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -97,7 +97,7 @@ public abstract class SinCosTable { double ang = 0.; for (int i = 0; i < steps; i++, ang += radstep) { this.costable[i] = Math.cos(ang); - this.sintable[i] = Math.sin(ang); + this.sintable[i] = MathUtil.cosToSin(ang, this.costable[i]); } } diff --git a/src/de/lmu/ifi/dbs/elki/math/StatisticalMoments.java b/src/de/lmu/ifi/dbs/elki/math/StatisticalMoments.java new file mode 100644 index 00000000..af42c2e5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/StatisticalMoments.java @@ -0,0 +1,282 @@ +package de.lmu.ifi.dbs.elki.math; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; + +/** + * Track various statistical moments, including mean, variance, skewness and + * kurtosis. + * + * References: + * <p> + * T. B. Terriberry<br /> + * Computing Higher-Order Moments Online<br/> + * http://people.xiph.org/~tterribe/notes/homs.html + * </p> + * + * General recurrence, for higher order moments, can be found in: + * <p> + * Philippe Pébay<br /> + * Formulas for Robust, One-Pass Parallel Computation of Covariances and + * Arbitrary-Order Statistical Moments<br /> + * Sandia Report SAND2008-6212, Sandia National Laboratories + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "T. B. Terriberry", title = "Computing Higher-Order Moments Online", booktitle = "Online - Technical Note", url = "http://people.xiph.org/~tterribe/notes/homs.html") +public class StatisticalMoments extends MeanVarianceMinMax { + /** + * Third moment. + */ + double m3; + + /** + * Fourth moment. + */ + double m4; + + /** + * Empty constructor + */ + public StatisticalMoments() { + // nothing to do here, initialization done above. + } + + /** + * Constructor from other instance + * + * @param other other instance to copy data from. + */ + public StatisticalMoments(StatisticalMoments other) { + this.m1 = other.m1; + this.m2 = other.m2; + this.n = other.n; + this.m3 = other.m3; + this.m4 = other.m4; + } + + /** + * Add a single value with weight 1.0 + * + * @param val Value + */ + @Override + public void put(double val) { + final double nn = this.n + 1.0; + final double delta = val - m1; + final double delta_nn = delta / nn; + final double delta_nn2 = delta_nn * delta_nn; + final double inc = delta * delta_nn * this.n; + + // Update values: + m4 += inc * delta_nn2 * (nn * nn - 3. * nn + 3.) + 6. * delta_nn2 * m2 - 4. * delta_nn * m3; + m3 += inc * delta_nn * (nn - 2) - 3. 
* delta_nn * m2; + m2 += inc; + m1 += delta_nn; + n = nn; + + min = Math.min(min, val); + max = Math.max(max, val); + } + + /** + * Add data with a given weight. + * + * @param val data + * @param weight weight + */ + @Override + public void put(double val, double weight) { + // TODO: any way of further simplifying this? + // Right now it is copy & paste from the merge formula. + final double nn = weight + this.n; + final double delta = val - this.m1; + + // Some factors used below: + final double otherm2 = val * val; + final double otherm3 = otherm2 * val; + final double otherm4 = otherm3 * val; + + final double delta_nn = delta / nn; + final double delta_nn2 = delta_nn * delta_nn; + final double delta_nn3 = delta_nn2 * delta_nn; + final double na2 = this.n * this.n; + final double nb2 = weight * weight; + final double ntn = this.n * weight; + + this.m4 += otherm4 + delta * delta_nn3 * ntn * (na2 - ntn + nb2) + 6. * (na2 * otherm2 + nb2 * this.m2) * delta_nn2 + 4. * (this.n * otherm3 - weight * this.m3) * delta_nn; + this.m3 += otherm3 + delta * delta_nn2 * ntn * (this.n - weight) + 3. * (this.n * otherm2 - weight * this.m2) * delta_nn; + this.m2 += otherm2 + delta * delta_nn * this.n * weight; + this.m1 += weight * delta_nn; + this.n = nn; + + min = Math.min(min, val); + max = Math.max(max, val); + } + + /** + * Join the data of another MeanVariance instance. 
+ * + * @param other Data to join with + */ + @Override + public void put(Mean other) { + if (other instanceof StatisticalMoments) { + StatisticalMoments othe = (StatisticalMoments) other; + final double nn = othe.n + this.n; + final double delta = othe.m1 - this.m1; + + // Some factors used below: + final double delta_nn = delta / nn; + final double delta_nn2 = delta_nn * delta_nn; + final double delta_nn3 = delta_nn2 * delta_nn; + final double na2 = this.n * this.n; + final double nb2 = othe.n * othe.n; + final double ntn = this.n * othe.n; + + this.m4 += othe.m4 + delta * delta_nn3 * ntn * (na2 - ntn + nb2) + 6. * (na2 * othe.m2 + nb2 * this.m2) * delta_nn2 + 4. * (this.n * othe.m3 - othe.n * this.m3) * delta_nn; + this.m3 += othe.m3 + delta * delta_nn2 * ntn * (this.n - othe.n) + 3. * (this.n * othe.m2 - othe.n * this.m2) * delta_nn; + this.m2 += othe.m2 + delta * delta_nn * this.n * othe.n; + this.m1 += othe.n * delta_nn; + this.n = nn; + + min = Math.min(min, othe.min); + max = Math.max(max, othe.max); + } else { + throw new AbortException("I cannot combine Mean or MeanVariance into to a StatisticalMoments class."); + } + } + + /** + * Get the skewness using sample variance. + * + * @return Skewness + */ + public double getSampleSkewness() { + assert (n > 2.) : "Cannot compute a reasonable sample skewness with weight <= 2.0!"; + double sigma2 = getSampleVariance(); + return (m3 * n / (n - 1) / (n - 2)) / Math.pow(sigma2, 1.5); + } + + /** + * Get the skewness using naive variance. + * + * @return Skewness + */ + public double getNaiveSkewness() { + double sigma2 = getNaiveVariance(); + return (m3 / n) / Math.pow(sigma2, 1.5); + } + + /** + * Get the kurtosis using sample variance. + * + * Note: this formula does <em>not</em> include the correction factor, such + * that a normal distribution should be 0. + * + * @return Kurtosis + */ + public double getSampleKurtosis() { + assert (n > 3.) 
: "Cannot compute a reasonable sample kurtosis with weight <= 3.0!"; + if (!(m2 > 0)) { + throw new ArithmeticException("Kurtosis not defined when variance is 0!"); + } + final double nm1 = n - 1.; + return (nm1 / ((n - 2.) * (n - 3.))) * (n * (n + 1) * m4 / (m2 * m2) - 3 * nm1) + 3; + } + + /** + * Get the kurtosis using naive variance. + * + * Note: this formula does <em>not</em> include the -3 term. + * + * @return Kurtosis + */ + public double getNaiveKurtosis() { + if (!(m2 > 0)) { + throw new ArithmeticException("Kurtosis not defined when variance is 0!"); + } + return (n * m4) / (m2 * m2); + } + + /** + * Get the kurtosis using sample variance. + * + * Note: this formula <em>does</em> include the correction factor, such that a + * normal distribution should be 0. + * + * @return Kurtosis + */ + public double getSampleExcessKurtosis() { + assert (n > 3.) : "Cannot compute a reasonable sample kurtosis with weight <= 3.0!"; + if (!(m2 > 0)) { + throw new ArithmeticException("Kurtosis not defined when variance is 0!"); + } + final double nm1 = n - 1.; + return (nm1 / ((n - 2.) * (n - 3.))) * (n * (n + 1) * m4 / (m2 * m2) - 3 * nm1); + } + + /** + * Get the kurtosis using naive variance. + * + * Note: this formula <em>does</em> include the -3 term. 
+ * + * @return Kurtosis + */ + public double getNaiveExcessKurtosis() { + if (!(m2 > 0)) { + throw new ArithmeticException("Kurtosis not defined when variance is 0!"); + } + return (n * m4) / (m2 * m2) - 3; + } + + /** + * Create and initialize a new array of MeanVariance + * + * @param dimensionality Dimensionality + * @return New and initialized Array + */ + public static StatisticalMoments[] newArray(int dimensionality) { + StatisticalMoments[] arr = new StatisticalMoments[dimensionality]; + for (int i = 0; i < dimensionality; i++) { + arr[i] = new StatisticalMoments(); + } + return arr; + } + + @Override + public String toString() { + return "StatisticalMoments(mean=" + getMean() + ",m2=" + m2 + ",m3=" + m3 + ",m4=" + m4 + ",n=" + n + ")"; + } + + @Override + public void reset() { + super.reset(); + m3 = 0; + m4 = 0; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java index ad59ebfd..59380a74 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; @@ -47,7 +48,7 @@ public class CovarianceDimensionSimilarity implements DimensionSimilarity<Number } @Override - public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) { + public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) { final int dim = matrix.size(); // FIXME: Use only necessary dimensions! CovarianceMatrix covmat = CovarianceMatrix.make(relation, subset); diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java index 6b01b3f3..d0c1a0bc 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -22,6 +22,7 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; @@ -32,15 +33,18 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; * * @author Erich Schubert * + * @apiviz.uses DimensionSimilarityMatrix - - «writes» + * * @param <V> Object type */ public interface DimensionSimilarity<V> extends Parameterizable { /** * Compute the dimension similarity matrix * + * @param database Database context * @param relation Relation * @param subset DBID subset (for sampling / selection) * @param matrix Matrix to fill */ - public void computeDimensionSimilarites(Relation<? extends V> relation, DBIDs subset, DimensionSimilarityMatrix matrix); + public void computeDimensionSimilarites(Database database, Relation<? extends V> relation, DBIDs subset, DimensionSimilarityMatrix matrix); } diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java index 9f7a9707..70429de4 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java @@ -1,12 +1,13 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; import de.lmu.ifi.dbs.elki.math.geometry.PrimsMinimumSpanningTree; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,6 +30,8 @@ import de.lmu.ifi.dbs.elki.math.geometry.PrimsMinimumSpanningTree; * Class representing a similarity matrix between dimensions. 
* * @author Erich Schubert + * + * @apiviz.uses PrimsMinimumSpanningTree */ public abstract class DimensionSimilarityMatrix { /** @@ -120,6 +123,26 @@ public abstract class DimensionSimilarityMatrix { return ((y * (y - 1)) >> 1) + x; } + /** + * Transform linear triangle matrix into a full matrix. + * + * @return New matrix + */ + public Matrix copyToFullMatrix() { + final int dim = size(); + Matrix m = new Matrix(dim, dim); + double[][] ref = m.getArrayRef(); + int i = 0; + for (int y = 1; y < dim; y++) { + for (int x = 0; x < y; x++) { + ref[x][y] = sim[i]; + ref[y][x] = sim[i]; + ++i; + } + } + return m; + } + @Override public String toString() { StringBuffer buf = new StringBuffer(); @@ -204,6 +227,8 @@ public abstract class DimensionSimilarityMatrix { * Adapter class for running prim's algorithm. * * @author Erich Schubert + * + * @apiviz.exclude */ public static class PrimAdapter implements PrimsMinimumSpanningTree.Adapter<DimensionSimilarityMatrix> { /** diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java index b221866c..e73d02af 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; */
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -34,9 +35,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
- * FIXME: This needs serious TESTING before release. Large parts have been
- * rewritten, but could not be tested at the time of rewriting.
- *
* Compute the similarity of dimensions by using a hough transformation.
*
* Reference: <br>
@@ -48,6 +46,9 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * IEEEVisualization and Computer Graphics, 2011.
* </p>
*
+ * FIXME: This needs serious TESTING before release. Large parts have been
+ * rewritten, but could not be tested at the time of rewriting.
+ *
* @author Erich Schubert
* @author Robert Rödler
*/
@@ -63,7 +64,7 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< *
* The original publication used 50.
*/
- private final static int STEPS = 64;
+ private final static int STEPS = 48; // 64;
/**
* Precompute sinus and cosinus
@@ -78,15 +79,15 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< }
@Override
- public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
final int dim = matrix.size();
- final int resolution = 500;
- byte[][][][] pics = new byte[dim][dim][][]; // [resolution][resolution];
+ final int resolution = 512;
+ boolean[][][][] pics = new boolean[dim][dim][][]; // [resolution][resolution];
// Initialize / allocate "pictures":
for (int i = 0; i < dim - 1; i++) {
for (int j = i + 1; j < dim; j++) {
- pics[i][j] = new byte[resolution][resolution];
+ pics[i][j] = new boolean[resolution][resolution];
}
}
// FIXME: Get/keep these statistics in the relation, or compute for the
@@ -106,8 +107,8 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< for (DBIDIter id = subset.iter(); id.valid(); id.advance()) {
NumberVector<?> pvec = relation.get(id);
for (int i = 0; i < dim - 1; i++) {
+ double xi = (pvec.doubleValue(matrix.dim(i)) - off[i]) * scale[i];
for (int j = i + 1; j < dim; j++) {
- double xi = (pvec.doubleValue(matrix.dim(i)) - off[i]) * scale[i];
double xj = (pvec.doubleValue(matrix.dim(j)) - off[j]) * scale[j];
drawLine(0, (int) (resolution * xi), resolution - 1, (int) (resolution * xj), pics[i][j]);
}
@@ -116,11 +117,9 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< final double stepsq = (double) STEPS * (double) STEPS;
for (int x = 0; x < dim; x++) {
- final int i = matrix.dim(x);
for (int y = x + 1; y < dim; y++) {
- final int j = matrix.dim(y);
- int[][] hough = houghTransformation(pics[i][j]);
- pics[i][j] = null; // Release picture
+ int[][] hough = houghTransformation(pics[x][y]);
+ pics[x][y] = null; // Release picture
// The original publication said "median", but judging from the text,
// meant "mean". Otherwise, always half of the cells are above the
// threshold, which doesn't match the explanation there.
@@ -133,16 +132,17 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< }
/**
- * Compute the sum of a matix.
+ * Compute the sum of a matrix.
*
* @param mat Matrix
* @return Sum of all elements
*/
private long sumMatrix(int[][] mat) {
long ret = 0;
- for (int i = 0; i < mat[0].length; i++) {
- for (int j = 0; j < mat.length; j++) {
- ret += mat[i][j];
+ for (int i = 0; i < mat.length; i++) {
+ final int[] row = mat[i];
+ for (int j = 0; j < row.length; j++) {
+ ret += row[j];
}
}
return ret;
@@ -174,18 +174,18 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< * @param mat Binary image
* @return Hough transformation of image.
*/
- private int[][] houghTransformation(byte[][] mat) {
+ private int[][] houghTransformation(boolean[][] mat) {
final int xres = mat.length, yres = mat[0].length;
- final double tscale = STEPS / Math.sqrt(xres * xres + yres * yres);
+ final double tscale = STEPS * .5 / (xres + yres);
final int[][] ret = new int[STEPS][STEPS];
for (int x = 0; x < mat.length; x++) {
for (int y = 0; y < mat[0].length; y++) {
- if (mat[x][y] > 0) {
+ if (mat[x][y]) {
for (int i = 0; i < STEPS; i++) {
- final int d = (int) (tscale * (x * table.cos(i) + y * table.sin(i)));
+ final int d = (STEPS >> 1) + (int) (tscale * (x * table.cos(i) + y * table.sin(i)));
if (d > 0 && d < STEPS) {
- ret[d][i] += mat[x][y];
+ ret[d][i]++;
}
}
}
@@ -204,7 +204,7 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< * @param y1 End Y
* @param pic Picture array
*/
- private static void drawLine(int x0, int y0, int x1, int y1, byte[][] pic) {
+ private static void drawLine(int x0, int y0, int x1, int y1, boolean[][] pic) {
final int xres = pic.length, yres = pic[0].length;
// Ensure bounds
y0 = (y0 < 0) ? 0 : (y0 >= yres) ? (yres - 1) : y0;
@@ -218,7 +218,7 @@ public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector< int err = dx + dy;
for (;;) {
- pic[x0][y0] = 1;
+ pic[x0][y0] = true;
if (x0 == x1 && y0 == y1) {
break;
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java index 468db679..efc12b5c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java @@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS; import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil;
import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
@@ -41,6 +42,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.math.statistics.tests.GoodnessOfFitTest;
import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -52,9 +54,17 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; /**
* Use the statistical tests as used by HiCS to arrange dimensions.
*
+ * Reference:
* <p>
- * Based on:<br />
- * Fabian Keller, Emmanuel Müller, and Klemens Böhm.<br />
+ * Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek:<br />
+ * Interactive Data Mining with 3D-Parallel-Coordinate-Trees.<br />
+ * Proceedings of the 2013 ACM International Conference on Management of Data
+ * (SIGMOD), New York City, NY, 2013.
+ * </p>
+ *
+ * Based on:
+ * <p>
+ * F. Keller, E. Müller, and K. Böhm.<br />
* HiCS: High Contrast Subspaces for Density-Based Outlier Ranking. <br />
* In ICDE, pages 1037–1048, 2012.
* </p>
@@ -62,6 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; * @author Erich Schubert
* @author Robert Rödler
*/
+@Reference(authors = "Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek", title = "Interactive Data Mining with 3D-Parallel-Coordinate-Trees", booktitle = "Proc. of the 2013 ACM International Conference on Management of Data (SIGMOD)", url = "http://dx.doi.org/10.1145/2463676.2463696")
public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
/**
* Monte-Carlo iterations
@@ -100,7 +111,7 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector }
@Override
- public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
final Random random = rnd.getRandom();
final int dim = matrix.size();
@@ -129,7 +140,7 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector */
private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation, DBIDs ids, DimensionSimilarityMatrix matrix) {
final int dim = matrix.size();
- ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim);
+ ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<>(dim);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
for (int i = 0; i < dim; i++) {
@@ -221,12 +232,16 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector private GoodnessOfFitTest statTest;
/**
- * Holds the value of {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#M_ID}.
+ * Holds the value of
+ * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#M_ID}
+ * .
*/
private int m = 50;
/**
- * Holds the value of {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#ALPHA_ID}.
+ * Holds the value of
+ * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#ALPHA_ID}
+ * .
*/
private double alpha = 0.1;
@@ -250,7 +265,7 @@ public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector alpha = alphaP.doubleValue();
}
- final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(HiCS.Parameterizer.TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
+ final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<>(HiCS.Parameterizer.TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
if (config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java index aecdf857..b3e6bb76 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java @@ -29,6 +29,7 @@ import java.util.Arrays; import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil;
import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -45,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; *
* Reference:
* <p>
- * Diansheng Guo<br />
+ * D. Guo<br />
* Coordinating computational and visual approaches for interactive feature
* selection and multivariate clustering<br />
* Information Visualization, 2(4), 2003.
@@ -53,7 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; *
* @author Erich Schubert
*/
-@Reference(authors = "Diansheng Guo", title = "Coordinating computational and visual approaches for interactive feature selection and multivariate clustering", booktitle = "Information Visualization, 2(4)", url = "http://dx.doi.org/10.1057/palgrave.ivs.9500053")
+@Reference(authors = "D. Guo", title = "Coordinating computational and visual approaches for interactive feature selection and multivariate clustering", booktitle = "Information Visualization, 2(4)", url = "http://dx.doi.org/10.1057/palgrave.ivs.9500053")
public class MCEDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
/**
* Static instance.
@@ -76,7 +77,7 @@ public class MCEDimensionSimilarity implements DimensionSimilarity<NumberVector< }
@Override
- public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
final int dim = matrix.size();
// Find a number of bins as recommended by Cheng et al.
@@ -186,7 +187,7 @@ public class MCEDimensionSimilarity implements DimensionSimilarity<NumberVector< */
private ArrayList<ArrayList<DBIDs>> buildPartitions(Relation<? extends NumberVector<?>> relation, DBIDs ids, int depth, DimensionSimilarityMatrix matrix) {
final int dim = matrix.size();
- ArrayList<ArrayList<DBIDs>> subspaceIndex = new ArrayList<ArrayList<DBIDs>>(dim);
+ ArrayList<ArrayList<DBIDs>> subspaceIndex = new ArrayList<>(dim);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
double[] tmp = new double[ids.size()];
Mean mean = new Mean();
@@ -194,7 +195,7 @@ public class MCEDimensionSimilarity implements DimensionSimilarity<NumberVector< for (int i = 0; i < dim; i++) {
final int d = matrix.dim(i);
// Index for a single dimension:
- ArrayList<DBIDs> idx = new ArrayList<DBIDs>(1 << depth);
+ ArrayList<DBIDs> idx = new ArrayList<>(1 << depth);
// First, we need a copy of the DBIDs and sort it.
ArrayModifiableDBIDs sids = DBIDUtil.newArray(ids);
comp.setDimension(d);
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java index 551b4759..fd83d44b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java @@ -48,6 +48,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; *
* Reference:
* <p>
+ * Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek:<br />
+ * Interactive Data Mining with 3D-Parallel-Coordinate-Trees.<br />
+ * Proceedings of the 2013 ACM International Conference on Management of Data
+ * (SIGMOD), New York City, NY, 2013.
+ * </p>
+ *
+ * Based on:
+ * <p>
* Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and
* Peer Kröger<br />
* Subspace Selection for Clustering High-Dimensional Data<br />
@@ -61,7 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; *
* @apiviz.uses SubspaceEuclideanDistanceFunction
*/
-@Reference(authors = "Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and Peer Kröger", title = "Subspace Selection for Clustering High-Dimensional Data", booktitle = "IEEE International Conference on Data Mining, 2004", url = "http://dx.doi.org/10.1109/ICDM.2004.10112")
+@Reference(authors = "Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek", title = "Interactive Data Mining with 3D-Parallel-Coordinate-Trees", booktitle = "Proc. of the 2013 ACM International Conference on Management of Data (SIGMOD)", url = "http://dx.doi.org/10.1145/2463676.2463696")
public class SURFINGDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
/**
* Static instance.
@@ -75,10 +83,10 @@ public class SURFINGDimensionSimilarity implements DimensionSimilarity<NumberVec super();
}
+ @Reference(authors = "Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and Peer Kröger", title = "Subspace Selection for Clustering High-Dimensional Data", booktitle = "IEEE International Conference on Data Mining, 2004", url = "http://dx.doi.org/10.1109/ICDM.2004.10112")
@Override
- public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
final int dim = matrix.size();
- final Database db = relation.getDatabase();
Mean kdistmean = new Mean();
final int k = Math.max(1, subset.size() / 10);
@@ -92,8 +100,8 @@ public class SURFINGDimensionSimilarity implements DimensionSimilarity<NumberVec BitSet dims = new BitSet(dim);
dims.set(i);
dims.set(j);
- DistanceQuery<? extends NumberVector<?>, DoubleDistance> dq = db.getDistanceQuery(relation, new SubspaceEuclideanDistanceFunction(dims));
- KNNQuery<? extends NumberVector<?>, DoubleDistance> knnq = db.getKNNQuery(dq, k);
+ DistanceQuery<? extends NumberVector<?>, DoubleDistance> dq = database.getDistanceQuery(relation, new SubspaceEuclideanDistanceFunction(dims));
+ KNNQuery<? extends NumberVector<?>, DoubleDistance> knnq = database.getKNNQuery(dq, k);
kdistmean.reset();
int knn = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java index 3c81da63..1eb62189 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java @@ -24,19 +24,30 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; */
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* Arrange dimensions based on the entropy of the slope spectrum.
*
+ * Reference:
+ * <p>
+ * Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek:<br />
+ * Interactive Data Mining with 3D-Parallel-Coordinate-Trees.<br />
+ * Proceedings of the 2013 ACM International Conference on Management of Data
+ * (SIGMOD), New York City, NY, 2013.
+ * </p>
+ *
* @author Erich Schubert
* @author Robert Rödler
*/
+@Reference(authors = "Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek", title = "Interactive Data Mining with 3D-Parallel-Coordinate-Trees", booktitle = "Proc. of the 2013 ACM International Conference on Management of Data (SIGMOD)", url = "http://dx.doi.org/10.1145/2463676.2463696")
public class SlopeDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
/**
* Static instance.
@@ -66,7 +77,7 @@ public class SlopeDimensionSimilarity implements DimensionSimilarity<NumberVecto }
@Override
- public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
final int dim = matrix.size();
final int size = subset.size();
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java index aad58448..77408914 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java @@ -24,10 +24,12 @@ package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; */
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -36,11 +38,20 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * {@link SlopeDimensionSimilarity}, we also take the option of inverting an
* axis into account.
*
+ * Reference:
+ * <p>
+ * Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek:<br />
+ * Interactive Data Mining with 3D-Parallel-Coordinate-Trees.<br />
+ * Proceedings of the 2013 ACM International Conference on Management of Data
+ * (SIGMOD), New York City, NY, 2013.
+ * </p>
+ *
* TODO: shouldn't this be normalized by the single-dimension entropies or so?
*
* @author Erich Schubert
* @author Robert Rödler
*/
+@Reference(authors = "Elke Achtert, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek", title = "Interactive Data Mining with 3D-Parallel-Coordinate-Trees", booktitle = "Proc. of the 2013 ACM International Conference on Management of Data (SIGMOD)", url = "http://dx.doi.org/10.1145/2463676.2463696")
public class SlopeInversionDimensionSimilarity extends SlopeDimensionSimilarity {
/**
* Static instance.
@@ -55,7 +66,7 @@ public class SlopeInversionDimensionSimilarity extends SlopeDimensionSimilarity }
@Override
- public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ public void computeDimensionSimilarites(Database database, Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
final int dim = matrix.size();
final int size = subset.size();
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java index b6c27c57..bcbd47d5 100644 --- a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/AbstractEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/AbstractEarthModel.java new file mode 100644 index 00000000..d8eaa43f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/AbstractEarthModel.java @@ -0,0 +1,207 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Abstract base class for earth models with shared glue code. 
+ * + * @author Erich Schubert + */ +public abstract class AbstractEarthModel implements EarthModel { + /** + * Maximum number of iterations. + */ + private static final int MAX_ITER = 20; + + /** + * Maximum desired precision. + */ + private static final double PRECISION = 1e-10; + + /** + * Model parameters: major and minor radius. + */ + final double a, b; + + /** + * Model parameters: flattening, inverse flattening. + */ + final double f, invf; + + /** + * Derived model parameters: e and e squared. + */ + final double e, esq; + + /** + * Constructor. + * + * @param a Major axis radius + * @param b Minor axis radius + * @param f Flattening + * @param invf Inverse flattening + */ + public AbstractEarthModel(double a, double b, double f, double invf) { + super(); + this.a = a; + this.b = b; + this.f = f; + this.invf = invf; + this.esq = f * (2 - f); + this.e = Math.sqrt(esq); + } + + @Override + public double getEquatorialRadius() { + return a; + } + + @Override + public double getPolarDistance() { + return b; + } + + @Override + public double[] latLngDegToECEF(double lat, double lng) { + return latLngRadToECEF(MathUtil.deg2rad(lat), MathUtil.deg2rad(lng)); + } + + @Override + public double[] latLngDegToECEF(double lat, double lng, double h) { + return latLngRadToECEF(MathUtil.deg2rad(lat), MathUtil.deg2rad(lng), h); + } + + @Override + public double[] latLngRadToECEF(double lat, double lng) { + // Sine and cosines: + final double clat = Math.cos(lat), slat = MathUtil.cosToSin(lat, clat); + final double clng = Math.cos(lng), slng = MathUtil.cosToSin(lng, clng); + + final double v = a / Math.sqrt(1 - esq * slat * slat); + return new double[] { v * clat * clng, v * clat * slng, (1 - esq) * v * slat }; + } + + @Override + public double[] latLngRadToECEF(double lat, double lng, double h) { + // Sine and cosines: + final double clat = Math.cos(lat), slat = MathUtil.cosToSin(lat, clat); + final double clng = Math.cos(lng), slng = MathUtil.cosToSin(lng, clng); + + final 
double v = a / Math.sqrt(1 - esq * slat * slat); + return new double[] { (v + h) * clat * clng, (v + h) * clat * slng, ((1 - esq) * v + h) * slat }; + } + + @Override + public double ecefToLatDeg(double x, double y, double z) { + return MathUtil.rad2deg(ecefToLatRad(x, y, z)); + } + + @Override + public double ecefToLatRad(double x, double y, double z) { + final double p = Math.sqrt(x * x + y * y); + double plat = Math.atan2(z, p * (1 - esq)); + + // Iteratively improving the lat value + // TODO: instead of a fixed number of iterations, check for convergence? + for (int i = 0;; i++) { + final double slat = Math.sin(plat); + final double v = a / Math.sqrt(1 - esq * slat * slat); + final double lat = Math.atan2(z + esq * v * slat, p); + if (Math.abs(lat - plat) < PRECISION || i > MAX_ITER) { + return lat; + } + plat = lat; + } + } + + @Override + public double ecefToLngDeg(double x, double y) { + return MathUtil.rad2deg(ecefToLngRad(x, y)); + } + + @Override + public double ecefToLngRad(double x, double y) { + return Math.atan2(y, x); + } + + @Override + public double[] ecefToLatLngDegHeight(double x, double y, double z) { + double[] ret = ecefToLatLngRadHeight(x, y, z); + ret[0] = MathUtil.rad2deg(ret[0]); + ret[1] = MathUtil.rad2deg(ret[1]); + return ret; + } + + @Override + public double[] ecefToLatLngRadHeight(double x, double y, double z) { + double lng = Math.atan2(y, x); + final double p = Math.sqrt(x * x + y * y); + double plat = Math.atan2(z, p * (1 - esq)); + double h = 0; + + // Iteratively improving the lat value + // TODO: instead of a fixed number of iterations, check for convergence? 
+ for (int i = 0;; i++) { + final double slat = Math.sin(plat); + final double v = a / Math.sqrt(1 - esq * slat * slat); + double lat = Math.atan2(z + esq * v * slat, p); + if (Math.abs(lat - plat) < PRECISION || i > MAX_ITER) { + h = p / Math.cos(lat) - v; + return new double[] { lat, lng, h }; + } + plat = lat; + } + } + + @Override + public double distanceDeg(double lat1, double lng1, double lat2, double lng2) { + return distanceRad(MathUtil.deg2rad(lat1), MathUtil.deg2rad(lng1), // + MathUtil.deg2rad(lat2), MathUtil.deg2rad(lng2)); + } + + @Override + public double distanceRad(double lat1, double lng1, double lat2, double lng2) { + // Vincenty uses minor axis radius! + return b * SphereUtil.ellipsoidVincentyFormulaRad(f, lat1, lng1, lat2, lng2); + } + + @Override + public double minDistDeg(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { + return minDistRad(MathUtil.deg2rad(plat), MathUtil.deg2rad(plng), // + MathUtil.deg2rad(rminlat), MathUtil.deg2rad(rminlng), // + MathUtil.deg2rad(rmaxlat), MathUtil.deg2rad(rmaxlng)); + } + + @Override + public double minDistRad(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { + return b * SphereUtil.latlngMinDistRad(plat, plng, rminlat, rminlng, rmaxlat, rmaxlng); + } + + @Override + public String toString() { + return this.getClass().getSimpleName()+" [a=" + a + ", b=" + b + ", f=" + f + ", invf=" + invf + ", e=" + e + ", esq=" + esq + "]"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/Clarke1858SpheroidEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/Clarke1858SpheroidEarthModel.java new file mode 100644 index 00000000..c1d8a9af --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/Clarke1858SpheroidEarthModel.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + 
Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The Clarke 1858 spheroid earth model. + * + * Radius: 6378293.645 m + * + * Flattening: 1 / 294.26068 + * + * @author Erich Schubert + */ +@Alias({ "Clarke 1858" }) +public class Clarke1858SpheroidEarthModel extends AbstractEarthModel { + /** + * Static instance. + */ + public static final Clarke1858SpheroidEarthModel STATIC = new Clarke1858SpheroidEarthModel(); + + /** + * Radius of the CLARKE1858 Ellipsoid in m (a). + */ + public static final double CLARKE1858_RADIUS = 6378293.645; // m + + /** + * Inverse flattening 1/f of the CLARKE1858 Ellipsoid. + */ + public static final double CLARKE1858_INV_FLATTENING = 294.26068; + + /** + * Flattening f of the CLARKE1858 Ellipsoid. + */ + public static final double CLARKE1858_FLATTENING = 1 / CLARKE1858_INV_FLATTENING; + + /** + * Constructor. + */ + protected Clarke1858SpheroidEarthModel() { + super(CLARKE1858_RADIUS, CLARKE1858_RADIUS * (1 - CLARKE1858_FLATTENING), CLARKE1858_FLATTENING, CLARKE1858_INV_FLATTENING); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected Clarke1858SpheroidEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/Clarke1880SpheroidEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/Clarke1880SpheroidEarthModel.java new file mode 100644 index 00000000..731cf9eb --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/Clarke1880SpheroidEarthModel.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The Clarke 1880 spheroid earth model. + * + * Radius: 6378249.145 m + * + * Flattening: 1 / 293.465 + * + * @author Erich Schubert + */ +@Alias({ "Clarke 1880" }) +public class Clarke1880SpheroidEarthModel extends AbstractEarthModel { + /** + * Static instance. 
+ */ + public static final Clarke1880SpheroidEarthModel STATIC = new Clarke1880SpheroidEarthModel(); + + /** + * Radius of the CLARKE1880 Ellipsoid in m (a). + */ + public static final double CLARKE1880_RADIUS = 6378249.145; // m + + /** + * Inverse flattening 1/f of the CLARKE1880 Ellipsoid. + */ + public static final double CLARKE1880_INV_FLATTENING = 293.465; + + /** + * Flattening f of the CLARKE1880 Ellipsoid. + */ + public static final double CLARKE1880_FLATTENING = 1 / CLARKE1880_INV_FLATTENING; + + /** + * Constructor. + */ + protected Clarke1880SpheroidEarthModel() { + super(CLARKE1880_RADIUS, CLARKE1880_RADIUS * (1 - CLARKE1880_FLATTENING), CLARKE1880_FLATTENING, CLARKE1880_INV_FLATTENING); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected Clarke1880SpheroidEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/EarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/EarthModel.java new file mode 100644 index 00000000..b27f2e3b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/EarthModel.java @@ -0,0 +1,221 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; + +/** + * API for handling different earth models. + * + * @author Erich Schubert + * + * @apiviz.uses SphereUtil + */ +public interface EarthModel { + /** + * Parameter to choose the earth model to use. + */ + public static final OptionID MODEL_ID = new OptionID("geo.model", "Earth model to use for projection. Default: spherical model."); + + /** + * Map a degree latitude, longitude pair to 3D X-Y-Z coordinates, using a + * spherical earth model. + * + * The coordinate system is usually chosen such that the earth rotates around + * the Z axis and X points to the prime meridian and Equator. + * + * @param lat Latitude in degree + * @param lng Longitude in degree + * @return Coordinate triple, in meters. + */ + double[] latLngDegToECEF(double lat, double lng); + + /** + * Map a radians latitude, longitude pair to 3D X-Y-Z coordinates, using a + * spherical earth model. + * + * The coordinate system is usually chosen such that the earth rotates around + * the Z axis and X points to the prime meridian and Equator. + * + * @param lat Latitude in radians + * @param lng Longitude in radians + * @return Coordinate triple, in meters. + */ + double[] latLngRadToECEF(double lat, double lng); + + /** + * Map a degree latitude, longitude pair to 3D X-Y-Z coordinates, using a + * spherical earth model. + * + * The coordinate system is usually chosen such that the earth rotates around + * the Z axis and X points to the prime meridian and Equator. + * + * @param lat Latitude in degree + * @param lng Longitude in degree + * @param h Height + * @return Coordinate triple, in meters. 
+ */ + double[] latLngDegToECEF(double lat, double lng, double h); + + /** + * Map a radians latitude, longitude pair to 3D X-Y-Z coordinates, using a + * spherical earth model. + * + * The coordinate system is usually chosen such that the earth rotates around + * the Z axis and X points to the prime meridian and Equator. + * + * @param lat Latitude in radians + * @param lng Longitude in radians + * @param h Height + * @return Coordinate triple, in meters. + */ + double[] latLngRadToECEF(double lat, double lng, double h); + + /** + * Convert a 3D coordinate pair to the corresponding latitude. + * + * @param x X value + * @param y Y value + * @param z Z value + * @return Latitude in degrees + */ + double ecefToLatDeg(double x, double y, double z); + + /** + * Convert a 3D coordinate pair to the corresponding latitude. + * + * @param x X value + * @param y Y value + * @param z Z value + * @return Latitude in radians + */ + double ecefToLatRad(double x, double y, double z); + + /** + * Convert a 3D coordinate pair to the corresponding longitude. + * + * @param x X value + * @param y Y value + * @return Longitude in degrees + */ + double ecefToLngDeg(double x, double y); + + /** + * Convert a 3D coordinate pair to the corresponding longitude. + * + * @param x X value + * @param y Y value + * @return Longitude in radians + */ + double ecefToLngRad(double x, double y); + + /** + * Convert a 3D coordinate pair to the corresponding latitude, longitude and + * height. + * + * Note: if you are not interested in the height, use {@link #ecefToLatDeg} + * and {@link #ecefToLngDeg} instead, which has a smaller memory footprint. + * + * @param x X value + * @param y Y value + * @param z Z value + * @return Array containing (latitude, longitude, height). + */ + double[] ecefToLatLngDegHeight(double x, double y, double z); + + /** + * Convert a 3D coordinate pair to the corresponding latitude, longitude and + * height. 
+ * + * Note: if you are not interested in the height, use {@link #ecefToLatRad} + * and {@link #ecefToLngRad} instead, which has a smaller memory footprint. + * + * @param x X value + * @param y Y value + * @param z Z value + * @return Array containing (latitude, longitude, height). + */ + double[] ecefToLatLngRadHeight(double x, double y, double z); + + /** + * Compute the geodetic distance between two surface coordinates. + * + * @param lat1 Latitude of first in degrees. + * @param lng1 Longitude of first in degrees. + * @param lat2 Latitude of second in degrees. + * @param lng2 Longitude of second in degrees. + * @return Distance in meters. + */ + double distanceDeg(double lat1, double lng1, double lat2, double lng2); + + /** + * Compute the geodetic distance between two surface coordinates. + * + * @param lat1 Latitude of first in radians. + * @param lng1 Longitude of first in radians. + * @param lat2 Latitude of second in radians. + * @param lng2 Longitude of second in radians. + * @return Distance in meters. + */ + double distanceRad(double lat1, double lng1, double lat2, double lng2); + + /** + * Compute a lower bound for the geodetic distance point to rectangle. + * + * @param plat Latitude of point in degrees. + * @param plng Longitude of point in degrees. + * @param rminlat Min latitude of rectangle in degrees. + * @param rminlng Min Longitude of rectangle in degrees. + * @param rmaxlat Max Latitude of rectangle in degrees. + * @param rmaxlng Max Longitude of rectangle in degrees. + * @return Distance in meters. + */ + double minDistDeg(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng); + + /** + * Compute a lower bound for the geodetic distance point to rectangle. + * + * @param plat Latitude of point in radians. + * @param plng Longitude of point in radians. + * @param rminlat Min latitude of rectangle in radians. + * @param rminlng Min Longitude of rectangle in radians. 
+ * @param rmaxlat Max Latitude of rectangle in radians. + * @param rmaxlng Max Longitude of rectangle in radians. + * @return Distance in meters. + */ + double minDistRad(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng); + + /** + * Equatorial radius + * + * @return Radius + */ + double getEquatorialRadius(); + + /** + * Polar distance. + * + * @return Distance to poles (= minor radius) + */ + double getPolarDistance(); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/GRS67SpheroidEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/GRS67SpheroidEarthModel.java new file mode 100644 index 00000000..81dc7565 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/GRS67SpheroidEarthModel.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The GRS 67 spheroid earth model. 
+ * + * Radius: 6378160.0 m + * + * Flattening: 1 / 298.25 + * + * @author Erich Schubert + */ +@Alias({ "GRS67", "GRS-67", "GRS67" }) +public class GRS67SpheroidEarthModel extends AbstractEarthModel { + /** + * Static instance. + */ + public static final GRS67SpheroidEarthModel STATIC = new GRS67SpheroidEarthModel(); + + /** + * Radius of the GRS67 Ellipsoid in m (a). + */ + public static final double GRS67_RADIUS = 6378160.0; // m + + /** + * Inverse flattening 1/f of the GRS67 Ellipsoid. + */ + public static final double GRS67_INV_FLATTENING = 298.25; + + /** + * Flattening f of the GRS67 Ellipsoid. + */ + public static final double GRS67_FLATTENING = 1 / GRS67_INV_FLATTENING; + + /** + * Constructor. + */ + protected GRS67SpheroidEarthModel() { + super(GRS67_RADIUS, GRS67_RADIUS * (1 - GRS67_FLATTENING), GRS67_FLATTENING, GRS67_INV_FLATTENING); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GRS67SpheroidEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/GRS80SpheroidEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/GRS80SpheroidEarthModel.java new file mode 100644 index 00000000..2499da0c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/GRS80SpheroidEarthModel.java @@ -0,0 +1,81 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The GRS 80 spheroid earth model, without height model (so not a geoid, just a + * spheroid!) + * + * Radius: 6378137.0 m + * + * Flattening: 1 / 298.257222101 + * + * @author Erich Schubert + */ +@Alias({ "grs80", "GRS-80", "GRS80" }) +public class GRS80SpheroidEarthModel extends AbstractEarthModel { + /** + * Static instance. + */ + public static final GRS80SpheroidEarthModel STATIC = new GRS80SpheroidEarthModel(); + + /** + * Radius of the GRS80 Ellipsoid in m (a). + */ + public static final double GRS80_RADIUS = 6378137.0; // m + + /** + * Inverse flattening 1/f of the GRS80 Ellipsoid. + */ + public static final double GRS80_INV_FLATTENING = 298.257222101; + + /** + * Flattening f of the GRS80 Ellipsoid. + */ + public static final double GRS80_FLATTENING = 1 / GRS80_INV_FLATTENING; + + /** + * Constructor. + */ + protected GRS80SpheroidEarthModel() { + super(GRS80_RADIUS, GRS80_RADIUS * (1 - GRS80_FLATTENING), GRS80_FLATTENING, GRS80_INV_FLATTENING); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GRS80SpheroidEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/SphereUtil.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphereUtil.java new file mode 100644 index 00000000..b8e57cc3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphereUtil.java @@ -0,0 +1,881 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Class with utility functions for distance computations on the sphere. + * + * Note: the formulas are usually implemented for the unit sphere. + * + * The majority of formulas are adapted from: + * <p> + * Ed Williams<br /> + * Aviation Formulary<br /> + * Online: http://williams.best.vwh.net/avform.htm + * </p> + * + * TODO: add ellipsoid version of Vinentry formula. 
+ * + * @author Erich Schubert + * @author Niels Dörre + */ +@Reference(authors = "Ed Williams", title = "Aviation Formulary", booktitle = "", url = "http://williams.best.vwh.net/avform.htm") +public final class SphereUtil { + /** + * Maximum number of iterations. + */ + private static final int MAX_ITER = 20; + + /** + * Maximum desired precision. + */ + private static final double PRECISION = 1e-12; + + /** + * Constant to divide by 6 via multiplication. + */ + private static final double ONE_SIXTH = 1. / 6; + + /** + * Dummy constructor. Do not instantiate. + */ + private SphereUtil() { + // Use static methods. Do not intantiate + } + + /** + * Compute the approximate great-circle distance of two points using the + * Haversine formula + * + * Complexity: 6 (2 of which emulated) trigonometric functions. + * + * Reference: + * <p> + * R. W. Sinnott,<br/> + * Virtues of the Haversine<br /> + * Sky and telescope, 68-2, 1984 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance on unit sphere + */ + public static double cosineFormulaDeg(double lat1, double lon1, double lat2, double lon2) { + return cosineFormulaRad(MathUtil.deg2rad(lat1), MathUtil.deg2rad(lon1),// + MathUtil.deg2rad(lat2), MathUtil.deg2rad(lon2)); + } + + /** + * Compute the approximate great-circle distance of two points using the + * Spherical law of cosines. + * + * Complexity: 6 (2 of which emulated) trigonometric functions. Note that acos + * is rather expensive apparently - roughly atan + sqrt. + * + * Reference: + * <p> + * R. W. 
Sinnott,<br/> + * Virtues of the Haversine<br /> + * Sky and telescope, 68-2, 1984 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance on unit sphere + */ + public static double cosineFormulaRad(double lat1, double lon1, double lat2, double lon2) { + final double slat1 = Math.sin(lat1), clat1 = MathUtil.sinToCos(lat1, slat1); + final double slat2 = Math.sin(lat2), clat2 = MathUtil.sinToCos(lat2, slat2); + return Math.acos(Math.min(1.0, slat1 * slat2 + clat1 * clat2 * Math.cos(Math.abs(lon2 - lon1)))); + } + + /** + * Compute the approximate great-circle distance of two points using the + * Haversine formula + * + * Complexity: 5 trigonometric functions, 2 sqrt. + * + * Reference: + * <p> + * R. W. Sinnott,<br/> + * Virtues of the Haversine<br /> + * Sky and telescope, 68-2, 1984 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance on unit sphere + */ + @Reference(authors = "Sinnott, R. W.", title = "Virtues of the Haversine", booktitle = "Sky and telescope, 68-2, 1984") + public static double haversineFormulaDeg(double lat1, double lon1, double lat2, double lon2) { + return haversineFormulaRad(MathUtil.deg2rad(lat1), MathUtil.deg2rad(lon1),// + MathUtil.deg2rad(lat2), MathUtil.deg2rad(lon2)); + } + + /** + * Compute the approximate great-circle distance of two points using the + * Haversine formula + * + * Complexity: 5 trigonometric functions, 2 sqrt. + * + * Reference: + * <p> + * R. W. 
Sinnott,<br/> + * Virtues of the Haversine<br /> + * Sky and telescope, 68-2, 1984 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance on unit sphere + */ + @Reference(authors = "Sinnott, R. W.", title = "Virtues of the Haversine", booktitle = "Sky and telescope, 68-2, 1984") + public static double haversineFormulaRad(double lat1, double lon1, double lat2, double lon2) { + // Haversine formula, higher precision at < 1 meters but maybe issues at + // antipodal points. + final double slat = Math.sin((lat1 - lat2) * .5); + final double slon = Math.sin((lon1 - lon2) * .5); + final double a = slat * slat + slon * slon * Math.cos(lat1) * Math.cos(lat2); + return 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); + } + + /** + * Compute the approximate great-circle distance of two points. + * + * Uses Vincenty's Formula for the spherical case, which does not require + * iterations. + * + * Complexity: 7 trigonometric functions, 1 sqrt. + * + * Reference: + * <p> + * T. Vincenty<br /> + * Direct and inverse solutions of geodesics on the ellipsoid with application + * of nested equations<br /> + * Survey review 23 176, 1975 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance in radians / on unit sphere. + */ + @Reference(authors = "T. 
Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") + public static double sphericalVincentyFormulaDeg(double lat1, double lon1, double lat2, double lon2) { + return sphericalVincentyFormulaRad(MathUtil.deg2rad(lat1), MathUtil.deg2rad(lon1),// + MathUtil.deg2rad(lat2), MathUtil.deg2rad(lon2)); + } + + /** + * Compute the approximate great-circle distance of two points. + * + * Uses Vincenty's Formula for the spherical case, which does not require + * iterations. + * + * Complexity: 7 trigonometric functions, 1 sqrt. + * + * Reference: + * <p> + * T. Vincenty<br /> + * Direct and inverse solutions of geodesics on the ellipsoid with application + * of nested equations<br /> + * Survey review 23 176, 1975 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance on unit sphere + */ + @Reference(authors = "T. Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") + public static double sphericalVincentyFormulaRad(double lat1, double lon1, double lat2, double lon2) { + // Half delta longitude. 
+ final double dlnh = Math.abs(lon1 - lon2); + + // Spherical special case of Vincenty's formula - no iterations needed + final double slat1 = Math.sin(lat1), clat1 = MathUtil.sinToCos(lat1, slat1); + final double slat2 = Math.sin(lat2), clat2 = MathUtil.sinToCos(lat2, slat2); + final double slond = Math.sin(dlnh), clond = MathUtil.sinToCos(dlnh, slond); + final double a = clat2 * slond; + final double b = (clat1 * slat2) - (slat1 * clat2 * clond); + return Math.atan2(Math.sqrt(a * a + b * b), slat1 * slat2 + clat1 * clat2 * clond); + } + + /** + * Compute the approximate great-circle distance of two points. + * + * Reference: + * <p> + * T. Vincenty<br /> + * Direct and inverse solutions of geodesics on the ellipsoid with application + * of nested equations<br /> + * Survey review 23 176, 1975 + * </p> + * + * @param f Ellipsoid flattening + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance for a minor axis of 1. + */ + @Reference(authors = "T. Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") + public static double ellipsoidVincentyFormulaDeg(double f, double lat1, double lon1, double lat2, double lon2) { + return ellipsoidVincentyFormulaRad(f, MathUtil.deg2rad(lat1), MathUtil.deg2rad(lon1), // + MathUtil.deg2rad(lat2), MathUtil.deg2rad(lon2)); + } + + /** + * Compute the approximate great-circle distance of two points. + * + * Reference: + * <p> + * T. 
Vincenty<br /> + * Direct and inverse solutions of geodesics on the ellipsoid with application + * of nested equations<br /> + * Survey review 23 176, 1975 + * </p> + * + * @param f Ellipsoid flattening + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance for a minor axis of 1. + */ + @Reference(authors = "T. Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") + public static double ellipsoidVincentyFormulaRad(double f, double lat1, double lon1, double lat2, double lon2) { + final double dlon = Math.abs(lon2 - lon1); + final double onemf = 1 - f; // = 1 - (a-b)/a = b/a + + // Second eccentricity squared + final double a_b = 1. / onemf; // = a/b + final double ecc2 = (a_b + 1) * (a_b - 1); // (a^2-b^2)/(b^2) + + // Reduced latitudes: + final double u1 = Math.atan(onemf * Math.tan(lat1)); + final double u2 = Math.atan(onemf * Math.tan(lat2)); + // Trigonometric values + final double su1 = Math.sin(u1), cu1 = MathUtil.sinToCos(u1, su1); + final double su2 = Math.sin(u2), cu2 = MathUtil.sinToCos(u2, su2); + + // Eqn (13) - initial value + double lambda = dlon; + + for (int i = 0;; i++) { + final double slon = Math.sin(lambda), clon = MathUtil.sinToCos(lambda, slon); + + // Eqn (14) - \sin \sigma + final double term1 = cu2 * slon, term2 = cu1 * su2 - su1 * cu2 * clon; + final double ssig = Math.sqrt(term1 * term1 + term2 * term2); + // Eqn (15) - \cos \sigma + final double csig = su1 * su2 + cu1 * cu2 * clon; + // Eqn (16) - \sigma from \tan \sigma + final double sigma = Math.atan2(ssig, csig); + + // Two identical points? 
+ if (!(ssig > 0)) { + return 0.; + } + // Eqn (17) - \sin \alpha, and this way \cos^2 \alpha + final double salp = cu1 * cu2 * slon / ssig; + final double c2alp = (1. + salp) * (1. - salp); + // Eqn (18) - \cos 2 \sigma_m + final double ctwosigm = (Math.abs(c2alp) > 0) ? csig - 2.0 * su1 * su2 / c2alp : 0.; + final double c2twosigm = ctwosigm * ctwosigm; + + // Eqn (10) - C + final double cc = f * .0625 * c2alp * (4.0 + f * (4.0 - 3.0 * c2alp)); + // Eqn (11) - new \lambda + final double prevlambda = lambda; + lambda = dlon + (1.0 - cc) * f * salp * // + (sigma + cc * ssig * (ctwosigm + cc * csig * (-1.0 + 2.0 * c2twosigm))); + // Check for convergence: + if (Math.abs(prevlambda - lambda) < PRECISION || i >= MAX_ITER) { + // TODO: what is the proper result to return on MAX_ITER (antipodal + // points)? + // Definition of u^2, rewritten to use second eccentricity. + final double usq = c2alp * ecc2; + // Eqn (3) - A + final double aa = 1.0 + usq / 16384.0 * (4096.0 + usq * (-768.0 + usq * (320.0 - 175.0 * usq))); + // Eqn (4) - B + final double bb = usq / 1024.0 * (256.0 + usq * (-128.0 + usq * (74.0 - 47.0 * usq))); + // Eqn (6) - \Delta \sigma + final double dsig = bb * ssig * (ctwosigm + .25 * bb * (csig * (-1.0 + 2.0 * c2twosigm) // + - ONE_SIXTH * bb * ctwosigm * (-3.0 + 4.0 * ssig * ssig) * (-3.0 + 4.0 * c2twosigm))); + // Eqn (19) - s + return aa * (sigma - dsig); + } + } + } + + /** + * Compute the cross-track distance. + * + * XTD = asin(sin(dist_1Q)*sin(crs_1Q-crs_12)) + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @return Cross-track distance in km. May be negative - this gives the side. 
+ */ + public static double crossTrackDistanceDeg(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { + return crossTrackDistanceRad(MathUtil.deg2rad(lat1), MathUtil.deg2rad(lon1),// + MathUtil.deg2rad(lat2), MathUtil.deg2rad(lon2),// + MathUtil.deg2rad(latQ), MathUtil.deg2rad(lonQ)); + } + + /** + * Compute the cross-track distance. + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @param dist1Q Distance from starting point to query point on unit sphere + * @return Cross-track distance on unit sphere. May be negative - this gives + * the side. + */ + public static double crossTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q) { + final double dlon12 = lon2 - lon1; + final double dlon1Q = lonQ - lon1; + + // Compute trigonometric functions only once. 
+ final double slat1 = Math.sin(lat1), clat1 = MathUtil.sinToCos(lat1, slat1); + final double slatQ = Math.sin(latQ), clatQ = MathUtil.sinToCos(latQ, slatQ); + final double slat2 = Math.sin(lat2), clat2 = MathUtil.sinToCos(lat2, slat2); + + // / Compute the course + // y = sin(dlon) * cos(lat2) + final double sdlon12 = Math.sin(dlon12), cdlon12 = MathUtil.sinToCos(dlon12, sdlon12); + final double sdlon1Q = Math.sin(dlon1Q), cdlon1Q = MathUtil.sinToCos(dlon1Q, sdlon1Q); + + final double yE = sdlon12 * clat2; + final double yQ = sdlon1Q * clatQ; + + // x = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dlon) + final double xE = clat1 * slat2 - slat1 * clat2 * cdlon12; + final double xQ = clat1 * slatQ - slat1 * clatQ * cdlon1Q; + + final double crs12 = Math.atan2(yE, xE); + final double crs1Q = Math.atan2(yQ, xQ); + + // / Calculate cross-track distance + return Math.asin(Math.sin(dist1Q) * Math.sin(crs1Q - crs12)); + } + + /** + * Compute the cross-track distance. + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @param dist1Q Distance from starting point to query point in radians (i.e. + * on unit sphere). + * @return Cross-track distance on unit sphere. May be negative - this gives + * the side. + */ + public static double crossTrackDistanceDeg(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q) { + return crossTrackDistanceRad(MathUtil.deg2rad(lat1), MathUtil.deg2rad(lon1),// + MathUtil.deg2rad(lat2), MathUtil.deg2rad(lon2),// + MathUtil.deg2rad(latQ), MathUtil.deg2rad(lonQ),// + dist1Q); + } + + /** + * Compute the cross-track distance. + * + * XTD = asin(sin(dist_SQ)*sin(crs_SQ-crs_SE)) + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. 
+ * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @return Cross-track distance in km. May be negative - this gives the side. + */ + public static double crossTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { + final double dlon12 = lon2 - lon1; + final double dlon1Q = lonQ - lon1; + final double dlat1Q = latQ - lat1; + + // Compute trigonometric functions only once. + final double clat1 = Math.cos(lat1), slat1 = MathUtil.cosToSin(lat1, clat1); + final double clatQ = Math.cos(latQ), slatQ = MathUtil.cosToSin(latQ, clatQ); + final double clat2 = Math.cos(lat2), slat2 = MathUtil.cosToSin(lat2, clat2); + + // Haversine formula, higher precision at < 1 meters but maybe issues at + // antipodal points - we do not yet multiply with the radius! + final double slat = Math.sin(dlat1Q * .5); + final double slon = Math.sin(dlon1Q * .5); + final double a = slat * slat + slon * slon * clat1 * clatQ; + final double angDist1Q = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); + + // Compute the course + // y = sin(dlon) * cos(lat2) + final double sdlon12 = Math.sin(dlon12), cdlon12 = MathUtil.sinToCos(dlon12, sdlon12); + final double sdlon1Q = Math.sin(dlon1Q), cdlon1Q = MathUtil.sinToCos(dlon1Q, sdlon1Q); + final double yE = sdlon12 * clat2; + final double yQ = sdlon1Q * clatQ; + + // x = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dlon) + final double xE = clat1 * slat2 - slat1 * clat2 * cdlon12; + final double xQ = clat1 * slatQ - slat1 * clatQ * cdlon1Q; + + final double crs12 = Math.atan2(yE, xE); + final double crs1Q = Math.atan2(yQ, xQ); + + // Calculate cross-track distance + return Math.asin(Math.sin(angDist1Q) * Math.sin(crs1Q - crs12)); + } + + /** + * The along track distance, is the distance from S to Q along the track S to + * E. 
+ * + * ATD=acos(cos(dist_1Q)/cos(XTD)) + * + * TODO: optimize. + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @return Along-track distance in radians. May be negative - this gives the + * side. + */ + public static double alongTrackDistanceDeg(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { + // TODO: inline and share some of the trigonometric computations! + double dist1Q = haversineFormulaDeg(lat1, lon1, latQ, lonQ); + double ctd = crossTrackDistanceDeg(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q); + return alongTrackDistanceDeg(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q, ctd); + } + + /** + * The along track distance, is the distance from S to Q along the track S to + * E. + * + * ATD=acos(cos(dist_1Q)/cos(XTD)) + * + * TODO: optimize. + * + * @param lat1 Latitude of starting point in radians. + * @param lon1 Longitude of starting point in radians. + * @param lat2 Latitude of destination point in radians. + * @param lon2 Longitude of destination point in radians. + * @param latQ Latitude of query point in radians. + * @param lonQ Longitude of query point in radians. + * @return Along-track distance in radians. May be negative - this gives the + * side. + */ + public static double alongTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { + // TODO: inline and share some of the trigonometric computations! + double dist1Q = haversineFormulaRad(lat1, lon1, latQ, lonQ); + double ctd = crossTrackDistanceRad(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q); + return alongTrackDistanceRad(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q, ctd); + } + + /** + * The along track distance, is the distance from S to Q along the track S to + * E. 
+ * + * ATD=acos(cos(dist_SQ)/cos(XTD)) + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @param dist1Q Distance S to Q in radians. + * @param ctd Cross-track-distance in radians. + * @return Along-track distance in radians. May be negative - this gives the + * side. + */ + public static double alongTrackDistanceDeg(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q, double ctd) { + return alongTrackDistanceRad(MathUtil.deg2rad(lat1), MathUtil.deg2rad(lon1), MathUtil.deg2rad(lat2), MathUtil.deg2rad(lon2), MathUtil.deg2rad(latQ), MathUtil.deg2rad(lonQ), dist1Q, ctd); + } + + /** + * The along track distance, is the distance from S to Q along the track S to + * E. + * + * ATD=acos(cos(dist_SQ)/cos(XTD)) + * + * TODO: optimize: can we do a faster sign computation? + * + * @param lat1 Latitude of starting point in radians. + * @param lon1 Longitude of starting point in radians. + * @param lat2 Latitude of destination point in radians. + * @param lon2 Longitude of destination point in radians. + * @param latQ Latitude of query point in radians. + * @param lonQ Longitude of query point in radians. + * @param dist1Q Distance S to Q in radians. + * @param ctd Cross-track-distance in radians. + * @return Along-track distance in radians. May be negative - this gives the + * side. + */ + public static double alongTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q, double ctd) { + // FIXME: optimize the sign computation! + int sign = Math.abs(bearingRad(lat1, lon1, lat2, lon2) - bearingRad(lat1, lon1, latQ, lonQ)) < MathUtil.HALFPI ? +1 : -1; + return sign * Math.acos(Math.cos(dist1Q) / Math.cos(ctd)); + // TODO: for short distances, use this instead? 
+ // asin(sqrt( (sin(dist_1Q))^2 - (sin(XTD))^2 )/cos(XTD)) + } + + /** + * Point to rectangle minimum distance. + * + * Complexity: + * <ul> + * <li>Trivial cases (on longitude slice): no trigonometric functions.</li> + * <li>Cross-track case: 10+2 trig</li> + * <li>Corner case: 10+3 trig, 2 sqrt</li> + * </ul> + * + * Reference: + * <p> + * Erich Schubert, Arthur Zimek and Hans-Peter Kriegel<br /> + * Geodetic Distance Queries on R-Trees for Indexing Geographic Data<br /> + * Advances in Spatial and Temporal Databases - 13th International Symposium, + * SSTD 2013, Munich, Germany + * </p> + * + * @param plat Latitude of query point. + * @param plng Longitude of query point. + * @param rminlat Min latitude of rectangle. + * @param rminlng Min longitude of rectangle. + * @param rmaxlat Max latitude of rectangle. + * @param rmaxlng Max longitude of rectangle. + * @return Distance in radians. + */ + @Reference(authors = "Erich Schubert, Arthur Zimek and Hans-Peter Kriegel", title = "Geodetic Distance Queries on R-Trees for Indexing Geographic Data", booktitle = "Advances in Spatial and Temporal Databases - 13th International Symposium, SSTD 2013, Munich, Germany") + public static double latlngMinDistDeg(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { + return latlngMinDistRad(MathUtil.deg2rad(plat), MathUtil.deg2rad(plng),// + MathUtil.deg2rad(rminlat), MathUtil.deg2rad(rminlng), // + MathUtil.deg2rad(rmaxlat), MathUtil.deg2rad(rmaxlng)); + } + + /** + * Point to rectangle minimum distance. 
+ * + * Complexity: + * <ul> + * <li>Trivial cases (on longitude slice): no trigonometric functions.</li> + * <li>Corner case: 3/4 trig + (haversine:) 5 trig, 2 sqrt</li> + * <li>Cross-track case: 4+3 trig</li> + * </ul> + * + * Reference: + * <p> + * Erich Schubert, Arthur Zimek and Hans-Peter Kriegel<br /> + * Geodetic Distance Queries on R-Trees for Indexing Geographic Data<br /> + * Advances in Spatial and Temporal Databases - 13th International Symposium, + * SSTD 2013, Munich, Germany + * </p> + * + * @param plat Latitude of query point. + * @param plng Longitude of query point. + * @param rminlat Min latitude of rectangle. + * @param rminlng Min longitude of rectangle. + * @param rmaxlat Max latitude of rectangle. + * @param rmaxlng Max longitude of rectangle. + * @return Distance on unit sphere. + */ + @Reference(authors = "Erich Schubert, Arthur Zimek and Hans-Peter Kriegel", title = "Geodetic Distance Queries on R-Trees for Indexing Geographic Data", booktitle = "Advances in Spatial and Temporal Databases - 13th International Symposium, SSTD 2013, Munich, Germany") + public static double latlngMinDistRad(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { + // FIXME: handle rectangles crossing the +-180 deg boundary correctly! + + // Degenerate rectangles: + if ((rminlat >= rmaxlat) && (rminlng >= rmaxlng)) { + return haversineFormulaRad(rminlat, rminlng, plat, plng); + } + + // The simplest case is when the query point is in the same "slice": + if (rminlng <= plng && plng <= rmaxlng) { + // Inside rectangle: + if (rminlat <= plat && plat <= rmaxlat) { + return 0; + } + // South: + if (plat < rminlat) { + return rminlat - plat; + } else { + // plat > rmaxlat + return plat - rmaxlat; + } + } + + // Determine whether going east or west is shorter. + double lngE = rminlng - plng; + if (lngE < 0) { + lngE += MathUtil.TWOPI; + } + double lngW = rmaxlng - plng; // we keep this negative! 
+ if (lngW > 0) { + lngW -= MathUtil.TWOPI; + } + + // East, to min edge: + if (lngE <= -lngW) { + final double clngD = Math.cos(lngE); + final double tlatQ = Math.tan(plat); + if (lngE > MathUtil.HALFPI) { + final double tlatm = Math.tan((rmaxlat + rminlat) * .5); + if (tlatQ >= tlatm * clngD) { + return haversineFormulaRad(plat, plng, rmaxlat, rminlng); + } else { + return haversineFormulaRad(plat, plng, rminlat, rminlng); + } + } + final double tlatN = Math.tan(rmaxlat); + if (tlatQ >= tlatN * clngD) { // North corner + return haversineFormulaRad(plat, plng, rmaxlat, rminlng); + } + final double tlatS = Math.tan(rminlat); + if (tlatQ <= tlatS * clngD) { // South corner + return haversineFormulaRad(plat, plng, rminlat, rminlng); + } + // Cross-track-distance to longitude line. + final double slngD = MathUtil.cosToSin(lngE, clngD); + return Math.asin(Math.cos(plat) * slngD); + } else { // West, to max edge: + final double clngD = Math.cos(lngW); + final double tlatQ = Math.tan(plat); + if (-lngW > MathUtil.HALFPI) { + final double tlatm = Math.tan((rmaxlat + rminlat) * .5); + if (tlatQ >= tlatm * clngD) { + return haversineFormulaRad(plat, plng, rmaxlat, rmaxlng); + } else { + return haversineFormulaRad(plat, plng, rminlat, rmaxlng); + } + } + final double tlatN = Math.tan(rmaxlat); + if (tlatQ >= tlatN * clngD) { // North corner + return haversineFormulaRad(plat, plng, rmaxlat, rmaxlng); + } + final double tlatS = Math.tan(rminlat); + if (tlatQ <= tlatS * clngD) { // South corner + return haversineFormulaRad(plat, plng, rminlat, rmaxlng); + } + // Cross-track-distance to longitude line. + final double slngD = MathUtil.cosToSin(lngW, clngD); + return Math.asin(-Math.cos(plat) * slngD); + } + } + + /** + * Point to rectangle minimum distance. + * + * Previous version, only around for reference. 
+ * + * Complexity: + * <ul> + * <li>Trivial cases (on longitude slice): no trigonometric functions.</li> + * <li>Cross-track case: 10+2 trig</li> + * <li>Corner case: 10+3 trig, 2 sqrt</li> + * </ul> + * + * Reference: + * <p> + * Erich Schubert, Arthur Zimek and Hans-Peter Kriegel<br /> + * Geodetic Distance Queries on R-Trees for Indexing Geographic Data<br /> + * Advances in Spatial and Temporal Databases - 13th International Symposium, + * SSTD 2013, Munich, Germany + * </p> + * + * @param plat Latitude of query point. + * @param plng Longitude of query point. + * @param rminlat Min latitude of rectangle. + * @param rminlng Min longitude of rectangle. + * @param rmaxlat Max latitude of rectangle. + * @param rmaxlng Max longitude of rectangle. + * @return Distance in radians + */ + @Reference(authors = "Erich Schubert, Arthur Zimek and Hans-Peter Kriegel", title = "Geodetic Distance Queries on R-Trees for Indexing Geographic Data", booktitle = "Advances in Spatial and Temporal Databases - 13th International Symposium, SSTD 2013, Munich, Germany") + public static double latlngMinDistRadFull(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { + // FIXME: handle rectangles crossing the +-180 deg boundary correctly! + + // Degenerate rectangles: + if ((rminlat >= rmaxlat) && (rminlng >= rmaxlng)) { + return haversineFormulaRad(rminlat, rminlng, plat, plng); + } + + // The simplest case is when the query point is in the same "slice": + if (rminlng <= plng && plng <= rmaxlng) { + // Inside rectangle: + if (rminlat <= plat && plat <= rmaxlat) { + return 0; + } + // South: + if (plat < rminlat) { + return rminlat - plat; + } else { + // plat > rmaxlat + return plat - rmaxlat; + } + } + + // Determine whether going east or west is shorter. + double lngE = rminlng - plng; + if (lngE < 0) { + lngE += MathUtil.TWOPI; + } + double lngW = rmaxlng - plng; // we keep this negative! 
+ if (lngW > 0) { + lngW -= MathUtil.TWOPI; + } + + // Compute sine and cosine values we will certainly need below: + final double slatQ = Math.sin(plat), clatQ = MathUtil.sinToCos(plat, slatQ); + final double slatN = Math.sin(rmaxlat), clatN = MathUtil.sinToCos(rmaxlat, slatN); + final double slatS = Math.sin(rminlat), clatS = MathUtil.sinToCos(rminlat, slatS); + + // East, to min edge: + if (lngE <= -lngW) { + final double slngD = Math.sin(lngE); + final double clngD = MathUtil.sinToCos(lngE, slngD); + + // Bearing to south + // Math.atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD); + // Bearing from south + final double bs = Math.atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD); + // Bearing to north + // Math.atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD); + // Bearing from north + final double bn = Math.atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD); + if (bs < MathUtil.HALFPI) { + if (bn > MathUtil.HALFPI) { + // Radians from south pole = abs(ATD) + final double radFromS = -MathUtil.HALFPI - plat; + + // Cross-track-distance to longitude line. + return Math.asin(Math.sin(radFromS) * -slngD); + } + } + if (bs - MathUtil.HALFPI < MathUtil.HALFPI - bn) { + // Haversine to north corner. + final double slatN2 = Math.sin((plat - rmaxlat) * .5); + final double slon = Math.sin(lngE * .5); + final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN; + final double distN = 2 * Math.atan2(Math.sqrt(aN), Math.sqrt(1 - aN)); + return distN; + } else { + // Haversine to south corner. 
+ final double slatS2 = Math.sin((plat - rminlat) * .5); + final double slon = Math.sin(lngE * .5); + final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS; + final double distS = 2 * Math.atan2(Math.sqrt(aS), Math.sqrt(1 - aS)); + return distS; + } + } else { // West, to max edge + final double slngD = Math.sin(lngW); + final double clngD = MathUtil.sinToCos(lngW, slngD); + + // Bearing to south + // Math.atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD); + // Bearing from south + final double bs = Math.atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD); + // Bearing to north + // Math.atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD); + // Bearing from north + final double bn = Math.atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD); + if (bs > -MathUtil.HALFPI) { + if (bn < -MathUtil.HALFPI) { + // Radians from south = abs(ATD) = distance from pole + final double radFromS = -MathUtil.HALFPI - plat; + // Cross-track-distance to longitude line. + return Math.asin(Math.sin(radFromS) * slngD); + } + } + if (-MathUtil.HALFPI - bs < bn + MathUtil.HALFPI) { + // Haversine to north corner. + final double slatN2 = Math.sin((plat - rmaxlat) * .5); + final double slon = Math.sin(lngW * .5); + final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN; + final double distN = 2 * Math.atan2(Math.sqrt(aN), Math.sqrt(1 - aN)); + return distN; + } else { + // Haversine to south corner. + final double slatS2 = Math.sin((plat - rminlat) * .5); + final double slon = Math.sin(lngW * .5); + final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS; + final double distS = 2 * Math.atan2(Math.sqrt(aS), Math.sqrt(1 - aS)); + return distS; + } + } + } + + /** + * Compute the bearing from start to end. 
+ * + * @param latS Start latitude, in degree + * @param lngS Start longitude, in degree + * @param latE End latitude, in degree + * @param lngE End longitude, in degree + * @return Bearing in degree + */ + public static double bearingDegDeg(double latS, double lngS, double latE, double lngE) { + return MathUtil.rad2deg(bearingRad(MathUtil.deg2rad(latS), MathUtil.deg2rad(lngS), MathUtil.deg2rad(latE), MathUtil.deg2rad(lngE))); + } + + /** + * Compute the bearing from start to end. + * + * @param latS Start latitude, in radians + * @param lngS Start longitude, in radians + * @param latE End latitude, in radians + * @param lngE End longitude, in radians + * @return Bearing in radians (the atan2 result, not converted to degree) + */ + public static double bearingRad(double latS, double lngS, double latE, double lngE) { + final double slatS = Math.sin(latS), clatS = MathUtil.sinToCos(latS, slatS); + final double slatE = Math.sin(latE), clatE = MathUtil.sinToCos(latE, slatE); + return Math.atan2(-Math.sin(lngS - lngE) * clatE, clatS * slatE - slatS * clatE * Math.cos(lngS - lngE)); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalCosineEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalCosineEarthModel.java new file mode 100644 index 00000000..f81757ac --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalCosineEarthModel.java @@ -0,0 +1,97 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * A simple spherical earth model using radius 6371009 m. + * + * For distance computations, this variant uses the Cosine formula, which is + * faster but less accurate than the Haversine or Vincenty's formula. + * + * @author Erich Schubert + */ +public class SphericalCosineEarthModel extends AbstractEarthModel { + /** + * Spherical earth model, static instance. + */ + public static final SphericalCosineEarthModel STATIC = new SphericalCosineEarthModel(); + + /** + * Earth radius approximation in m. + */ + public static final double EARTH_RADIUS = 6371009; // m + + /** + * Constructor. 
/**
 * Constructor. Both radius arguments passed to the superclass are
 * {@link #EARTH_RADIUS}.
 */
protected SphericalCosineEarthModel() {
  super(EARTH_RADIUS, EARTH_RADIUS, 0., Double.POSITIVE_INFINITY);
}

/**
 * Convert latitude and longitude (in radians) into a 3D coordinate on the
 * sphere surface.
 */
@Override
public double[] latLngRadToECEF(double lat, double lng) {
  final double cosLat = Math.cos(lat), sinLat = MathUtil.cosToSin(lat, cosLat);
  final double cosLng = Math.cos(lng), sinLng = MathUtil.cosToSin(lng, cosLng);

  final double x = EARTH_RADIUS * cosLat * cosLng;
  final double y = EARTH_RADIUS * cosLat * sinLng;
  final double z = EARTH_RADIUS * sinLat;
  return new double[] { x, y, z };
}

/**
 * Convert latitude and longitude (in radians) into a 3D coordinate, with the
 * height {@code h} added to the sphere radius.
 */
@Override
public double[] latLngRadToECEF(double lat, double lng, double h) {
  final double cosLat = Math.cos(lat), sinLat = MathUtil.cosToSin(lat, cosLat);
  final double cosLng = Math.cos(lng), sinLng = MathUtil.cosToSin(lng, cosLng);

  final double radius = EARTH_RADIUS + h;
  return new double[] { radius * cosLat * cosLng, radius * cosLat * sinLng, radius * sinLat };
}

/**
 * Latitude (in radians) of a 3D coordinate: the angle of z over the distance
 * from the z axis.
 */
@Override
public double ecefToLatRad(double x, double y, double z) {
  return Math.atan2(z, Math.sqrt(x * x + y * y));
}

/**
 * Distance of two points given in radians: the cosine formula result scaled
 * by the earth radius.
 */
@Override
public double distanceRad(double lat1, double lng1, double lat2, double lng2) {
  final double angle = SphereUtil.cosineFormulaRad(lat1, lng1, lat2, lng2);
  return EARTH_RADIUS * angle;
}
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected SphericalCosineEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalHaversineEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalHaversineEarthModel.java new file mode 100644 index 00000000..5ed8c4ca --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalHaversineEarthModel.java @@ -0,0 +1,97 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * A simple spherical earth model using radius 6371009 m. + * + * For distance computations, this variant uses the Haversine formula, which is + * faster but less accurate than Vincenty's formula. + * + * @author Erich Schubert + */ +public class SphericalHaversineEarthModel extends AbstractEarthModel { + /** + * Spherical earth model, static instance. 
/**
 * Spherical earth model, static instance.
 */
public static final SphericalHaversineEarthModel STATIC = new SphericalHaversineEarthModel();

/**
 * Earth radius approximation in m.
 */
public static final double EARTH_RADIUS = 6371009; // m

/**
 * Constructor. Both radius arguments passed to the superclass are
 * {@link #EARTH_RADIUS}.
 */
protected SphericalHaversineEarthModel() {
  super(EARTH_RADIUS, EARTH_RADIUS, 0., Double.POSITIVE_INFINITY);
}

/**
 * Convert latitude and longitude (in radians) into a 3D coordinate on the
 * sphere surface.
 */
@Override
public double[] latLngRadToECEF(double lat, double lng) {
  final double cosLat = Math.cos(lat), sinLat = MathUtil.cosToSin(lat, cosLat);
  final double cosLng = Math.cos(lng), sinLng = MathUtil.cosToSin(lng, cosLng);

  final double x = EARTH_RADIUS * cosLat * cosLng;
  final double y = EARTH_RADIUS * cosLat * sinLng;
  final double z = EARTH_RADIUS * sinLat;
  return new double[] { x, y, z };
}

/**
 * Convert latitude and longitude (in radians) into a 3D coordinate, with the
 * height {@code h} added to the sphere radius.
 */
@Override
public double[] latLngRadToECEF(double lat, double lng, double h) {
  final double cosLat = Math.cos(lat), sinLat = MathUtil.cosToSin(lat, cosLat);
  final double cosLng = Math.cos(lng), sinLng = MathUtil.cosToSin(lng, cosLng);

  final double radius = EARTH_RADIUS + h;
  return new double[] { radius * cosLat * cosLng, radius * cosLat * sinLng, radius * sinLat };
}

/**
 * Latitude (in radians) of a 3D coordinate: the angle of z over the distance
 * from the z axis.
 */
@Override
public double ecefToLatRad(double x, double y, double z) {
  return Math.atan2(z, Math.sqrt(x * x + y * y));
}

/**
 * Distance of two points given in radians: the Haversine formula result
 * scaled by the earth radius.
 */
@Override
public double distanceRad(double lat1, double lng1, double lat2, double lng2) {
  final double angle = SphereUtil.haversineFormulaRad(lat1, lng1, lat2, lng2);
  return EARTH_RADIUS * angle;
}
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected SphericalHaversineEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalVincentyEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalVincentyEarthModel.java new file mode 100644 index 00000000..e9a780cc --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/SphericalVincentyEarthModel.java @@ -0,0 +1,96 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * A simple spherical earth model using radius 6371009 m. + * + * @author Erich Schubert + */ +public class SphericalVincentyEarthModel extends AbstractEarthModel { + /** + * Spherical earth model, static instance. + */ + public static final SphericalVincentyEarthModel STATIC = new SphericalVincentyEarthModel(); + + /** + * Earth radius approximation in m. 
/**
 * Earth radius approximation in m.
 *
 * As per International Union of Geodesy and Geophysics (IUGG):
 */
public static final double EARTH_RADIUS = 6371009; // m

/**
 * Constructor. Both radius arguments passed to the superclass are
 * {@link #EARTH_RADIUS}.
 */
protected SphericalVincentyEarthModel() {
  super(EARTH_RADIUS, EARTH_RADIUS, 0., Double.POSITIVE_INFINITY);
}

/**
 * Convert latitude and longitude (in radians) into a 3D coordinate on the
 * sphere surface.
 */
@Override
public double[] latLngRadToECEF(double lat, double lng) {
  final double cosLat = Math.cos(lat), sinLat = MathUtil.cosToSin(lat, cosLat);
  final double cosLng = Math.cos(lng), sinLng = MathUtil.cosToSin(lng, cosLng);

  final double x = EARTH_RADIUS * cosLat * cosLng;
  final double y = EARTH_RADIUS * cosLat * sinLng;
  final double z = EARTH_RADIUS * sinLat;
  return new double[] { x, y, z };
}

/**
 * Convert latitude and longitude (in radians) into a 3D coordinate, with the
 * height {@code h} added to the sphere radius.
 */
@Override
public double[] latLngRadToECEF(double lat, double lng, double h) {
  final double cosLat = Math.cos(lat), sinLat = MathUtil.cosToSin(lat, cosLat);
  final double cosLng = Math.cos(lng), sinLng = MathUtil.cosToSin(lng, cosLng);

  final double radius = EARTH_RADIUS + h;
  return new double[] { radius * cosLat * cosLng, radius * cosLat * sinLng, radius * sinLat };
}

/**
 * Latitude (in radians) of a 3D coordinate: the angle of z over the distance
 * from the z axis.
 */
@Override
public double ecefToLatRad(double x, double y, double z) {
  return Math.atan2(z, Math.sqrt(x * x + y * y));
}

/**
 * Distance of two points given in radians: the spherical Vincenty formula
 * result scaled by the earth radius.
 */
@Override
public double distanceRad(double lat1, double lng1, double lat2, double lng2) {
  final double angle = SphereUtil.sphericalVincentyFormulaRad(lat1, lng1, lat2, lng2);
  return EARTH_RADIUS * angle;
}
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected SphericalVincentyEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/WGS72SpheroidEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/WGS72SpheroidEarthModel.java new file mode 100644 index 00000000..44d4f9d0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/WGS72SpheroidEarthModel.java @@ -0,0 +1,81 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The WGS72 spheroid earth model, without height model. + * + * + * Radius: 6378135.0 m + * + * Flattening: 1 / 298.26 + * + * @author Erich Schubert + */ +@Alias({ "WGS72", "WGS-72", "WGS72" }) +public class WGS72SpheroidEarthModel extends AbstractEarthModel { + /** + * Static instance. 
+ */ + public static final WGS72SpheroidEarthModel STATIC = new WGS72SpheroidEarthModel(); + + /** + * Radius of the WGS72 Ellipsoid in m (a). + */ + public static final double WGS72_RADIUS = 6378135.0; // m + + /** + * Inverse flattening 1/f of the WGS72 Ellipsoid. + */ + public static final double WGS72_INV_FLATTENING = 298.26; + + /** + * Flattening f of the WGS72 Ellipsoid. + */ + public static final double WGS72_FLATTENING = 1 / WGS72_INV_FLATTENING; + + /** + * Constructor. + */ + protected WGS72SpheroidEarthModel() { + super(WGS72_RADIUS, WGS72_RADIUS * (1 - WGS72_FLATTENING), WGS72_FLATTENING, WGS72_INV_FLATTENING); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WGS72SpheroidEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geodesy/WGS84SpheroidEarthModel.java b/src/de/lmu/ifi/dbs/elki/math/geodesy/WGS84SpheroidEarthModel.java new file mode 100644 index 00000000..30ba02aa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geodesy/WGS84SpheroidEarthModel.java @@ -0,0 +1,86 @@ +package de.lmu.ifi.dbs.elki.math.geodesy; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * The WGS84 spheroid earth model, without height model (so not a geoid, just a + * spheroid!) + * + * Note that EGM96 uses the same spheroid, but what really makes the difference + * is its geoid expansion. + * + * Radius: 6378137.0 m + * + * Flattening: 1 / 298.257223563 + * + * @author Erich Schubert + * + * @apiviz.landmark + */ +@Alias({ "wgs84", "WGS-84", "WGS84" }) +public class WGS84SpheroidEarthModel extends AbstractEarthModel { + /** + * Static instance. + */ + public static final WGS84SpheroidEarthModel STATIC = new WGS84SpheroidEarthModel(); + + /** + * Radius of the WGS84 Ellipsoid in m (a). + */ + public static final double WGS84_RADIUS = 6378137.0; // m + + /** + * Inverse flattening 1/f of the WGS84 Ellipsoid. + */ + public static final double WGS84_INV_FLATTENING = 298.257223563; + + /** + * Flattening f of the WGS84 Ellipsoid. + */ + public static final double WGS84_FLATTENING = 1 / WGS84_INV_FLATTENING; + + /** + * Constructor. + */ + protected WGS84SpheroidEarthModel() { + super(WGS84_RADIUS, WGS84_RADIUS * (1 - WGS84_FLATTENING), WGS84_FLATTENING, WGS84_INV_FLATTENING); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WGS84SpheroidEarthModel makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java index 1ab324c3..11f24cb2 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java @@ -12,7 +12,7 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -64,11 +64,11 @@ public class AlphaShape { // Compute delaunay triangulation: delaunay = (new SweepHullDelaunay2D(points)).getDelaunay(); - List<Polygon> polys = new ArrayList<Polygon>(); + List<Polygon> polys = new ArrayList<>(); // Working data BitSet used = new BitSet(delaunay.size()); - List<Vector> cur = new ArrayList<Vector>(); + List<Vector> cur = new ArrayList<>(); for(int i = 0 /* = used.nextClearBit(0) */; i < delaunay.size() && i >= 0; i = used.nextClearBit(i + 1)) { if(!used.get(i)) { @@ -82,7 +82,7 @@ public class AlphaShape { } if(cur.size() > 0) { polys.add(new Polygon(cur)); - cur = new ArrayList<Vector>(); + cur = new ArrayList<>(); } } } diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java b/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java index def48c52..c2945fde 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/GrahamScanConvexHull2D.java @@ -76,7 +76,7 @@ public class GrahamScanConvexHull2D { * Constructor.
*/
public GrahamScanConvexHull2D() {
- this.points = new LinkedList<Vector>();
+ this.points = new LinkedList<>();
}
/**
@@ -86,7 +86,7 @@ public class GrahamScanConvexHull2D { */
public void add(Vector point) {
if(this.ok) {
- this.points = new LinkedList<Vector>(this.points);
+ this.points = new LinkedList<>(this.points);
this.ok = false;
}
this.points.add(point);
@@ -229,7 +229,7 @@ public class GrahamScanConvexHull2D { return;
}
Iterator<Vector> iter = points.iterator();
- Stack<Vector> stack = new Stack<Vector>();
+ Stack<Vector> stack = new Stack<>();
// Start with the first two points on the stack
stack.add(iter.next());
stack.add(iter.next());
diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java b/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java index d6675256..de31f92e 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.geometry; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -42,6 +42,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * </p> * * @author Erich Schubert + * + * @apiviz.uses Adapter */ @Reference(authors = "R. C. Prim", title = "Shortest connection networks and some generalizations", booktitle = "Bell System Technical Journal, 36 (1957)") public class PrimsMinimumSpanningTree { diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java index afb8db26..ca6d0e5e 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java @@ -21,7 +21,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -113,7 +113,7 @@ public class SweepHullDelaunay2D { throw new UnsupportedOperationException("There is no delaunay triangulation for less than three objects!"); } int len = points.size() - 1; - hull = new LinkedList<IntIntPair>(); + hull = new LinkedList<>(); tris = hullonly ? 
null : new ArrayList<Triangle>(len); final Vector seed; @@ -614,7 +614,7 @@ public class SweepHullDelaunay2D { } DoubleMinMax minmaxX = new DoubleMinMax(); DoubleMinMax minmaxY = new DoubleMinMax(); - List<Vector> hullp = new ArrayList<Vector>(hull.size()); + List<Vector> hullp = new ArrayList<>(hull.size()); for(IntIntPair pair : hull) { Vector v = points.get(pair.first); hullp.add(v); diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java b/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java index 3a53f2a2..36ee7f23 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.geometry; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -151,7 +151,7 @@ public class XYCurve implements Result, TextWriteable { public void addAndSimplify(double x, double y) { // simplify curve when possible: final int len = data.size(); - if(len >= 4) { + if (len >= 4) { // Look at the previous 2 points final double l1x = data.get(len - 4); final double l1y = data.get(len - 3); @@ -163,16 +163,16 @@ public class XYCurve implements Result, TextWriteable { final double cdx = x - l2x; final double cdy = y - l2y; // X simplification - if((ldx == 0) && (cdx == 0)) { + if ((ldx == 0) && (cdx == 0)) { data.remove(len - 2, 2); } // horizontal simplification - else if((ldy == 0) && (cdy == 0)) { + else if ((ldy == 0) && (cdy == 0)) { data.remove(len - 2, 2); } // diagonal simplification - else if(ldy > 0 && cdy > 0) { - if(Math.abs((ldx / ldy) - (cdx / cdy)) < THRESHOLD) { + else if (ldy > 0 && cdy > 0) { + if (Math.abs((ldx / ldy) - (cdx / cdy)) < THRESHOLD) { data.remove(len - 2, 2); } } @@ -255,6 +255,21 @@ public class XYCurve implements Result, TextWriteable { } /** + * 
Rescale the graph. + * + * @param sx Scaling factor for X axis + * @param sy Scaling factor for Y axis + */ + public void rescale(double sx, double sy) { + for (int i = 0; i < data.size(); i += 2) { + data.set(i, sx * data.get(i)); + data.set(i + 1, sy * data.get(i + 1)); + } + maxx *= sx; + maxy *= sy; + } + + /** * Size of curve. * * @return curve length @@ -294,7 +309,7 @@ public class XYCurve implements Result, TextWriteable { out.commentPrint(" "); out.commentPrint(labely); out.flush(); - for(int pos = 0; pos < data.size(); pos+=2) { + for (int pos = 0; pos < data.size(); pos += 2) { out.inlinePrint(Double.toString(data.get(pos))); out.inlinePrint(Double.toString(data.get(pos + 1))); out.flush(); @@ -306,7 +321,7 @@ public class XYCurve implements Result, TextWriteable { StringBuilder buf = new StringBuilder(); buf.append("XYCurve["); buf.append(labelx).append(',').append(labely).append(':'); - for(int pos = 0; pos < data.size(); pos += 2) { + for (int pos = 0; pos < data.size(); pos += 2) { buf.append(' ').append(data.get(pos)).append(',').append(data.get(pos + 1)); } buf.append(']'); @@ -334,21 +349,21 @@ public class XYCurve implements Result, TextWriteable { public static double areaUnderCurve(XYCurve curve) { TDoubleArrayList data = curve.data; double prevx = data.get(0), prevy = data.get(1); - if(prevx > curve.minx) { + if (prevx > curve.minx) { throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid."); } double area = 0.0; - for(int pos = 2; pos < data.size(); pos += 2) { + for (int pos = 2; pos < data.size(); pos += 2) { final double curx = data.get(pos), cury = data.get(pos + 1); - if(prevx > curx) { + if (prevx > curx) { throw new UnsupportedOperationException("Curves must be monotone on X for areaUnderCurve to be valid."); } area += (curx - prevx) * (prevy + cury) * .5; // .5 = mean Y prevx = curx; prevy = cury; } - if(prevx < curve.maxx) { - throw new UnsupportedOperationException("Curves must be 
monotone on X for areaUnderCurve to be valid."); + if (prevx < curve.maxx) { + throw new UnsupportedOperationException("Curves must be complete on X for areaUnderCurve to be valid."); } return area; } @@ -415,4 +430,4 @@ public class XYCurve implements Result, TextWriteable { return pos < data.size(); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/package-info.java b/src/de/lmu/ifi/dbs/elki/math/geometry/package-info.java index a7bb6e6e..d94ac05a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java index 5a4f40fb..a4df5e80 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java @@ -2,11 +2,13 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; import java.util.Arrays; +import de.lmu.ifi.dbs.elki.math.MathUtil; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -215,10 +217,11 @@ public class AffineTransformation { ht[i][i] = 1.0; } // insert rotation values - ht[axis1][axis1] = +Math.cos(angle); - ht[axis1][axis2] = -Math.sin(angle); - ht[axis2][axis1] = +Math.sin(angle); - ht[axis2][axis2] = +Math.cos(angle); + double c = Math.cos(angle), s = MathUtil.cosToSin(angle, c); + ht[axis1][axis1] = +c; + ht[axis1][axis2] = -s; + ht[axis2][axis1] = +s; + ht[axis2][axis2] = +c; // Multiply from left trans = new Matrix(ht).times(trans); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java index c46ae89a..7650ecd2 100644 --- 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java index 8805b18c..6fa455ca 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java index 87b7bf1d..69023328 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java index 132393c6..3b65d99a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java +++ 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenPair.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java index 54434cdd..f58cd86f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java index 6e68352a..16fcf21c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java index 53954c08..f32ce410 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java +++ 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java index 5a0d1e74..f9ac4c4c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -1474,7 +1474,7 @@ public class Matrix { int n = v.size(); // Now we've got the number of columns! double row[] = v.toArray(); - ArrayList<double[]> rowV = new ArrayList<double[]>(); + ArrayList<double[]> rowV = new ArrayList<>(); rowV.add(row); // Start storing rows instead of columns. 
while (tokenizer.nextToken() == StreamTokenizer.TT_WORD) { // While non-empty lines diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java index 2a24a74d..a0a1e011 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java index 9aa1fa7c..901450c3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java index 5b52d837..4b30bf5d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/QRDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff 
--git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java index 772e92eb..4a496b5c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,7 +35,7 @@ import de.lmu.ifi.dbs.elki.math.MathUtil; * The singular values, sigma[k] = S[k][k], are ordered so that sigma[0] >= * sigma[1] >= ... >= sigma[n-1]. * <P> - * The singular value decompostion always exists, so the constructor will never + * The singular value decomposition always exists, so the constructor will never * fail. The matrix condition number and the effective numerical rank can be * computed from this decomposition. * @@ -542,7 +542,7 @@ public class SingularValueDecomposition { /** * Effective numerical matrix rank * - * @return Number of nonnegligible singular values. + * @return Number of non-negligible singular values. 
*/ public int rank() { double eps = Math.pow(2.0, -52.0); diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java index dcc2f2c0..185ae612 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java index 3cab3b51..4e03bc78 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SubspaceProjectionResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java index 6b6d7090..efac8ff0 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java index b0e1c78e..eca1dbea 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java index 9e77d810..48e90c00 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java index 76f90138..96d5381b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunctionResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java index 42badb2f..239d51c6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/GaussianFittingFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -61,6 +61,11 @@ import de.lmu.ifi.dbs.elki.math.MathUtil; */ public class GaussianFittingFunction implements FittingFunction { /** + * Static instance + */ + public static final GaussianFittingFunction STATIC = new GaussianFittingFunction(); + + /** * compute the mixture of Gaussians at the given position */ @Override @@ -77,7 +82,7 @@ public class GaussianFittingFunction implements FittingFunction { // Numerical Recipes in C: The Art of Scientific Computing // Due to their license, we cannot use their code, but we have to implement // the mathematics ourselves. We hope the loss in precision is not too big. - for(int i = 0; i < params.length; i += 3) { + for (int i = 0; i < params.length; i += 3) { // Standardized Gaussian parameter (centered, scaled by stddev) double stdpar = (x - params[i]) / params[i + 1]; double e = Math.exp(-.5 * stdpar * stdpar); @@ -94,4 +99,4 @@ public class GaussianFittingFunction implements FittingFunction { return new FittingFunctionResult(y, gradients); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java index 20200f99..1f2aa913 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.fitting; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -357,15 +357,17 @@ public class LevenbergMarquardtMethod { * Iterate until convergence, at most 100 times. */ public void run() { + int maxruns = this.maxruns; + int maxsmall = this.maxsmall; while(maxruns > 0) { double oldchi = getChiSq(); iterate(); - maxruns--; + --maxruns; double newchi = getChiSq(); // stop condition: only a small improvement in Chi. 
double deltachi = newchi - oldchi; if(deltachi < 0 && deltachi > -small) { - maxsmall--; + --maxsmall; if(maxsmall < 0) { break; } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java index 2c4143a8..100f36e2 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java index 5c55dfd1..4806b2d6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/package-info.java @@ -43,7 +43,7 @@ can be found at <a href="http://math.nist.gov/javanumerics/jama/">http://math.ni This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java index 3e12dc0e..7079f5e1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 
Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,8 +28,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; @@ -52,7 +52,7 @@ public abstract class AbstractCovarianceMatrixBuilder<V extends NumberVector<?>> public abstract Matrix processIds(DBIDs ids, Relation<? extends V> database); @Override - public <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database, int k) { + public <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDList<D> results, Relation<? extends V> database, int k) { ModifiableDBIDs ids = DBIDUtil.newArray(k); int have = 0; for(DBIDIter it = results.iter(); it.valid() && have < k; it.advance(), have++) { @@ -62,7 +62,7 @@ public abstract class AbstractCovarianceMatrixBuilder<V extends NumberVector<?>> } @Override - public final <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database) { + public final <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDList<D> results, Relation<? extends V> database) { return processQueryResults(results, database, results.size()); } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java index 6a1c3898..eb725400 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -93,7 +93,7 @@ public class CompositeEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectListParameter<EigenPairFilter> filtersP = new ObjectListParameter<EigenPairFilter>(EIGENPAIR_FILTER_COMPOSITE_LIST, EigenPairFilter.class); + ObjectListParameter<EigenPairFilter> filtersP = new ObjectListParameter<>(EIGENPAIR_FILTER_COMPOSITE_LIST, EigenPairFilter.class); if(config.grab(filtersP)) { filters = filtersP.instantiateClasses(config); diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java index fce6dbb3..d819c839 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,8 +25,8 @@ package 
de.lmu.ifi.dbs.elki.math.linearalgebra.pca; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -66,7 +66,7 @@ public interface CovarianceMatrixBuilder<V extends NumberVector<?>> { * @param <D> distance type * @return Covariance Matrix */ - <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database, int k); + <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDList<D> results, Relation<? extends V> database, int k); /** * Compute Covariance Matrix for a QueryResult Collection. @@ -78,5 +78,5 @@ public interface CovarianceMatrixBuilder<V extends NumberVector<?>> { * @param <D> distance type * @return Covariance Matrix */ - <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database); + <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDList<D> results, Relation<? extends V> database); }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java index 8791b6a2..22cefa87 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -74,8 +74,8 @@ public class DropEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { // init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + List<EigenPair> weakEigenPairs = new ArrayList<>(); // default value is "all strong". 
int contrastMaximum = eigenPairs.size() - 1; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java index be1eae17..a3f506e1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java index 4ab120fb..6cb6f07c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java index bd6b4400..f613e067 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für 
Datenbanksysteme ELKI Development Team @@ -82,8 +82,8 @@ public class FirstNEigenPairFilter implements EigenPairFilter { } // init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + List<EigenPair> weakEigenPairs = new ArrayList<>(); // determine strong and weak eigenpairs for(int i = 0; i < eigenPairs.size(); i++) { diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java index d7d999b4..b3bd04b3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -122,8 +122,8 @@ public class LimitEigenPairFilter implements EigenPairFilter { } // init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + List<EigenPair> weakEigenPairs = new ArrayList<>(); // determine strong and weak eigenpairs for (int i = 0; i < eigenPairs.size(); i++) { @@ -184,7 +184,7 @@ public class LimitEigenPairFilter implements EigenPairFilter { // delta must be >= 0 and <= 1 if it's a relative value // Since relative or absolute is dependent on the absolute flag this is a // global constraint! - List<ParameterConstraint<? super Double>> cons = new ArrayList<ParameterConstraint<? super Double>>(); + List<ParameterConstraint<? 
super Double>> cons = new ArrayList<>(); // TODO: Keep the constraint here - applies to non-conditional case as // well, and is set above. ParameterConstraint<Number> aboveNull = new GreaterEqualConstraint(0.); @@ -192,7 +192,7 @@ public class LimitEigenPairFilter implements EigenPairFilter { ParameterConstraint<Number> underOne = new LessEqualConstraint(1.); cons.add(underOne); - GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<Double>(deltaP, cons, absoluteF, false); + GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<>(deltaP, cons, absoluteF, false); config.checkConstraint(gpc); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java index 55c9c438..1d74d307 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -60,8 +60,8 @@ public class NormalizingEigenPairFilter implements EigenPairFilter { public FilteredEigenPairs filter(final SortedEigenPairs eigenPairs) { // initialize strong and weak eigenpairs // all normalized eigenpairs are regarded as strong - final List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - final List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + final List<EigenPair> strongEigenPairs = new ArrayList<>(); + final List<EigenPair> weakEigenPairs = new ArrayList<>(); for(int i = 0; i < eigenPairs.size(); i++) { final EigenPair eigenPair = eigenPairs.getEigenPair(i); normalizeEigenPair(eigenPair); diff --git 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java index 73462c2f..4a36036a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,13 +30,13 @@ import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList; +import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDistanceDBIDList; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.ModifiableDistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.LoggingUtil; import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; @@ -76,7 +76,7 @@ public class 
PCAFilteredAutotuningRunner<V extends NumberVector<?>> extends PCAF // Assume Euclidean distance. In the context of PCA, the neighborhood should // be L2-spherical to be unbiased. V center = Centroid.make(database, ids).toVector(database); - DoubleDistanceDBIDList dres = new DoubleDistanceDBIDList(ids.size()); + DoubleDistanceDBIDPairList dres = new DoubleDistanceDBIDPairList(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double dist = EuclideanDistanceFunction.STATIC.doubleDistance(center, database.get(iter)); dres.add(dist, iter); @@ -117,11 +117,11 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<?>> extends PCAF } @Override - public <D extends NumberDistance<D, ?>> PCAFilteredResult processQueryResult(DistanceDBIDResult<D> results, Relation<? extends V> database) { + public <D extends NumberDistance<D, ?>> PCAFilteredResult processQueryResult(DistanceDBIDList<D> results, Relation<? extends V> database) { assertSortedByDistance(results); final int dim = RelationUtil.dimensionality(database); - List<Matrix> best = new LinkedList<Matrix>(); + List<Matrix> best = new LinkedList<>(); for (int i = 0; i < dim; i++) { best.add(null); } @@ -131,7 +131,7 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<?>> extends PCAF } int[] bestk = new int[dim]; // 'history' - LinkedList<Cand> prev = new LinkedList<Cand>(); + LinkedList<Cand> prev = new LinkedList<>(); // TODO: starting parameter shouldn't be hardcoded... int smooth = 3; int startk = 4; @@ -227,11 +227,11 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<?>> extends PCAF * @param results Results to process * @param <D> distance type */ - private <D extends NumberDistance<D, ?>> void assertSortedByDistance(DistanceDBIDResult<D> results) { + private <D extends NumberDistance<D, ?>> void assertSortedByDistance(DistanceDBIDList<D> results) { // TODO: sort results instead? 
double dist = -1.0; boolean sorted = true; - for (DistanceDBIDResultIter<D> it = results.iter(); it.valid(); it.advance()) { + for (DistanceDBIDListIter<D> it = results.iter(); it.valid(); it.advance()) { double qr = it.getDistance().doubleValue(); if (qr < dist) { sorted = false; @@ -240,7 +240,7 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<?>> extends PCAF } if (!sorted) { try { - ModifiableDistanceDBIDResult.class.cast(results).sort(); + ModifiableDistanceDBIDList.class.cast(results).sort(); } catch (ClassCastException e) { LoggingUtil.warning("WARNING: results not sorted by distance!", e); } catch (UnsupportedOperationException e) { @@ -259,7 +259,7 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<?>> extends PCAF public static class Parameterizer<V extends NumberVector<?>> extends PCAFilteredRunner.Parameterizer<V> { @Override protected PCAFilteredAutotuningRunner<V> makeInstance() { - return new PCAFilteredAutotuningRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small); + return new PCAFilteredAutotuningRunner<>(covarianceMatrixBuilder, eigenPairFilter, big, small); } } } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java index 3e79e7b6..7303f530 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java index 59dca276..2318b72d 100644 --- 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,8 +25,8 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @author Erich Schubert * * @apiviz.landmark - * @apiviz.uses PCAFilteredResult oneway - - «create» + * @apiviz.has PCAFilteredResult oneway - - «create» * @apiviz.composedOf EigenPairFilter * * @param <V> Vector class to use @@ -140,7 +140,7 @@ public class PCAFilteredRunner<V extends NumberVector<?>> extends PCARunner<V> { * @return PCA result */ @Override - public <D extends NumberDistance<D, ?>> PCAFilteredResult processQueryResult(DistanceDBIDResult<D> results, Relation<? extends V> database) { + public <D extends NumberDistance<D, ?>> PCAFilteredResult processQueryResult(DistanceDBIDList<D> results, Relation<? 
extends V> database) { return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); } @@ -206,7 +206,7 @@ public class PCAFilteredRunner<V extends NumberVector<?>> extends PCARunner<V> { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<EigenPairFilter> filterP = new ObjectParameter<EigenPairFilter>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class); + ObjectParameter<EigenPairFilter> filterP = new ObjectParameter<>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class); if (config.grab(filterP)) { eigenPairFilter = filterP.instantiateClass(config); } @@ -225,12 +225,12 @@ public class PCAFilteredRunner<V extends NumberVector<?>> extends PCARunner<V> { } // global constraint small <--> big - config.checkConstraint(new LessGlobalConstraint<Double>(smallP, bigP)); + config.checkConstraint(new LessGlobalConstraint<>(smallP, bigP)); } @Override protected PCAFilteredRunner<V> makeInstance() { - return new PCAFilteredRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small); + return new PCAFilteredRunner<>(covarianceMatrixBuilder, eigenPairFilter, big, small); } } } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java index 87518b1d..1634cbf0 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAResult.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java index 
4fb9dbdf..91447024 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,8 +26,8 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -113,7 +113,7 @@ public class PCARunner<V extends NumberVector<?>> implements Parameterizable { * @param <D> distance type * @return PCA result */ - public <D extends NumberDistance<D, ?>> PCAResult processQueryResult(DistanceDBIDResult<D> results, Relation<? extends V> database) { + public <D extends NumberDistance<D, ?>> PCAResult processQueryResult(DistanceDBIDList<D> results, Relation<? 
extends V> database) { return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); } @@ -174,7 +174,7 @@ public class PCARunner<V extends NumberVector<?>> implements Parameterizable { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<CovarianceMatrixBuilder<V>> covarianceP = new ObjectParameter<CovarianceMatrixBuilder<V>>(PCA_COVARIANCE_MATRIX, CovarianceMatrixBuilder.class, StandardCovarianceMatrixBuilder.class); + ObjectParameter<CovarianceMatrixBuilder<V>> covarianceP = new ObjectParameter<>(PCA_COVARIANCE_MATRIX, CovarianceMatrixBuilder.class, StandardCovarianceMatrixBuilder.class); if(config.grab(covarianceP)) { covarianceMatrixBuilder = covarianceP.instantiateClass(config); } @@ -182,7 +182,7 @@ public class PCARunner<V extends NumberVector<?>> implements Parameterizable { @Override protected PCARunner<V> makeInstance() { - return new PCARunner<V>(covarianceMatrixBuilder); + return new PCARunner<>(covarianceMatrixBuilder); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java index 0daa3d94..85e29867 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -96,8 +96,8 @@ public class PercentageEigenPairFilter implements EigenPairFilter { } // init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + List<EigenPair> weakEigenPairs = new ArrayList<>(); // determine sum of eigenvalues double totalSum = 0; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java index 4c359dad..b99de2e1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -128,8 +128,8 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { // 
init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + List<EigenPair> weakEigenPairs = new ArrayList<>(); // determine sum of eigenvalues double totalSum = 0; @@ -162,7 +162,7 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter { if (!found) { assert (weakEigenPairs.size() == 0); weakEigenPairs = strongEigenPairs; - strongEigenPairs = new ArrayList<EigenPair>(); + strongEigenPairs = new ArrayList<>(); } for (; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java index e39820c4..3e9bccf7 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -184,7 +184,7 @@ public class RANSACCovarianceMatrixBuilder<V extends NumberVector<?>> extends Ab @Override protected RANSACCovarianceMatrixBuilder<V> makeInstance() { - return new RANSACCovarianceMatrixBuilder<V>(iterations, rnd); + return new RANSACCovarianceMatrixBuilder<>(iterations, rnd); } } } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java index 38b0fcc2..12da3fdf 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java +++ 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -85,8 +85,8 @@ public class RelativeEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { // init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + List<EigenPair> weakEigenPairs = new ArrayList<>(); // default value is "all strong". int contrastAtMax = eigenPairs.size() - 1; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java index c22e9592..73044b59 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -79,8 +79,8 @@ public class SignificantEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { // init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + 
List<EigenPair> weakEigenPairs = new ArrayList<>(); // default value is "all strong". int contrastMaximum = eigenPairs.size() - 1; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java index c7104e3b..90dd59c1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java index 3c60cd0d..66bcba30 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -80,8 +80,8 @@ public class WeakEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { // init strong and weak eigenpairs - List<EigenPair> strongEigenPairs = new ArrayList<EigenPair>(); - List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); + List<EigenPair> strongEigenPairs = new ArrayList<>(); + List<EigenPair> weakEigenPairs = new ArrayList<>(); // determine sum of eigenvalues double totalSum = 0; diff 
--git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java index 3066a831..236a24bc 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,14 +26,14 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import 
de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; @@ -162,7 +162,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<?>> extends * @return Covariance Matrix */ @Override - public <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database, int k) { + public <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDList<D> results, Relation<? extends V> database, int k) { final int dim = RelationUtil.dimensionality(database); final CovarianceMatrix cmat = new CovarianceMatrix(dim); @@ -176,7 +176,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<?>> extends double stddev = 0.0; { int i = 0; - for (DistanceDBIDResultIter<D> it = results.iter(); it.valid() && i < k; it.advance(), k++) { + for (DistanceDBIDListIter<D> it = results.iter(); it.valid() && i < k; it.advance(), k++) { DistanceDBIDPair<D> res = it.getDistancePair(); final double dist; if(res instanceof DoubleDistanceDBIDPair) { @@ -198,7 +198,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<?>> extends // calculate weighted PCA int i = 0; - for (DistanceDBIDResultIter<D> it = results.iter(); it.valid() && i < k; it.advance(), k++) { + for (DistanceDBIDListIter<D> it = results.iter(); it.valid() && i < k; it.advance(), k++) { DistanceDBIDPair<D> res = it.getDistancePair(); final double dist; if(res instanceof DoubleDistanceDBIDPair) { @@ -231,7 +231,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<?>> extends @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<WeightFunction> weightfunctionP = new ObjectParameter<WeightFunction>(WEIGHT_ID, WeightFunction.class, ConstantWeight.class); + ObjectParameter<WeightFunction> weightfunctionP = new ObjectParameter<>(WEIGHT_ID, WeightFunction.class, ConstantWeight.class); if(config.grab(weightfunctionP)) { weightfunction = 
weightfunctionP.instantiateClass(config); } @@ -239,7 +239,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<?>> extends @Override protected WeightedCovarianceMatrixBuilder<V> makeInstance() { - return new WeightedCovarianceMatrixBuilder<V>(weightfunction); + return new WeightedCovarianceMatrixBuilder<>(weightfunction); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java index f08a016c..2606234b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java index ef089980..2252b512 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ConstantWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java index 656406c3..645eff29 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität 
München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java index 6a8783ed..be7626a3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ErfcWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java index e6cb9951..7495805d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java index 0792e764..13653ac8 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/ExponentialWeight.java @@ -4,7 +4,7 @@ package 
de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java index 9ef589d6..698cd50d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java index 05c3389f..bdb1d9d9 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java index d5843cf1..e82b51c9 100644 --- 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseLinearWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java index 740ec8ca..b8577617 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java index c4ae9a97..b51c85fa 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/InverseProportionalWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 
Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java index 38318133..68448433 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/LinearWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java index 372dc016..1279b22d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticStddevWeight.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java index 49cb1aca..b23713a6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/QuadraticWeight.java @@ -4,7 +4,7 @@ 
package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java index 79c9f8a5..0c9f8ae0 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/WeightFunction.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned; - /** * WeightFunction interface that allows the use of various distance-based weight * functions. In addition to the distance parameter, the maximum distance and @@ -33,7 +31,7 @@ import de.lmu.ifi.dbs.elki.utilities.InspectionUtilFrequentlyScanned; * * @author Erich Schubert */ -public interface WeightFunction extends InspectionUtilFrequentlyScanned { +public interface WeightFunction { /** * Evaluate weight function with given parameters. 
* diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java index 649a8fb7..7869427b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java new file mode 100644 index 00000000..292fb002 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AbstractRandomProjectionFamily.java @@ -0,0 +1,131 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.VectorUtil; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; + +/** + * Abstract base class for random projection families. + * + * @author Erich Schubert + */ +public abstract class AbstractRandomProjectionFamily implements RandomProjectionFamily { + /** + * Random generator. + */ + protected Random random; + + /** + * Constructor. + */ + public AbstractRandomProjectionFamily(RandomFactory random) { + super(); + this.random = random.getRandom(); + } + + /** + * Parameterization base class (with the shared random generator parameter). + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public abstract static class Parameterizer extends AbstractParameterizer { + /** + * Parameter for the random generator. + */ + public static final OptionID RANDOM_ID = new OptionID("randomproj.random", "Random generator seed."); + + /** + * Random generator. + */ + protected RandomFactory random; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + RandomParameter rndP = new RandomParameter(RANDOM_ID); + if (config.grab(rndP)) { + random = rndP.getValue(); + } + } + } + + /** + * Class to project using a matrix multiplication. + * + * @author Erich Schubert + */ + public static class MatrixProjection implements Projection { + /** + * Projection matrix. + */ + Matrix matrix; + + /** + * Projection buffer. + */ + Vector buf; + + /** + * Projection buffer values. 
+ */ + double[] vals; + + /** + * Constructor. + * + * @param matrix Projection matrix. + */ + public MatrixProjection(Matrix matrix) { + super(); + this.matrix = matrix; + this.buf = new Vector(matrix.getColumnDimensionality()); + this.vals = buf.getArrayRef(); + } + + @Override + public double[] project(NumberVector<?> in) { + for (int d = 0; d < vals.length; d++) { + vals[d] = in.doubleValue(d); + } + return VectorUtil.fastTimes(matrix, buf); + } + + @Override + public int getOutputDimensionality() { + return matrix.getRowDimensionality(); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java new file mode 100644 index 00000000..50610cf9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/AchlioptasRandomProjectionFamily.java @@ -0,0 +1,124 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * Random projections as suggested by Dimitris Achlioptas. + * + * Reference: + * <p> + * Database-friendly random projections: Johnson-Lindenstrauss with binary coins + * <br /> + * Dimitris Achlioptas<br /> + * In: Proceedings of the twentieth ACM SIGMOD-SIGACT-SIGART symposium on + * Principles of database systems + * </p> + * + * @author Erich Schubert + */ +@Reference(title = "Database-friendly random projections: Johnson-Lindenstrauss with binary coins", authors = "Dimitris Achlioptas", booktitle = "Proceedings of the twentieth ACM SIGMOD-SIGACT-SIGART symposium on Principles of database systems", url = "http://dx.doi.org/10.1145/375551.375608") +public class AchlioptasRandomProjectionFamily extends AbstractRandomProjectionFamily { + /** + * Projection sparsity. + */ + private double sparsity; + + /** + * Constructor. + * + * @param sparsity Projection sparsity + * @param random Random number generator. 
+ */ + public AchlioptasRandomProjectionFamily(double sparsity, RandomFactory random) { + super(random); + this.sparsity = sparsity; + } + + @Override + public Projection generateProjection(int idim, int odim) { + final double pPos = .5 / sparsity; + final double pNeg = pPos + pPos; // Threshold + double baseValuePart = Math.sqrt(this.sparsity); + + Matrix projectionMatrix = new Matrix(odim, idim); + for (int i = 0; i < odim; ++i) { + for (int j = 0; j < idim; ++j) { + final double r = random.nextDouble(); + final double value; + if (r < pPos) { + value = baseValuePart; + } else if (r < pNeg) { + value = -baseValuePart; + } else { + value = 0.; + } + + projectionMatrix.set(i, j, value); + } + } + return new MatrixProjection(projectionMatrix); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractRandomProjectionFamily.Parameterizer { + /** + * Parameter for the projection sparsity. + */ + public static final OptionID SPARSITY_ID = new OptionID("achlioptas.sparsity", "Frequency of zeros in the projection matrix."); + + /** + * Projection sparsity + */ + private double sparsity = 3.; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter sparsP = new DoubleParameter(SPARSITY_ID); + sparsP.setDefaultValue(3.); + sparsP.addConstraint(new GreaterEqualConstraint(1.)); + if (config.grab(sparsP)) { + sparsity = sparsP.doubleValue(); + } + } + + @Override + protected AchlioptasRandomProjectionFamily makeInstance() { + return new AchlioptasRandomProjectionFamily(sparsity, random); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/CauchyRandomProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/CauchyRandomProjectionFamily.java new file mode 100644 index 00000000..0e7aa7e1 --- /dev/null +++ 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/CauchyRandomProjectionFamily.java @@ -0,0 +1,78 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Random projections using Cauchy distributions (1-stable). + * + * Reference: + * <p> + * M. Datar and N. Immorlica and P. Indyk and V. S. Mirrokni<br /> + * Locality-sensitive hashing scheme based on p-stable distributions.<br /> + * In Proc. 20th annual symposium on Computational geometry + * </p> + * + * @author Erich Schubert + */ +@Reference(title = "Locality-sensitive hashing scheme based on p-stable distributions", authors = "M. Datar and N. Immorlica and P. Indyk and V. S. 
Mirrokni", booktitle = "Proceedings of the 20th annual symposium on Computational geometry", url = "http://dx.doi.org/10.1145/997817.997857") +public class CauchyRandomProjectionFamily extends AbstractRandomProjectionFamily { + /** + * Constructor. + * + * @param random Random number generator. + */ + public CauchyRandomProjectionFamily(RandomFactory random) { + super(random); + } + + @Override + public Projection generateProjection(int idim, int odim) { + Matrix projectionMatrix = new Matrix(odim, idim); + for (int i = 0; i < odim; ++i) { + for (int j = 0; j < idim; ++j) { + final double r = random.nextDouble() - .5; + final double value = Math.tan(Math.PI * r); + projectionMatrix.set(i, j, value); + } + } + return new MatrixProjection(projectionMatrix); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractRandomProjectionFamily.Parameterizer { + @Override + protected CauchyRandomProjectionFamily makeInstance() { + return new CauchyRandomProjectionFamily(random); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/GaussianRandomProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/GaussianRandomProjectionFamily.java new file mode 100644 index 00000000..722c8454 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/GaussianRandomProjectionFamily.java @@ -0,0 +1,77 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of 
the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Random projections using Gaussian distributions (2-stable). + * + * Reference: + * <p> + * M. Datar and N. Immorlica and P. Indyk and V. S. Mirrokni<br /> + * Locality-sensitive hashing scheme based on p-stable distributions.<br /> + * In Proc. 20th annual symposium on Computational geometry + * </p> + * + * @author Erich Schubert + */ +@Reference(title = "Locality-sensitive hashing scheme based on p-stable distributions", authors = "M. Datar and N. Immorlica and P. Indyk and V. S. Mirrokni", booktitle = "Proceedings of the 20th annual symposium on Computational geometry", url = "http://dx.doi.org/10.1145/997817.997857") +public class GaussianRandomProjectionFamily extends AbstractRandomProjectionFamily { + /** + * Constructor. + * + * @param random Random number generator. + */ + public GaussianRandomProjectionFamily(RandomFactory random) { + super(random); + } + + @Override + public Projection generateProjection(int idim, int odim) { + Matrix projectionMatrix = new Matrix(odim, idim); + for (int i = 0; i < odim; ++i) { + for (int j = 0; j < idim; ++j) { + final double value = random.nextGaussian(); + projectionMatrix.set(i, j, value); + } + } + return new MatrixProjection(projectionMatrix); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractRandomProjectionFamily.Parameterizer { + @Override + protected GaussianRandomProjectionFamily makeInstance() { + return new GaussianRandomProjectionFamily(random); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomProjectionFamily.java new file mode 100644 index 00000000..aa796383 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomProjectionFamily.java @@ -0,0 +1,66 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.data.NumberVector; + +/** + * Interface for random projection families. + * + * @author Erich Schubert + * + * @apiviz.has Projection + */ +public interface RandomProjectionFamily { + /** + * Generate a projection matrix for the given dimensionalities. 
+ * + * @param idim Input Dimensionality + * @param odim Output Dimensionality + * @return Projection matrix + */ + Projection generateProjection(int idim, int odim); + + /** + * Interface for projection instances. + * + * @author Erich Schubert + */ + static interface Projection { + /** + * Project a single vector. + * + * @param in Input vector + * @return Projected vector + */ + double[] project(NumberVector<?> in); + + /** + * Get the output dimensionality. + * + * @return Output dimensionality + */ + int getOutputDimensionality(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java new file mode 100644 index 00000000..d5192086 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/RandomSubsetProjectionFamily.java @@ -0,0 +1,167 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import java.util.Arrays; +import java.util.Random; + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Random projection family based on selecting random features. + * + * The basic idea of using this for data mining should probably be attributed to + * L. Breiman, who used it to improve the performance of predictors in an + * ensemble. + * + * Reference: + * <p> + * L. Breiman<br /> + * Bagging predictors<br /> + * Machine learning 24.2 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "L. Breiman", title = "Bagging predictors", booktitle = "Machine learning 24.2", url = "http://dx.doi.org/10.1007/BF00058655") +public class RandomSubsetProjectionFamily extends AbstractRandomProjectionFamily { + /** + * Constructor. + * + * @param random Random generator. + */ + public RandomSubsetProjectionFamily(RandomFactory random) { + super(random); + } + + @Override + public Projection generateProjection(int idim, int odim) { + int[] dims; + if (odim < idim) { + dims = Arrays.copyOf(randomPermutation(range(0, idim), random), odim); + } else if (odim == idim) { + dims = randomPermutation(range(0, idim), random); + } else { + int mdim = idim; + while (mdim < odim) { + mdim += idim; + } + dims = new int[mdim]; + for (int i = 0; i < mdim; i++) { + dims[i] = i % idim; + } + dims = Arrays.copyOf(randomPermutation(dims, random), odim); + } + return new SubsetProjection(dims); + } + + /** + * Initialize an integer value range. + * + * FIXME: move to shared code. + * + * @param start Starting value + * @param end End value (exclusive) + * @return Array of integers start..end, excluding end. + */ + public static int[] range(int start, int end) { + int[] out = new int[end - start]; + for (int i = 0, j = start; j < end; i++, j++) { + out[i] = j; + } + return out; + } + + /** + * Perform a random permutation of the array, in-place. 
+ * + * Knuth / Fisher-Yates style shuffle + * + * FIXME: move to shared code. + * + * @param out Existing array + * @param random Random generator. + * @return Same array. + */ + public static int[] randomPermutation(final int[] out, Random random) { + for (int i = out.length - 1; i > 0; i--) { + // Swap with random preceding element. + int ri = random.nextInt(i + 1); + int tmp = out[ri]; + out[ri] = out[i]; + out[i] = tmp; + } + return out; + } + + /** + * Random subset projection. + * + * @author Erich Schubert + */ + public static class SubsetProjection implements Projection { + /** + * Input dimensions. + */ + private int[] dims; + + /** + * Constructor. + * + * @param dims Data permutation. + */ + public SubsetProjection(int[] dims) { + this.dims = dims; + } + + @Override + public double[] project(NumberVector<?> in) { + double[] buf = new double[dims.length]; + for (int i = 0; i < dims.length; i++) { + buf[i] = in.doubleValue(dims[i]); + } + return buf; + } + + @Override + public int getOutputDimensionality() { + return dims.length; + } + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractRandomProjectionFamily.Parameterizer { + @Override + protected RandomSubsetProjectionFamily makeInstance() { + return new RandomSubsetProjectionFamily(random); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/package-info.java index 29718fcb..3ddd4a5c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/randomprojections/package-info.java @@ -1,10 +1,11 @@ -package de.lmu.ifi.dbs.elki.math.statistics; - +/** +<p>Random projection families.</p> + */ /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -22,19 +23,4 @@ package de.lmu.ifi.dbs.elki.math.statistics; You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ - - -/** - * Inner function of a kernel density estimator. - * - * @author Erich Schubert - */ -public interface KernelDensityFunction { - /** - * Density contribution of a point at the given relative distance {@code delta}. - * - * @param delta Relative distance - * @return density contribution - */ - public double density(double delta); -}
\ No newline at end of file +package de.lmu.ifi.dbs.elki.math.linearalgebra.randomprojections;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/package-info.java b/src/de/lmu/ifi/dbs/elki/math/package-info.java index e01c3e50..5a5f168f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/LinearScale.java b/src/de/lmu/ifi/dbs/elki/math/scales/LinearScale.java index 32015853..07191d61 100644 --- a/src/de/lmu/ifi/dbs/elki/math/scales/LinearScale.java +++ b/src/de/lmu/ifi/dbs/elki/math/scales/LinearScale.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.scales; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java index 097b6fce..67a4e748 100644 --- a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java +++ b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.scales; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/package-info.java b/src/de/lmu/ifi/dbs/elki/math/scales/package-info.java index 1047bb0a..17465485 100644 --- a/src/de/lmu/ifi/dbs/elki/math/scales/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/scales/package-info.java @@ -6,7 +6,7 @@ This file is part of 
ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java index 7c1265dd..347af4a7 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -43,7 +43,7 @@ public abstract class AbstractSpatialSorter implements SpatialSorter { @Override public <T extends SpatialComparable> void sort(List<T> objs) { double[] mms = computeMinMax(objs); - sort(objs, 0, objs.size(), mms); + sort(objs, 0, objs.size(), mms, null); } /** @@ -125,18 +125,18 @@ public abstract class AbstractSpatialSorter implements SpatialSorter { // Compute min and max for each dimension: double[] mm = new double[dim << 1]; { - for (int d = 0; d < dim; d++) { - mm[d << 1] = Double.POSITIVE_INFINITY; - mm[(d << 1) + 1] = Double.NEGATIVE_INFINITY; + for (int d = 0, d2 = 0; d < dim; d++, d2 += 2) { + mm[d2] = Double.POSITIVE_INFINITY; + mm[d2 + 1] = Double.NEGATIVE_INFINITY; } for (SpatialComparable obj : objs) { - for (int d = 0; d < dim; d++) { - mm[d << 1] = Math.min(mm[d << 1], obj.getMin(d)); - mm[(d << 1) + 1] = Math.max(mm[(d << 1) + 1], obj.getMax(d)); + for (int d = 0, d2 = 0; d < dim; d++, d2 += 2) { + mm[d2] = Math.min(mm[d2], obj.getMin(d)); + mm[d2 + 1] = Math.max(mm[d2 + 1], obj.getMax(d)); } } - for (int d = 0; d < dim; d++) { - assert 
(mm[d << 1] <= mm[(d << 1) + 1]); + for (int d = 0, d2 = 0; d < dim; d++, d2 += 2) { + assert (mm[d2] <= mm[d2 + 1]) : ("Min > Max in dim " + d + ": " + mm[d2] + " " + mm[d2 + 1]); } } return mm; diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java index 1ba58511..742fae07 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -22,10 +22,10 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Comparator; import java.util.List; import de.lmu.ifi.dbs.elki.data.spatial.SpatialComparable; +import de.lmu.ifi.dbs.elki.data.spatial.SpatialSingleMeanComparator; import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; @@ -58,9 +58,9 @@ public class BinarySplitSpatialSorter extends AbstractSpatialSorter { } @Override - public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { - final int dims = objs.get(0).getDimensionality(); - binarySplitSort(objs, start, end, 1, dims, new DimC()); + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax, int[] dims) { + final int numdim = (dims != null) ? 
dims.length : (minmax.length >>> 1); + binarySplitSort(objs, start, end, 0, numdim, dims, new SpatialSingleMeanComparator(0)); } /** @@ -70,47 +70,24 @@ public class BinarySplitSpatialSorter extends AbstractSpatialSorter { * @param objs List of objects * @param start Interval start * @param end Interval end (exclusive) - * @param curdim Current dimension - * @param dims Number of dimensions + * @param depth Recursion depth + * @param numdim Number of dimensions + * @param dims Dimension indexes to sort by. * @param comp Comparator to use * @param <T> Object type */ - private <T extends SpatialComparable> void binarySplitSort(List<T> objs, final int start, final int end, int curdim, final int dims, DimC comp) { + private <T extends SpatialComparable> void binarySplitSort(List<T> objs, final int start, final int end, int depth, final int numdim, int[] dims, SpatialSingleMeanComparator comp) { final int mid = start + ((end - start) >>> 1); // Make invariant - comp.dim = curdim; + comp.setDimension(dims != null ? dims[depth] : depth); QuickSelect.quickSelect(objs, comp, start, end, mid); // Recurse - final int nextdim = (curdim + 1) % dims; - if(start < mid - 1) { - binarySplitSort(objs, start, mid, nextdim, dims, comp); + final int nextdim = (depth + 1) % numdim; + if (start < mid - 1) { + binarySplitSort(objs, start, mid, nextdim, numdim, dims, comp); } - if(mid + 2 < end) { - binarySplitSort(objs, mid + 1, end, nextdim, dims, comp); + if (mid + 2 < end) { + binarySplitSort(objs, mid + 1, end, nextdim, numdim, dims, comp); } } - - /** - * Comparator that uses only a particular dimension. - * - * This comparator is meant to be reused, and the dimension to be changed, to - * reduce the number of objects allocated. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - private static class DimC implements Comparator<SpatialComparable> { - /** - * Dimension. 
- */ - public int dim = -1; - - @Override - public int compare(SpatialComparable o1, SpatialComparable o2) { - double m1 = o1.getMax(dim) + o1.getMin(dim); - double m2 = o2.getMax(dim) + o2.getMin(dim); - return Double.compare(m1, m2); - } - } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java index 317e47c1..231e8637 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -57,19 +57,20 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { } @Override - public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { - final int dim = minmax.length >> 1; - List<HilbertRef<T>> tmp = new ArrayList<HilbertRef<T>>(end - start); + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax, int[] dims) { + final int dim = (dims != null) ? dims.length : (minmax.length >> 1); + List<HilbertRef<T>> tmp = new ArrayList<>(end - start); int[] buf = new int[dim]; for (int i = start; i < end; i++) { T v = objs.get(i); // Convert into integers - for (int d = 0, d2 = 0; d < dim; d++, d2 += 2) { - double val = (v.getMin(d) + v.getMax(d)) * .5; - val = Integer.MAX_VALUE * ((val - minmax[d2]) / (minmax[d2 + 1] - minmax[d2])); + for (int d = 0; d < dim; d++) { + final int ed = (dims != null) ? 
dims[d] : d, ed2 = ed << 1; + double val = (v.getMin(ed) + v.getMax(ed)) * .5; + val = Integer.MAX_VALUE * ((val - minmax[ed2]) / (minmax[ed2 + 1] - minmax[ed2])); buf[d] = (int) val; } - tmp.add(new HilbertRef<T>(v, coordinatesToHilbert(buf, Integer.SIZE - 1, 1))); + tmp.add(new HilbertRef<>(v, coordinatesToHilbert(buf, Integer.SIZE - 1, 1))); } // Sort and copy back Collections.sort(tmp); diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java index 865197ae..3519e75d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -79,8 +79,8 @@ public class PeanoSpatialSorter extends AbstractSpatialSorter { } @Override - public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { - peanoSort(objs, start, end, minmax, 0, new BitSet(), false); + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax, int[] dims) { + peanoSort(objs, start, end, minmax, dims, 0, new BitSet(), false); } /** @@ -90,21 +90,24 @@ public class PeanoSpatialSorter extends AbstractSpatialSorter { * @param start Start index * @param end End * @param mms Minmax values - * @param dim Dimension + * @param dims Dimensions index + * @param depth Dimension * @param bits Bit set for inversions * @param desc Current ordering */ - protected <T extends SpatialComparable> void peanoSort(List<T> objs, int start, int end, double[] mms, int dim, BitSet bits, boolean desc) { - final int dims = mms.length >> 
1; + protected <T extends SpatialComparable> void peanoSort(List<T> objs, int start, int end, double[] mms, int[] dims, int depth, BitSet bits, boolean desc) { + final int numdim = (dims != null) ? dims.length : (mms.length >> 1); + final int edim = (dims != null) ? dims[depth] : depth; // Find the splitting points. - final double min = mms[2 * dim], max = mms[2 * dim + 1]; + final double min = mms[2 * edim], max = mms[2 * edim + 1]; final double tfirst = (min + min + max) / 3.; final double tsecond = (min + max + max) / 3.; // Safeguard against duplicate points: if(max - tsecond < 1E-10 || tsecond - tfirst < 1E-10 || tfirst - min < 1E-10) { boolean ok = false; - for(int d = 0; d < mms.length; d += 2) { - if(mms[d + 1] - mms[d] >= 1E-10) { + for (int d = 0; d < numdim; d++) { + int d2 = ((dims != null) ? dims[d] : d) << 1; + if (mms[d2 + 1] - mms[d2] >= 1E-10) { ok = true; break; } @@ -113,38 +116,38 @@ public class PeanoSpatialSorter extends AbstractSpatialSorter { return; } } - final boolean inv = bits.get(dim) ^ desc; + final boolean inv = bits.get(edim) ^ desc; // Split the data set into three parts int fsplit, ssplit; if(!inv) { - fsplit = pivotizeList1D(objs, start, end, dim, tfirst, false); - ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim, tsecond, false) : fsplit; + fsplit = pivotizeList1D(objs, start, end, edim, tfirst, false); + ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, edim, tsecond, false) : fsplit; } else { - fsplit = pivotizeList1D(objs, start, end, dim, tsecond, true); - ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim, tfirst, true) : fsplit; + fsplit = pivotizeList1D(objs, start, end, edim, tsecond, true); + ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, edim, tfirst, true) : fsplit; } - int nextdim = (dim + 1) % dims; + int nextdim = (depth + 1) % numdim; // Do we need to update the min/max values? if(start < fsplit - 1) { - mms[2 * dim] = !inv ? 
min : tsecond; - mms[2 * dim + 1] = !inv ? tfirst : max; - peanoSort(objs, start, fsplit, mms, nextdim, bits, desc); + mms[2 * edim] = !inv ? min : tsecond; + mms[2 * edim + 1] = !inv ? tfirst : max; + peanoSort(objs, start, fsplit, mms, dims, nextdim, bits, desc); } if(fsplit < ssplit - 1) { - bits.flip(dim); // set (all but dim: we also flip "desc") - mms[2 * dim] = tfirst; - mms[2 * dim + 1] = tsecond; - peanoSort(objs, fsplit, ssplit, mms, nextdim, bits, !desc); - bits.flip(dim); + bits.flip(edim); // set (all but dim: we also flip "desc") + mms[2 * edim] = tfirst; + mms[2 * edim + 1] = tsecond; + peanoSort(objs, fsplit, ssplit, mms, dims, nextdim, bits, !desc); + bits.flip(edim); } if(ssplit < end - 1) { - mms[2 * dim] = !inv ? tsecond : min; - mms[2 * dim + 1] = !inv ? max : tfirst; - peanoSort(objs, ssplit, end, mms, nextdim, bits, desc); + mms[2 * edim] = !inv ? tsecond : min; + mms[2 * edim + 1] = !inv ? max : tfirst; + peanoSort(objs, ssplit, end, mms, dims, nextdim, bits, desc); } // Restore ranges - mms[2 * dim] = min; - mms[2 * dim + 1] = max; + mms[2 * edim] = min; + mms[2 * edim + 1] = max; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java index fe23a854..30e6e019 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -49,6 +49,7 @@ public interface SpatialSorter { * @param start First index to sort (e.g. 0) * @param end End of range (e.g. <code>site()</code>) * @param minmax Array with dim pairs of (min, max) of value ranges + * @param dims Dimensions to sort by, for indexing vectors <em>and</em> {@code minmax}. */ - <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax); + <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax, int[] dims); }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java index c5a91699..d30f88d6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -37,8 +37,8 @@ public class ZCurveSpatialSorter extends AbstractSpatialSorter { private static final double STOPVAL = 1E-10; @Override - public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax) { - zSort(objs, start, end, minmax, 0); + public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax, int[] dims) { + zSort(objs, start, end, minmax, dims, 0); } /** @@ -48,44 +48,44 @@ public class ZCurveSpatialSorter extends AbstractSpatialSorter { * @param start Start * @param end End * @param mms Min-Max value ranges - * @param dim Current dimension + * @param dims Dimensions to process + * @param depth Current dimension */ - protected <T extends SpatialComparable> void zSort(List<T> objs, int start, int end, double[] mms, int dim) { + protected <T extends SpatialComparable> void zSort(List<T> objs, int start, int end, double[] mms, int[] dims, int depth) { + final int numdim = (dims != null) ? dims.length : (mms.length >> 1); + final int edim = (dims != null) ? dims[depth] : depth; // Find the splitting points. 
- final double min = mms[2 * dim], max = mms[2 * dim + 1]; + final double min = mms[2 * edim], max = mms[2 * edim + 1]; double spos = (min + max) / 2.; // Safeguard against duplicate points: - if(max - spos < STOPVAL || spos - min < STOPVAL) { + if (max - spos < STOPVAL || spos - min < STOPVAL) { boolean ok = false; - for(int d = 0; d < mms.length; d += 2) { - if(mms[d + 1] - mms[d] >= STOPVAL) { - // LoggingUtil.warning("No: " + (mms[d + 1] - mms[d])); + for (int d = 0; d < numdim; d++) { + int d2 = ((dims != null) ? dims[d] : d) << 1; + if (mms[d2 + 1] - mms[d2] >= STOPVAL) { ok = true; break; } } - if(!ok) { + if (!ok) { return; } } - int split = pivotizeList1D(objs, start, end, dim, spos, false); + int split = pivotizeList1D(objs, start, end, edim, spos, false); assert (start <= split && split <= end); - int nextdim = (dim + 1) % objs.get(0).getDimensionality(); - // LoggingUtil.warning("dim: " + dim + " min: " + min + " split: " + spos + - // " max:" + max + " " + start + " < " + split + " < " + end); - if(start < split - 1) { - mms[2 * dim] = min; - mms[2 * dim + 1] = spos; - zSort(objs, start, split, mms, nextdim); + int nextdim = (depth + 1) % numdim; + if (start < split - 1) { + mms[2 * edim] = min; + mms[2 * edim + 1] = spos; + zSort(objs, start, split, mms, dims, nextdim); } - if(split < end - 1) { - mms[2 * dim] = spos; - mms[2 * dim + 1] = max; - zSort(objs, split, end, mms, nextdim); + if (split < end - 1) { + mms[2 * edim] = spos; + mms[2 * edim + 1] = max; + zSort(objs, split, end, mms, dims, nextdim); } // Restore ranges - mms[2 * dim] = min; - mms[2 * dim + 1] = max; - // FIXME: implement completely and test. + mms[2 * edim] = min; + mms[2 * edim + 1] = max; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java index 26137cb3..7474a00d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.spacefillingcurves; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java index 9fb9494a..a658d747 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java index d7ffefb8..72c50869 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java @@ -1,10 +1,14 @@ package de.lmu.ifi.dbs.elki.math.statistics; +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und 
Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,12 +27,10 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - /** * Estimate density given an array of points. * - * Estimates a density using a kernel density estimator. Multiple common Kernel - * functions are supported. + * Estimates a density using a variable width kernel density estimation. * * @author Erich Schubert * @@ -48,39 +50,51 @@ public class KernelDensityEstimator { /** * Initialize and execute kernel density estimation. * - * @param data data to use + * @param data data to use (must be sorted!) * @param min minimum value * @param max maximum value * @param kernel Kernel function to use - * @param windows window size + * @param window window size + * @param epsilon Precision threshold */ - public KernelDensityEstimator(double[] data, double min, double max, KernelDensityFunction kernel, int windows) { - process(data, min, max, kernel, windows); + public KernelDensityEstimator(double[] data, double min, double max, KernelDensityFunction kernel, int window, double epsilon) { + process(data, min, max, kernel, window, epsilon); } - + /** * Process a new array * - * @param data data to use + * @param data data to use (must be sorted!) * @param min minimum value * @param max maximum value * @param kernel Kernel function to use - * @param windows window size + * @param window window size + * @param epsilon Precision threshold */ - private void process(double[] data, double min, double max, KernelDensityFunction kernel, int windows) { + private void process(double[] data, double min, double max, KernelDensityFunction kernel, int window, double epsilon) { dens = new double[data.length]; var = new double[data.length]; - double halfwidth = ((max - min) / windows) * .5; + // This is the desired bandwidth of the kernel. 
+ double halfwidth = ((max - min) / window) * .5; - // collect data points - for(int current = 0; current < data.length; current++) { + for (int current = 0; current < data.length; current++) { double value = 0.0; - // TODO: is there any way we can skip through some of the data (at least if its sorted?) - // Since we know that all kKernels return 0 outside of [-1:1]? - for(int i = 0; i < data.length; i++) { + for (int i = current; i >= 0; i--) { double delta = Math.abs(data[i] - data[current]) / halfwidth; - value += kernel.density(delta); + final double contrib = kernel.density(delta); + value += contrib; + if (contrib < epsilon) { + break; + } + } + for (int i = current + 1; i < data.length; i++) { + double delta = Math.abs(data[i] - data[current]) / halfwidth; + final double contrib = kernel.density(delta); + value += contrib; + if (contrib < epsilon) { + break; + } } double realwidth = (Math.min(data[current] + halfwidth, max) - Math.max(min, data[current] - halfwidth)); double weight = realwidth / (2 * halfwidth); @@ -94,22 +108,27 @@ public class KernelDensityEstimator { * * @param data data to process * @param kernel Kernel function to use. + * @param epsilon Precision threshold */ - public KernelDensityEstimator(double[] data, KernelDensityFunction kernel) { - double min = Double.MAX_VALUE; - double max = Double.MIN_VALUE; - for(double d : data) { - if(d < min) { - min = d; - } - if(d > max) { - max = d; + public KernelDensityEstimator(double[] data, KernelDensityFunction kernel, double epsilon) { + boolean needsort = false; + for (int i = 1; i < data.length; i++) { + if (data[i - 1] > data[i]) { + needsort = true; + break; } } - // Heuristics. + // Duplicate and sort when needed: + if (needsort) { + data = data.clone(); + Arrays.sort(data); + } + final double min = data[0]; + final double max = data[data.length - 1]; + // Heuristic for choosing the window size. 
int windows = 1 + (int) (Math.log(data.length)); - process(data, min, max, kernel, windows); + process(data, min, max, kernel, windows, epsilon); } /** diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java index 231b2071..00999245 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/LinearRegression.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java index 79c84701..0d37ca67 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java index b1bfe31b..12dfe28c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/PolynomialRegression.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- 
und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java b/src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java new file mode 100644 index 00000000..c783f5f1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/ProbabilityWeightedMoments.java @@ -0,0 +1,176 @@ +package de.lmu.ifi.dbs.elki.math.statistics; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Estimate the L-Moments of a sample. + * + * Reference: + * <p> + * J. R. M. Hosking, J. R. Wallis, and E. F. Wood<br /> + * Estimation of the generalized extreme-value distribution by the method of + * probability-weighted moments.<br /> + * Technometrics 27.3 + * </p> + * + * Also based on: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "J.R.M. 
Hosking, J. R. Wallis, and E. F. Wood", title = "Estimation of the generalized extreme-value distribution by the method of probability-weighted moments.", booktitle = "Technometrics 27.3", url = "http://dx.doi.org/10.1080/00401706.1985.10488049") +public class ProbabilityWeightedMoments { + /** + * Compute the alpha_r factors using the method of probability-weighted + * moments. + * + * @param sorted <b>Presorted</b> data array. + * @param nmom Number of moments to compute + * @return Alpha moments (0-indexed) + */ + public static <A> double[] alphaPWM(A data, NumberArrayAdapter<?, A> adapter, final int nmom) { + final int n = adapter.size(data); + final double[] xmom = new double[nmom]; + double weight = 1. / n; + for (int i = 0; i < n; i++) { + final double val = adapter.getDouble(data, i); + xmom[0] += weight * val; + for (int j = 1; j < nmom; j++) { + weight *= (n - i - j + 1) / (n - j + 1); + xmom[j] += weight * val; + } + } + return xmom; + } + + /** + * Compute the beta_r factors using the method of probability-weighted + * moments. + * + * @param sorted <b>Presorted</b> data array. + * @param nmom Number of moments to compute + * @return Beta moments (0-indexed) + */ + public static <A> double[] betaPWM(A data, NumberArrayAdapter<?, A> adapter, final int nmom) { + final int n = adapter.size(data); + final double[] xmom = new double[nmom]; + double weight = 1. / n; + for (int i = 0; i < n; i++) { + final double val = adapter.getDouble(data, i); + xmom[0] += weight * val; + for (int j = 1; j < nmom; j++) { + weight *= (i - j + 1) / (n - j + 1); + xmom[j] += weight * val; + } + } + return xmom; + } + + /** + * Compute the alpha_r and beta_r factors in parallel using the method of + * probability-weighted moments. Usually cheaper than computing them + * separately. + * + * @param sorted <b>Presorted</b> data array. 
+ * @param nmom Number of moments to compute + * @return Alpha and Beta moments (0-indexed, interleaved) + */ + public static <A> double[] alphaBetaPWM(A data, NumberArrayAdapter<?, A> adapter, final int nmom) { + final int n = adapter.size(data); + final double[] xmom = new double[nmom << 1]; + double aweight = 1. / n, bweight = aweight; + for (int i = 0; i < n; i++) { + final double val = adapter.getDouble(data, i); + xmom[0] += aweight * val; + xmom[1] += bweight * val; + for (int j = 1, k = 2; j < nmom; j++, k += 2) { + aweight *= (n - i - j + 1) / (n - j + 1); + bweight *= (i - j + 1) / (n - j + 1); + xmom[k + 1] += aweight * val; + xmom[k + 1] += bweight * val; + } + } + return xmom; + } + + /** + * Compute the sample L-Moments using probability weighted moments. + * + * @param sorted <b>Presorted</b> data array. + * @param nmom Number of moments to compute + * @return Array containing Lambda1, Lambda2, Tau3 ... TauN + */ + public static <A> double[] samLMR(A sorted, NumberArrayAdapter<?, A> adapter, final int nmom) { + final int n = adapter.size(sorted); + if (nmom >= n) { + throw new ArithmeticException("Can't compute higher order moments for just" + n + " observations."); + } + final double[] sum = new double[nmom]; + // Estimate probability weighted moments (unbiased) + for (int i = 0; i < n; i++) { + double term = adapter.getDouble(sorted, i); + // Robustness: skip bad values + if (Double.isInfinite(term) || Double.isNaN(term)) { + continue; + } + sum[0] += term; + for (int j = 1, z = i; j < nmom; j++, z--) { + term *= z; + sum[j] += term; + } + } + // Normalize by "n choose (j + 1)" + sum[0] /= n; + double z = n; + for (int j = 1; j < nmom; j++) { + z *= n - j; + sum[j] /= z; + } + for (int k = nmom - 1; k >= 1; --k) { + double p = ((k & 1) == 0) ? 
+1 : -1; + double temp = p * sum[0]; + for (int i = 0; i < k; i++) { + double ai = i + 1.; + p *= -(k + ai) * (k - i) / (ai * ai); + temp += p * sum[i + 1]; + } + sum[k] = temp; + } + if (nmom > 2 && !(sum[1] > 0)) { + throw new ArithmeticException("Can't compute higher order moments for constant data. Sum: " + sum[1]); + } + // Map lambda3...lambdaN to tau3...tauN + for (int i = 2; i < nmom; i++) { + sum[i] /= sum[1]; + } + return sum; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java index 1efe19dd..c48583ea 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/BetaDistribution.java @@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; * @author Jan Brusis
* @author Erich Schubert
*/
-public class BetaDistribution implements DistributionWithRandom {
+public class BetaDistribution implements Distribution {
/**
* Numerical precision to use
*/
@@ -94,7 +94,7 @@ public class BetaDistribution implements DistributionWithRandom { */
public BetaDistribution(double a, double b, Random random) {
super();
- if(a <= 0.0 || b <= 0.0) {
+ if (a <= 0.0 || b <= 0.0) {
throw new IllegalArgumentException("Invalid parameters for Beta distribution.");
}
@@ -106,23 +106,23 @@ public class BetaDistribution implements DistributionWithRandom { @Override
public double pdf(double val) {
- if(val < 0. || val > 1.) {
+ if (val < 0. || val > 1.) {
return 0.;
}
- if(val == 0.) {
- if(alpha > 1.) {
+ if (val == 0.) {
+ if (alpha > 1.) {
return 0.;
}
- if(alpha < 1.) {
+ if (alpha < 1.) {
return Double.POSITIVE_INFINITY;
}
return beta;
}
- if(val == 1.) {
- if(beta > 1.) {
+ if (val == 1.) {
+ if (beta > 1.) {
return 0.;
}
- if(beta < 1.) {
+ if (beta < 1.) {
return Double.POSITIVE_INFINITY;
}
return alpha;
@@ -132,23 +132,22 @@ public class BetaDistribution implements DistributionWithRandom { @Override
public double cdf(double x) {
- if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ if (alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
return Double.NaN;
}
- if(x <= 0.0) {
+ if (x <= 0.0) {
return 0.0;
}
- if(x >= 1.0) {
+ if (x >= 1.0) {
return 1.0;
}
- if(alpha > SWITCH && beta > SWITCH) {
+ if (alpha > SWITCH && beta > SWITCH) {
return regularizedIncBetaQuadrature(alpha, beta, x);
}
double bt = Math.exp(-logbab + alpha * Math.log(x) + beta * Math.log1p(-x));
- if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ if (x < (alpha + 1.0) / (alpha + beta + 2.0)) {
return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
- }
- else {
+ } else {
return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
}
}
@@ -156,20 +155,19 @@ public class BetaDistribution implements DistributionWithRandom { @Override
public double quantile(double x) {
// Valid parameters
- if(x < 0 || x > 1 || Double.isNaN(x)) {
+ if (x < 0 || x > 1 || Double.isNaN(x)) {
return Double.NaN;
}
- if(x == 0) {
+ if (x == 0) {
return 0.0;
}
- if(x == 1) {
+ if (x == 1) {
return 1.0;
}
// Simpler to compute inverse?
- if(x > 0.5) {
+ if (x > 0.5) {
return 1 - rawQuantile(1 - x, beta, alpha, logbab);
- }
- else {
+ } else {
return rawQuantile(x, alpha, beta, logbab);
}
}
@@ -207,26 +205,26 @@ public class BetaDistribution implements DistributionWithRandom { * @return probability density
*/
public static double pdf(double val, double alpha, double beta) {
- if(alpha <= 0. || beta <= 0. || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(val)) {
+ if (alpha <= 0. || beta <= 0. || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(val)) {
return Double.NaN;
}
- if(val < 0. || val > 1.) {
+ if (val < 0. || val > 1.) {
return 0.;
}
- if(val == 0.) {
- if(alpha > 1.) {
+ if (val == 0.) {
+ if (alpha > 1.) {
return 0.;
}
- if(alpha < 1.) {
+ if (alpha < 1.) {
return Double.POSITIVE_INFINITY;
}
return beta;
}
- if(val == 1.) {
- if(beta > 1.) {
+ if (val == 1.) {
+ if (beta > 1.) {
return 0.;
}
- if(beta < 1.) {
+ if (beta < 1.) {
return Double.POSITIVE_INFINITY;
}
return alpha;
@@ -235,6 +233,42 @@ public class BetaDistribution implements DistributionWithRandom { }
/**
+ * Static version of the log PDF (logarithm of the density) of the beta distribution
+ *
+ * @param val Value
+ * @param alpha Shape parameter a
+ * @param beta Shape parameter b
+ * @return logarithm of the probability density
+ */
+ public static double logpdf(double val, double alpha, double beta) {
+ if (alpha <= 0. || beta <= 0. || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(val)) {
+ return Double.NaN;
+ }
+ if (val < 0. || val > 1.) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if (val == 0.) {
+ if (alpha > 1.) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if (alpha < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return Math.log(beta);
+ }
+ if (val == 1.) {
+ if (beta > 1.) {
+ return Double.NEGATIVE_INFINITY;
+ }
+ if (beta < 1.) {
+ return Double.POSITIVE_INFINITY;
+ }
+ return Math.log(alpha);
+ }
+ return -logBeta(alpha, beta) + Math.log(val) * (alpha - 1) + Math.log1p(-val) * (beta - 1);
+ }
+
+ /**
* Compute log beta(a,b)
*
* @param alpha Shape parameter a
@@ -255,23 +289,22 @@ public class BetaDistribution implements DistributionWithRandom { * @return Value of the regularized incomplete beta function
*/
public static double regularizedIncBeta(double x, double alpha, double beta) {
- if(alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
+ if (alpha <= 0.0 || beta <= 0.0 || Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(x)) {
return Double.NaN;
}
- if(x <= 0.0) {
+ if (x <= 0.0) {
return 0.0;
}
- if(x >= 1.0) {
+ if (x >= 1.0) {
return 1.0;
}
- if(alpha > SWITCH && beta > SWITCH) {
+ if (alpha > SWITCH && beta > SWITCH) {
return regularizedIncBetaQuadrature(alpha, beta, x);
}
double bt = Math.exp(-logBeta(alpha, beta) + alpha * Math.log(x) + beta * Math.log1p(-x));
- if(x < (alpha + 1.0) / (alpha + beta + 2.0)) {
+ if (x < (alpha + 1.0) / (alpha + beta + 2.0)) {
return bt * regularizedIncBetaCF(alpha, beta, x) / alpha;
- }
- else {
+ } else {
return 1.0 - bt * regularizedIncBetaCF(beta, alpha, 1.0 - x) / beta;
}
}
@@ -292,37 +325,37 @@ public class BetaDistribution implements DistributionWithRandom { double qam = alpha - 1.0;
double c = 1.0;
double d = 1.0 - qab * x / qap;
- if(Math.abs(d) < FPMIN) {
+ if (Math.abs(d) < FPMIN) {
d = FPMIN;
}
d = 1.0 / d;
double h = d;
- for(int m = 1; m < 10000; m++) {
+ for (int m = 1; m < 10000; m++) {
int m2 = 2 * m;
double aa = m * (beta - m) * x / ((qam + m2) * (alpha + m2));
d = 1.0 + aa * d;
- if(Math.abs(d) < FPMIN) {
+ if (Math.abs(d) < FPMIN) {
d = FPMIN;
}
c = 1.0 + aa / c;
- if(Math.abs(c) < FPMIN) {
+ if (Math.abs(c) < FPMIN) {
c = FPMIN;
}
d = 1.0 / d;
h *= d * c;
aa = -(alpha + m) * (qab + m) * x / ((alpha + m2) * (qap + m2));
d = 1.0 + aa * d;
- if(Math.abs(d) < FPMIN) {
+ if (Math.abs(d) < FPMIN) {
d = FPMIN;
}
c = 1.0 + aa / c;
- if(Math.abs(c) < FPMIN) {
+ if (Math.abs(c) < FPMIN) {
c = FPMIN;
}
d = 1.0 / d;
double del = d * c;
h *= del;
- if(Math.abs(del - 1.0) <= NUM_PRECISION) {
+ if (Math.abs(del - 1.0) <= NUM_PRECISION) {
break;
}
}
@@ -339,31 +372,31 @@ public class BetaDistribution implements DistributionWithRandom { * @return result
*/
protected static double regularizedIncBetaQuadrature(double alpha, double beta, double x) {
- double a1 = alpha - 1.0;
- double b1 = beta - 1.0;
- double mu = alpha / (alpha + beta);
- double lnmu = Math.log(mu);
- double lnmuc = Math.log1p(-mu);
- double t = Math.sqrt(alpha * beta / ((alpha + beta) * (alpha + beta) * (alpha + beta + 1.0)));
- double xu;
- if(x > alpha / (alpha + beta)) {
- if(x >= 1.0) {
+ final double alphapbeta = alpha + beta;
+ final double a1 = alpha - 1.0;
+ final double b1 = beta - 1.0;
+ final double mu = alpha / alphapbeta;
+ final double lnmu = Math.log(mu);
+ final double lnmuc = Math.log1p(-mu);
+ double t = Math.sqrt(alpha * beta / (alphapbeta * alphapbeta * (alphapbeta + 1.0)));
+ final double xu;
+ if (x > alpha / alphapbeta) {
+ if (x >= 1.0) {
return 1.0;
}
xu = Math.min(1.0, Math.max(mu + 10.0 * t, x + 5.0 * t));
- }
- else {
- if(x <= 0.0) {
+ } else {
+ if (x <= 0.0) {
return 0.0;
}
xu = Math.max(0.0, Math.min(mu - 10.0 * t, x - 5.0 * t));
}
double sum = 0.0;
- for(int i = 0; i < GAUSSLEGENDRE_Y.length; i++) {
+ for (int i = 0; i < GAUSSLEGENDRE_Y.length; i++) {
t = x + (xu - x) * GAUSSLEGENDRE_Y[i];
sum += GAUSSLEGENDRE_W[i] * Math.exp(a1 * (Math.log(t) - lnmu) + b1 * (Math.log1p(-t) - lnmuc));
}
- double ans = sum * (xu - x) * Math.exp(a1 * lnmu - GammaDistribution.logGamma(alpha) + b1 * lnmuc - GammaDistribution.logGamma(b1) + GammaDistribution.logGamma(alpha + beta));
+ double ans = sum * (xu - x) * Math.exp(a1 * lnmu - GammaDistribution.logGamma(alpha) + b1 * lnmuc - GammaDistribution.logGamma(beta) + GammaDistribution.logGamma(alphapbeta));
return ans > 0 ? 1.0 - ans : -ans;
}
@@ -377,23 +410,22 @@ public class BetaDistribution implements DistributionWithRandom { */
public static double quantile(double p, double alpha, double beta) {
// Valid parameters
- if(Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(p) || alpha < 0. || beta < 0.) {
+ if (Double.isNaN(alpha) || Double.isNaN(beta) || Double.isNaN(p) || alpha < 0. || beta < 0.) {
return Double.NaN;
}
- if(p < 0 || p > 1) {
+ if (p < 0 || p > 1) {
return Double.NaN;
}
- if(p == 0) {
+ if (p == 0) {
return 0.0;
}
- if(p == 1) {
+ if (p == 1) {
return 1.0;
}
// Simpler to compute inverse?
- if(p > 0.5) {
+ if (p > 0.5) {
return 1 - rawQuantile(1 - p, beta, alpha, logBeta(beta, alpha));
- }
- else {
+ } else {
return rawQuantile(p, alpha, beta, logBeta(alpha, beta));
}
}
@@ -415,33 +447,30 @@ public class BetaDistribution implements DistributionWithRandom { double tmp = Math.sqrt(-2 * Math.log(p));
double y = tmp - (2.30753 + 0.27061 * tmp) / (1. + (0.99229 + 0.04481 * tmp) * tmp);
- if(alpha > 1 && beta > 1) {
+ if (alpha > 1 && beta > 1) {
double r = (y * y - 3.) / 6.;
double s = 1. / (alpha + alpha - 1.);
double t = 1. / (beta + beta - 1.);
double h = 2. / (s + t);
double w = y * Math.sqrt(h + r) / h - (t - s) * (r + 5. / 6. - 2. / (3. * h));
x = alpha / (alpha + beta * Math.exp(w + w));
- }
- else {
+ } else {
double r = beta + beta;
double t = 1. / (9. * beta);
t = r * Math.pow(1. - t + y * Math.sqrt(t), 3.0);
- if(t <= 0.) {
+ if (t <= 0.) {
x = 1. - Math.exp((Math.log1p(-p) + Math.log(beta) + logbeta) / beta);
- }
- else {
+ } else {
t = (4. * alpha + r - 2.) / t;
- if(t <= 1.) {
+ if (t <= 1.) {
x = Math.exp((Math.log(p * alpha) + logbeta) / alpha);
- }
- else {
+ } else {
x = 1. - 2. / (t + 1.);
}
}
}
// Degenerate initial approximations
- if(x < 3e-308 || x > 1 - 2.22e-16) {
+ if (x < 3e-308 || x > 1 - 2.22e-16) {
x = 0.5;
}
}
@@ -455,29 +484,29 @@ public class BetaDistribution implements DistributionWithRandom { final double acu = Math.max(1e-300, Math.pow(10., -13 - 2.5 / (alpha * alpha) - .5 / (p * p)));
double prevstep = 0., y = 0., stepsize = 1;
- for(int outer = 0; outer < 1000; outer++) {
+ for (int outer = 0; outer < 1000; outer++) {
// Current CDF value
double ynew = cdf(x, alpha, beta);
- if(Double.isInfinite(ynew)) { // Degenerated.
+ if (Double.isInfinite(ynew)) { // Degenerated.
return Double.NaN;
}
// Error gradient
ynew = (ynew - p) * Math.exp(logbeta + ialpha * Math.log(x) + ibeta * Math.log1p(-x));
- if(ynew * y <= 0.) {
+ if (ynew * y <= 0.) {
prevstep = Math.max(Math.abs(stepsize), 3e-308);
}
// Inner loop: try different step sizes: y * 3^-i
double g = 1, xnew = 0.;
- for(int inner = 0; inner < 1000; inner++) {
+ for (int inner = 0; inner < 1000; inner++) {
stepsize = g * ynew;
- if(Math.abs(stepsize) < prevstep) {
+ if (Math.abs(stepsize) < prevstep) {
xnew = x - stepsize; // Candidate x
- if(xnew >= 0. && xnew <= 1.) {
+ if (xnew >= 0. && xnew <= 1.) {
// Close enough
- if(prevstep <= acu || Math.abs(ynew) <= acu) {
+ if (prevstep <= acu || Math.abs(ynew) <= acu) {
return x;
}
- if(xnew != 0. && xnew != 1.) {
+ if (xnew != 0. && xnew != 1.) {
break;
}
}
@@ -485,7 +514,7 @@ public class BetaDistribution implements DistributionWithRandom { g /= 3.;
}
// Convergence
- if(Math.abs(xnew - x) < 1e-15 * x) {
+ if (Math.abs(xnew - x) < 1e-15 * x) {
return x;
}
// Iterate with new values
@@ -496,4 +525,4 @@ public class BetaDistribution implements DistributionWithRandom { // Not converged in Newton-Raphson
throw new AbortException("Beta quantile computation did not converge.");
}
-}
\ No newline at end of file +}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java new file mode 100644 index 00000000..c218a37f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/CauchyDistribution.java @@ -0,0 +1,135 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Cauchy distribution. + * + * @author Erich Schubert + */ +public class CauchyDistribution implements Distribution { + /** + * The location (x0) parameter. + */ + final double location; + + /** + * The shape (gamma) parameter. + */ + final double shape; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor with default random. + * + * @param location Location (x0) + * @param shape Shape (gamma) + */ + public CauchyDistribution(double location, double shape) { + this(location, shape, new Random()); + } + + /** + * Constructor. 
+ * + * @param location Location (x0) + * @param shape Shape (gamma) + * @param random Random generator + */ + public CauchyDistribution(double location, double shape, Random random) { + super(); + this.location = location; + this.shape = shape; + this.random = random; + } + + @Override + public double pdf(double x) { + return pdf(x, location, shape); + } + + @Override + public double cdf(double x) { + return cdf(x, location, shape); + } + + @Override + public double quantile(double x) { + return quantile(x, location, shape); + } + + @Override + public double nextRandom() { + final double r = random.nextDouble() - .5; + return Math.tan(Math.PI * r); + } + + /** + * PDF function, static version. + * + * @param x Value + * @param location Location (x0) + * @param shape Shape (gamma) + * @return PDF value + */ + public static double pdf(double x, double location, double shape) { + final double v = (x - location) / shape; + return 1. / Math.PI * shape * (1 + v * v); + } + + /** + * PDF function, static version. + * + * @param x Value + * @param location Location (x0) + * @param shape Shape (gamma) + * @return PDF value + */ + public static double cdf(double x, double location, double shape) { + return Math.atan2(x - location, shape) / Math.PI + .5; + } + + /** + * PDF function, static version. 
+ * + * @param x Value + * @param location Location (x0) + * @param shape Shape (gamma) + * @return PDF value + */ + public static double quantile(double x, double location, double shape) { + return location + shape * Math.tan(Math.PI * (x - .5)); + } + + @Override + public String toString() { + return "CauchyDistribution(location=" + location + ", shape=" + shape + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java index 5dc5b399..a552e413 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java @@ -1,12 +1,13 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; +import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -32,7 +33,7 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; * * @apiviz.composedOf ChiSquaredDistribution */ -public class ChiDistribution implements DistributionWithRandom { +public class ChiDistribution implements Distribution { /** * Degrees of freedom. Usually integer. */ @@ -97,7 +98,7 @@ public class ChiDistribution implements DistributionWithRandom { // FIXME: implement! 
@Override public double quantile(double val) { - throw new UnsupportedOperationException(ExceptionMessages.UNSUPPORTED_NOT_YET); + throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java index efa24079..235367cd 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java @@ -6,7 +6,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -59,14 +59,14 @@ public class ChiSquaredDistribution extends GammaDistribution { * @return probability density */ public static double pdf(double x, double dof) { - if(x <= 0) { + if (x <= 0) { return 0.0; } - if(x == 0) { + if (x == 0) { return 0.0; } final double k = dof * .5; - if(Math.abs(k - 1.0) < Double.MIN_NORMAL) { + if (Math.abs(k - 1.0) < Double.MIN_NORMAL) { return Math.exp(-x * 2.0) * 2.0; } return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0; @@ -90,4 +90,9 @@ public class ChiSquaredDistribution extends GammaDistribution { public static double quantile(double x, double dof) { return GammaDistribution.quantile(x, .5 * dof, .5); } -}
\ No newline at end of file + + @Override + public String toString() { + return "ChiSquaredDistribution(dof=" + (2 * getK()) + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java index b046f0ef..35d5294f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ConstantDistribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,7 +28,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; * * @author Erich Schubert */ -public class ConstantDistribution implements DistributionWithRandom { +public class ConstantDistribution implements Distribution { /** * The constant */ diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java index ad4ef944..519ba0b3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,9 +24,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; */ /** - * Statistical distributions, with their common functions. 
See - * {@link DistributionWithRandom} for distributions that also have a random - * generator included. + * Statistical distributions, with their common functions. * * @author Erich Schubert */ @@ -56,10 +54,17 @@ public interface Distribution { double quantile(double val); /** + * Generate a new random value + * + * @return new random value + */ + double nextRandom(); + + /** * Describe the distribution * * @return description */ @Override String toString(); -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java index 866f40d6..e5af3e5b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,7 +30,7 @@ import java.util.Random; * * @author Erich Schubert */ -public class ExponentialDistribution implements DistributionWithRandom { +public class ExponentialDistribution implements Distribution { /** * Random generator. */ @@ -40,6 +40,11 @@ public class ExponentialDistribution implements DistributionWithRandom { * Rate, inverse of mean */ double rate; + + /** + * Location parameter. + */ + double location; /** * Constructor. @@ -47,7 +52,17 @@ public class ExponentialDistribution implements DistributionWithRandom { * @param rate Rate parameter (1/scale) */ public ExponentialDistribution(double rate) { - this(rate, new Random()); + this(rate, 0.0, null); + } + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + * @param location Location parameter + */ + public ExponentialDistribution(double rate, double location) { + this(rate, location, null); } /** @@ -57,14 +72,29 @@ public class ExponentialDistribution implements DistributionWithRandom { * @param random Random generator */ public ExponentialDistribution(double rate, Random random) { + this(rate, 0.0, random); + } + + /** + * Constructor. 
+ * + * @param rate Rate parameter (1/scale) + * @param location Location parameter + * @param random Random generator + */ + public ExponentialDistribution(double rate, double location, Random random) { super(); this.rate = rate; + this.location = location; this.rnd = random; } @Override public double pdf(double val) { - return rate * Math.exp(-rate * val); + if (val < location) { + return 0.; + } + return rate * Math.exp(-rate * (val - location)); } /** @@ -75,12 +105,18 @@ public class ExponentialDistribution implements DistributionWithRandom { * @return probability density */ public static double pdf(double val, double rate) { + if (val < 0.) { + return 0.; + } return rate * Math.exp(-rate * val); } @Override public double cdf(double val) { - return 1 - Math.exp(-rate * val); + if (val < location) { + return 0.; + } + return 1 - Math.exp(-rate * (val - location)); } /** @@ -91,12 +127,15 @@ public class ExponentialDistribution implements DistributionWithRandom { * @return cumulative density */ public static double cdf(double val, double rate) { + if (val < 0.) { + return 0.; + } return 1 - Math.exp(-rate * val); } @Override public double quantile(double val) { - return -Math.log(1 - val) / rate; + return -Math.log(1 - val) / rate + location; } /** @@ -121,6 +160,11 @@ public class ExponentialDistribution implements DistributionWithRandom { */ @Override public double nextRandom() { - return -Math.log(rnd.nextDouble()) / rate; + return -Math.log(rnd.nextDouble()) / rate + location; + } + + @Override + public String toString() { + return "ExponentialDistribution(rate=" + rate + ", location=" + location + ")"; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java new file mode 100644 index 00000000..01e91777 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentiallyModifiedGaussianDistribution.java @@ -0,0 +1,188 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; +import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException; + +/** + * Exponentially modified Gaussian (EMG) distribution (ExGaussian distribution) + * is a combination of a normal distribution and an exponential distribution. 
+ * + * @author Erich Schubert + */ +@Alias({ "exgaussian" }) +public class ExponentiallyModifiedGaussianDistribution implements Distribution { + /** + * Mean value for the generator + */ + private double mean; + + /** + * Standard deviation + */ + private double stddev; + + /** + * Exponential rate. + */ + private double lambda; + + /** + * Random generator. + */ + private Random rnd; + + /** + * Constructor for ExGaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + * @param lambda Rate + * @param rnd Random + */ + public ExponentiallyModifiedGaussianDistribution(double mean, double stddev, double lambda, Random rnd) { + super(); + this.mean = mean; + this.stddev = stddev; + this.lambda = lambda; + this.rnd = rnd; + } + + /** + * Constructor for ExGaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + * @param lambda Rate + */ + public ExponentiallyModifiedGaussianDistribution(double mean, double stddev, double lambda) { + this(mean, stddev, lambda, null); + } + + @Override + public double pdf(double val) { + return pdf(val, mean, stddev, lambda); + } + + @Override + public double cdf(double val) { + return cdf(val, mean, stddev, lambda); + } + + /** + * @deprecated Not yet implemented! + */ + @Override + @Deprecated + public double quantile(double q) { + return quantile(q, mean, stddev, lambda); + } + + @Override + public double nextRandom() { + double no = mean + rnd.nextGaussian() * stddev; + double ex = -Math.log(rnd.nextDouble()) / lambda; + return no + ex; + } + + @Override + public String toString() { + return "ExGaussianDistribution(mean=" + mean + ", stddev=" + stddev + ", lambda=" + lambda + ")"; + } + + /** + * @return the mean + */ + public double getMean() { + return mean; + } + + /** + * @return the standard deviation + */ + public double getStddev() { + return stddev; + } + + /** + * @return the lambda value. 
+ */ + public double getLambda() { + return lambda; + } + + /** + * Probability density function of the ExGaussian distribution. + * + * @param x The value. + * @param mu The mean. + * @param sigma The standard deviation. + * @param lambda Rate parameter. + * @return PDF of the given exgauss distribution at x. + */ + public static double pdf(double x, double mu, double sigma, double lambda) { + final double dx = x - mu; + final double erfc = NormalDistribution.erfc(lambda * sigma * sigma - dx); + return .5 * lambda * Math.exp(lambda * (lambda * sigma * sigma * .5 - dx)) * erfc; + } + + /** + * Cumulative probability density function (CDF) of an exgauss distribution. + * + * @param x value to evaluate CDF at. + * @param mu Mean value. + * @param sigma Standard deviation. + * @param lambda Rate parameter. + * @return The CDF of the given exgauss distribution at x. + */ + public static double cdf(double x, double mu, double sigma, double lambda) { + final double u = lambda * (x - mu); + final double v = lambda * sigma; + final double v2 = v * v; + final double logphi = Math.log(NormalDistribution.cdf(u, v2, v)); + return NormalDistribution.cdf(u, 0., v) - Math.exp(-u + v2 * .5 + logphi); + } + + /** + * Inverse cumulative probability density function (probit) of an exgauss + * distribution. + * + * @param x value to evaluate probit function at. + * @param mu Mean value. + * @param sigma Standard deviation. + * @param lambda Rate parameter. + * @return The probit of the given exgauss distribution at x. + * + * @deprecated Not yet implemented! + */ + @Deprecated + public static double quantile(double x, double mu, double sigma, double lambda) { + // FIXME: implement! 
+ throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java index 21eebc51..1b9e2b42 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GammaDistribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * * @author Erich Schubert */ -public class GammaDistribution implements DistributionWithRandom { +public class GammaDistribution implements Distribution { /** * Euler–Mascheroni constant */ @@ -51,11 +51,22 @@ public class GammaDistribution implements DistributionWithRandom { static final double[] LANCZOS = { 0.99999999999999709182, 57.156235665862923517, -59.597960355475491248, 14.136097974741747174, -0.49191381609762019978, .33994649984811888699e-4, .46523628927048575665e-4, -.98374475304879564677e-4, .15808870322491248884e-3, -.21026444172410488319e-3, .21743961811521264320e-3, -.16431810653676389022e-3, .84418223983852743293e-4, -.26190838401581408670e-4, .36899182659531622704e-5, }; /** - * Numerical precision to use + * Numerical precision to use (data type dependent!) + * + * If you change this, make sure to test exhaustively! */ static final double NUM_PRECISION = 1E-15; /** + * Maximum number of iterations for regularizedGammaP. To prevent degeneration + * for extreme values. + * + * FIXME: is this too high, too low? Can we improve behavior for extreme + * cases? 
+ */ + static final int MAX_ITERATIONS = 1000; + + /** * Alpha == k */ private final double k; @@ -79,8 +90,8 @@ public class GammaDistribution implements DistributionWithRandom { */ public GammaDistribution(double k, double theta, Random random) { super(); - if(k <= 0.0 || theta <= 0.0) { - throw new IllegalArgumentException("Invalid parameters for Gamma distribution."); + if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs! + throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta); } this.k = k; @@ -151,6 +162,9 @@ public class GammaDistribution implements DistributionWithRandom { * @return cdf value */ public static double cdf(double val, double k, double theta) { + if (val < 0) { + return 0.; + } return regularizedGammaP(k, val * theta); } @@ -163,6 +177,9 @@ public class GammaDistribution implements DistributionWithRandom { * @return cdf value */ public static double logcdf(double val, double k, double theta) { + if (val < 0) { + return Double.NEGATIVE_INFINITY; + } return logregularizedGammaP(k, val * theta); } @@ -175,18 +192,17 @@ public class GammaDistribution implements DistributionWithRandom { * @return probability density */ public static double pdf(double x, double k, double theta) { - if(x < 0) { + if (x < 0) { return 0.0; } - if(x == 0) { - if(k == 1.0) { + if (x == 0) { + if (k == 1.0) { return theta; - } - else { + } else { return 0.0; } } - if(k == 1.0) { + if (k == 1.0) { return Math.exp(-x * theta) * theta; } @@ -202,18 +218,17 @@ public class GammaDistribution implements DistributionWithRandom { * @return probability density */ public static double logpdf(double x, double k, double theta) { - if(x < 0) { + if (x < 0) { return Double.NEGATIVE_INFINITY; } - if(x == 0) { - if(k == 1.0) { + if (x == 0) { + if (k == 1.0) { return Math.log(theta); - } - else { + } else { return Double.NEGATIVE_INFINITY; } } - if(k == 1.0) { + if (k == 1.0) { return Math.log(theta) - x * theta; } @@ -232,14 
+247,14 @@ public class GammaDistribution implements DistributionWithRandom { * @return log(Γ(x)) */ public static double logGamma(final double x) { - if(Double.isNaN(x) || (x <= 0.0)) { + if (Double.isNaN(x) || (x <= 0.0)) { return Double.NaN; } double g = 607.0 / 128.0; double tmp = x + g + .5; tmp = (x + 0.5) * Math.log(tmp) - tmp; double ser = LANCZOS[0]; - for(int i = LANCZOS.length - 1; i > 0; --i) { + for (int i = LANCZOS.length - 1; i > 0; --i) { ser += LANCZOS[i] / (x + i); } return tmp + Math.log(MathUtil.SQRTTWOPI * ser / x); @@ -275,30 +290,30 @@ public class GammaDistribution implements DistributionWithRandom { */ public static double regularizedGammaP(final double a, final double x) { // Special cases - if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + if (Double.isInfinite(a) || Double.isInfinite(x) || !(a > 0.0) || !(x >= 0.0)) { return Double.NaN; } - if(x == 0.0) { + if (x == 0.0) { return 0.0; } - if(x >= a + 1) { + if (x >= a + 1) { // Expected to converge faster return 1.0 - regularizedGammaQ(a, x); } // Loosely following "Numerical Recipes" - double del = 1.0 / a; - double sum = del; - for(int n = 1; n < Integer.MAX_VALUE; n++) { + double term = 1.0 / a; + double sum = term; + for (int n = 1; n < MAX_ITERATIONS; n++) { // compute next element in the series - del *= x / (a + n); - sum = sum + del; - if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) { + term = x / (a + n) * term; + sum = sum + term; + if (sum == Double.POSITIVE_INFINITY) { + return 1.0; + } + if (Math.abs(term / sum) < NUM_PRECISION) { break; } } - if(Double.isInfinite(sum)) { - return 1.0; - } return Math.exp(-x + (a * Math.log(x)) - logGamma(a)) * sum; } @@ -316,13 +331,13 @@ public class GammaDistribution implements DistributionWithRandom { */ public static double logregularizedGammaP(final double a, final double x) { // Special cases - if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + if (Double.isNaN(a) || 
Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { return Double.NaN; } - if(x == 0.0) { + if (x == 0.0) { return Double.NEGATIVE_INFINITY; } - if(x >= a + 1) { + if (x >= a + 1) { // Expected to converge faster // FIXME: and in log? return Math.log(1.0 - regularizedGammaQ(a, x)); @@ -330,15 +345,15 @@ public class GammaDistribution implements DistributionWithRandom { // Loosely following "Numerical Recipes" double del = 1.0 / a; double sum = del; - for(int n = 1; n < Integer.MAX_VALUE; n++) { + for (int n = 1; n < Integer.MAX_VALUE; n++) { // compute next element in the series del *= x / (a + n); sum = sum + del; - if(Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) { + if (Math.abs(del / sum) < NUM_PRECISION || sum >= Double.POSITIVE_INFINITY) { break; } } - if(Double.isInfinite(sum)) { + if (Double.isInfinite(sum)) { return 0; } // TODO: reread numerical recipes, can we replace log(sum)? @@ -360,13 +375,13 @@ public class GammaDistribution implements DistributionWithRandom { * @return Result */ public static double regularizedGammaQ(final double a, final double x) { - if(Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { + if (Double.isNaN(a) || Double.isNaN(x) || (a <= 0.0) || (x < 0.0)) { return Double.NaN; } - if(x == 0.0) { + if (x == 0.0) { return 1.0; } - if(x < a + 1.0) { + if (x < a + 1.0) { // Expected to converge faster return 1.0 - regularizedGammaP(a, x); } @@ -376,21 +391,21 @@ public class GammaDistribution implements DistributionWithRandom { double c = 1.0 / FPMIN; double d = 1.0 / b; double fac = d; - for(int i = 1; i < Integer.MAX_VALUE; i++) { + for (int i = 1; i < MAX_ITERATIONS; i++) { double an = i * (a - i); b += 2; d = an * d + b; - if(Math.abs(d) < FPMIN) { + if (Math.abs(d) < FPMIN) { d = FPMIN; } c = b + an / c; - if(Math.abs(c) < FPMIN) { + if (Math.abs(c) < FPMIN) { c = FPMIN; } d = 1 / d; double del = d * c; fac *= del; - if(Math.abs(del - 1.0) <= NUM_PRECISION) { + if (Math.abs(del - 1.0) <= 
NUM_PRECISION) { break; } } @@ -424,33 +439,30 @@ public class GammaDistribution implements DistributionWithRandom { final double e4 = 0.041664508, e5 = 0.008345522, e6 = 0.001353826; final double e7 = 0.000247453; - if(k < 1.0) { // Base case, for small k + if (k < 1.0) { // Base case, for small k final double b = 1.0 + 0.36788794412 * k; // Step 1 - while(true) { + while (true) { final double p = b * random.nextDouble(); - if(p <= 1.0) { // when gds <= 1 + if (p <= 1.0) { // when gds <= 1 final double gds = Math.exp(Math.log(p) / k); - if(Math.log(random.nextDouble()) <= -gds) { + if (Math.log(random.nextDouble()) <= -gds) { return (gds / theta); } - } - else { // when gds > 1 + } else { // when gds > 1 final double gds = -Math.log((b - p) / k); - if(Math.log(random.nextDouble()) <= ((k - 1.0) * Math.log(gds))) { + if (Math.log(random.nextDouble()) <= ((k - 1.0) * Math.log(gds))) { return (gds / theta); } } } - } - else { + } else { // Step 1. Preparations final double ss, s, d; - if(k != -1.0) { + if (k != -1.0) { ss = k - 0.5; s = Math.sqrt(ss); d = 5.656854249 - 12.0 * s; - } - else { + } else { // For k == -1.0: ss = 0.0; s = 0.0; @@ -465,7 +477,7 @@ public class GammaDistribution implements DistributionWithRandom { tv2 = 2.0 * random.nextDouble() - 1.0; tv12 = tv1 * tv1 + tv2 * tv2; } - while(tv12 > 1.0); + while (tv12 > 1.0); v1 = tv1; /* v2 = tv2; */ v12 = tv12; @@ -480,39 +492,36 @@ public class GammaDistribution implements DistributionWithRandom { final double t = v1 * Math.sqrt(-2.0 * Math.log(v12) / v12); final double x = s + 0.5 * t; final double gds = x * x; - if(t >= 0.0) { + if (t >= 0.0) { return (gds / theta); // Immediate acceptance } // Random uniform final double un = random.nextDouble(); // Squeeze acceptance - if(d * un <= t * t * t) { + if (d * un <= t * t * t) { return (gds / theta); } - if(k != -1.0) { // Step 4. Set-up for hat case + if (k != -1.0) { // Step 4. 
Set-up for hat case final double r = 1.0 / k; q0 = ((((((((q9 * r + q8) * r + q7) * r + q6) * r + q5) * r + q4) * r + q3) * r + q2) * r + q1) * r; - if(k > 3.686) { - if(k > 13.022) { + if (k > 3.686) { + if (k > 13.022) { b = 1.77; si = 0.75; c = 0.1515 / s; - } - else { + } else { b = 1.654 + 0.0076 * ss; si = 1.68 / s + 0.275; c = 0.062 / s + 0.024; } - } - else { + } else { b = 0.463 + s - 0.178 * ss; si = 1.235; c = 0.195 / s - 0.079 + 0.016 * s; } - } - else { + } else { // For k == -1.0: b = 0.0; c = 0.0; @@ -520,24 +529,23 @@ public class GammaDistribution implements DistributionWithRandom { q0 = 0.0; } // Compute v and q - if(x > 0.0) { + if (x > 0.0) { final double v = t / (s + s); final double q; - if(Math.abs(v) > 0.25) { + if (Math.abs(v) > 0.25) { q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v); - } - else { + } else { q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; } // Quotient acceptance: - if(Math.log(1.0 - un) <= q) { + if (Math.log(1.0 - un) <= q) { return (gds / theta); } } } // Double exponential deviate t - while(true) { + while (true) { double e, u, sign_u, t; // Retry until t is sufficiently large do { @@ -547,30 +555,28 @@ public class GammaDistribution implements DistributionWithRandom { sign_u = (u > 0) ? 
1.0 : -1.0; t = b + (e * si) * sign_u; } - while(t <= -0.71874483771719); + while (t <= -0.71874483771719); // New v(t) and q(t) final double v = t / (s + s); final double q; - if(Math.abs(v) > 0.25) { + if (Math.abs(v) > 0.25) { q = q0 - s * t + 0.25 * t * t + (ss + ss) * Math.log(1.0 + v); - } - else { + } else { q = q0 + 0.5 * t * t * ((((((((a9 * v + a8) * v + a7) * v + a6) * v + a5) * v + a4) * v + a3) * v + a2) * v + a1) * v; } - if(q <= 0.0) { + if (q <= 0.0) { continue; // retry } // Compute w(t) final double w; - if(q > 0.5) { + if (q > 0.5) { w = Math.exp(q) - 1.0; - } - else { + } else { w = ((((((e7 * q + e6) * q + e5) * q + e4) * q + e3) * q + e2) * q + e1) * q; } // Hat acceptance - if(c * u * sign_u <= w * Math.exp(e - 0.5 * t * t)) { + if (c * u * sign_u <= w * Math.exp(e - 0.5 * t * t)) { final double x = s + 0.5 * t; return (x * x / theta); } @@ -585,7 +591,7 @@ public class GammaDistribution implements DistributionWithRandom { * * Reference: * <p> - * Algorithm AS 91: The percentage points of the $\chi$ 2 distribution<br /> + * Algorithm AS 91: The percentage points of the $\chi^2$ distribution<br /> * D.J. Best, D. E. Roberts<br /> * Journal of the Royal Statistical Society. 
Series C (Applied Statistics) * </p> @@ -599,18 +605,18 @@ public class GammaDistribution implements DistributionWithRandom { protected static double chisquaredProbitApproximation(final double p, double nu, double g) { final double EPS1 = 1e-2; // Approximation quality // Sanity checks - if(Double.isNaN(p) || Double.isNaN(nu)) { + if (Double.isNaN(p) || Double.isNaN(nu)) { return Double.NaN; } // Range check - if(p <= 0) { + if (p <= 0) { return 0; } - if(p >= 1) { + if (p >= 1) { return Double.POSITIVE_INFINITY; } // Invalid parameters - if(nu <= 0) { + if (nu <= 0) { return Double.NaN; } // Shape of gamma distribution, "XX" in AS 91 @@ -618,7 +624,7 @@ public class GammaDistribution implements DistributionWithRandom { // For small chi squared values - AS 91 final double logp = Math.log(p); - if(nu < -1.24 * logp) { + if (nu < -1.24 * logp) { // FIXME: implement and use logGammap1 instead - more stable? // // final double lgam1pa = (alpha < 0.5) ? logGammap1(alpha) : @@ -626,31 +632,29 @@ public class GammaDistribution implements DistributionWithRandom { // return Math.exp((lgam1pa + logp) / alpha + MathUtil.LOG2); // This is literal AS 91, above is the GNU R variant. return Math.pow(p * k * Math.exp(g + k * MathUtil.LOG2), 1 / k); - } - else if(nu > 0.32) { + } else if (nu > 0.32) { // Wilson and Hilferty estimate: - AS 91 at 3 final double x = NormalDistribution.quantile(p, 0, 1); final double p1 = 2. / (9. 
* nu); double ch = nu * Math.pow(x * Math.sqrt(p1) + 1 - p1, 3); // Better approximation for p tending to 1: - if(ch > 2.2 * nu + 6) { + if (ch > 2.2 * nu + 6) { ch = -2 * (Math.log1p(-p) - (k - 1) * Math.log(0.5 * ch) + g); } return ch; - } - else { + } else { // nu <= 0.32, AS 91 at 1 final double C7 = 4.67, C8 = 6.66, C9 = 6.73, C10 = 13.32; final double ag = Math.log1p(-p) + g + (k - 1) * MathUtil.LOG2; double ch = 0.4; - while(true) { + while (true) { final double p1 = 1 + ch * (C7 + ch); final double p2 = ch * (C9 + ch * (C8 + ch)); final double t = -0.5 + (C7 + 2 * ch) / p1 - (C9 + ch * (C10 + 3 * ch)) / p2; final double delta = (1 - Math.exp(ag + 0.5 * ch) * p2 / p1) / t; ch -= delta; - if(Math.abs(delta) > EPS1 * Math.abs(ch)) { + if (Math.abs(delta) > EPS1 * Math.abs(ch)) { return ch; } } @@ -664,7 +668,7 @@ public class GammaDistribution implements DistributionWithRandom { * * Reference: * <p> - * Algorithm AS 91: The percentage points of the $\chi$^2 distribution<br /> + * Algorithm AS 91: The percentage points of the $\chi^2$ distribution<br /> * D.J. Best, D. E. Roberts<br /> * Journal of the Royal Statistical Society. Series C (Applied Statistics) * </p> @@ -674,34 +678,34 @@ public class GammaDistribution implements DistributionWithRandom { * @param theta Theta = 1.0/Beta aka. "scaling" parameter * @return Probit for Gamma distribution */ - @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi$^2 distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. Series C (Applied Statistics)") + @Reference(title = "Algorithm AS 91: The percentage points of the $\\chi^2$ distribution", authors = "D.J. Best, D. E. Roberts", booktitle = "Journal of the Royal Statistical Society. 
Series C (Applied Statistics)") public static double quantile(double p, double k, double theta) { final double EPS2 = 5e-7; // final precision of AS 91 final int MAXIT = 1000; // Avoid degenerates - if(Double.isNaN(p) || Double.isNaN(k) || Double.isNaN(theta)) { + if (Double.isNaN(p) || Double.isNaN(k) || Double.isNaN(theta)) { return Double.NaN; } // Range check - if(p <= 0) { + if (p <= 0) { return 0; } - if(p >= 1) { + if (p >= 1) { return Double.POSITIVE_INFINITY; } // Shape parameter check - if(k < 0 || theta <= 0) { + if (k < 0 || theta <= 0) { return Double.NaN; } // Corner case - all at 0 - if(k == 0) { + if (k == 0) { return 0.; } int max_newton_iterations = 1; // For small values, ensure some refinement iterations - if(k < 1e-10) { + if (k < 1e-10) { max_newton_iterations = 7; } @@ -713,17 +717,17 @@ public class GammaDistribution implements DistributionWithRandom { // Second hald of AS 91 follows: // Refine ChiSquared approximation chisq: { - if(Double.isInfinite(ch)) { + if (Double.isInfinite(ch)) { // Cannot refine infinity max_newton_iterations = 0; break chisq; } - if(ch < EPS2) { + if (ch < EPS2) { // Do not iterate, but refine with newton method max_newton_iterations = 20; break chisq; } - if(p > 1 - 1e-14 || p < 1e-100) { + if (p > 1 - 1e-14 || p < 1e-100) { // Not in appropriate value range for AS 91 max_newton_iterations = 20; break chisq; @@ -732,11 +736,11 @@ public class GammaDistribution implements DistributionWithRandom { // Phase II: Iteration final double c = k - 1; final double ch0 = ch; // backup initial approximation - for(int i = 1; i <= MAXIT; i++) { + for (int i = 1; i <= MAXIT; i++) { final double q = ch; // previous approximation final double p1 = 0.5 * ch; final double p2 = p - regularizedGammaP(k, p1); - if(Double.isInfinite(p2) || ch <= 0) { + if (Double.isInfinite(p2) || ch <= 0) { ch = ch0; max_newton_iterations = 27; break chisq; @@ -753,11 +757,11 @@ public class GammaDistribution implements DistributionWithRandom { final 
double s6 = (120. + c * (346. + 127. * c)) / 5040.; ch += t * (1 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6)))))); } - if(Math.abs(q - ch) < EPS2 * ch) { + if (Math.abs(q - ch) < EPS2 * ch) { break chisq; } // Divergence treatment, from GNU R - if(Math.abs(q - ch) > 0.1 * Math.abs(ch)) { + if (Math.abs(q - ch) > 0.1 * Math.abs(ch)) { ch = ((ch < q) ? 0.9 : 1.1) * q; } } @@ -765,7 +769,7 @@ public class GammaDistribution implements DistributionWithRandom { // no convergence in MAXIT iterations -- but we add Newton now... } double x = 0.5 * ch / theta; - if(max_newton_iterations > 0) { + if (max_newton_iterations > 0) { // Refine result using final Newton steps. // TODO: add unit tests that show an improvement! Maybe in logscale only? x = gammaQuantileNewtonRefinement(Math.log(p), k, theta, max_newton_iterations, x); @@ -788,33 +792,33 @@ public class GammaDistribution implements DistributionWithRandom { protected static double gammaQuantileNewtonRefinement(final double logpt, final double k, final double theta, final int maxit, double x) { final double EPS_N = 1e-15; // Precision threshold // 0 is not possible, try MIN_NORMAL instead - if(x <= 0) { + if (x <= 0) { x = Double.MIN_NORMAL; } // Current estimation double logpc = logcdf(x, k, theta); - if(x == Double.MIN_NORMAL && logpc > logpt * (1. + 1e-7)) { + if (x == Double.MIN_NORMAL && logpc > logpt * (1. 
+ 1e-7)) { return 0.; } - if(logpc == Double.NEGATIVE_INFINITY) { + if (logpc == Double.NEGATIVE_INFINITY) { return 0.; } // Refine by newton iterations - for(int i = 0; i < maxit; i++) { + for (int i = 0; i < maxit; i++) { // Error of current approximation final double logpe = logpc - logpt; - if(Math.abs(logpe) < Math.abs(EPS_N * logpt)) { + if (Math.abs(logpe) < Math.abs(EPS_N * logpt)) { break; } // Step size is controlled by PDF: final double g = logpdf(x, k, theta); - if(g == Double.NEGATIVE_INFINITY) { + if (g == Double.NEGATIVE_INFINITY) { break; } final double newx = x - logpe * Math.exp(logpc - g); // New estimate: logpc = logcdf(newx, k, theta); - if(Math.abs(logpc - logpt) > Math.abs(logpe) || (i > 0 && Math.abs(logpc - logpt) == Math.abs(logpe))) { + if (Math.abs(logpc - logpt) > Math.abs(logpe) || (i > 0 && Math.abs(logpc - logpt) == Math.abs(logpe))) { // no further improvement break; } @@ -840,21 +844,20 @@ public class GammaDistribution implements DistributionWithRandom { */ @Reference(authors = "J. M. Bernando", title = "Algorithm AS 103: Psi (Digamma) Function", booktitle = "Statistical Algorithms") public static double digamma(double x) { - if(!(x > 0)) { + if (!(x > 0)) { return Double.NaN; } // Method of equation 5: - if(x <= 1e-5) { + if (x <= 1e-5) { return -EULERS_CONST - 1. / x; } // Method of equation 4: - else if(x > 49.) { + else if (x > 49.) { final double ix2 = 1. / (x * x); // Partial series expansion return Math.log(x) - 0.5 / x - ix2 * ((1.0 / 12.) + ix2 * (1.0 / 120. - ix2 / 252.)); // + O(x^8) error - } - else { + } else { // Stirling expansion return digamma(x + 1.) - 1. / x; } @@ -869,78 +872,22 @@ public class GammaDistribution implements DistributionWithRandom { * @return trigamma value */ public static double trigamma(double x) { - if(!(x > 0)) { + if (!(x > 0)) { return Double.NaN; } // Method of equation 5: - if(x <= 1e-5) { + if (x <= 1e-5) { return 1. / (x * x); } // Method of equation 4: - else if(x > 49.) 
{ + else if (x > 49.) { final double ix2 = 1. / (x * x); // Partial series expansion return 1 / x - ix2 / 2. + ix2 / x * (1.0 / 6. - ix2 * (1.0 / 30. + ix2 / 42.)); // + O(x^8) error - } - else { + } else { // Stirling expansion return trigamma(x + 1.) - 1. / (x * x); } } - - /** - * Mean least squares estimation of Gamma distribution to a set of - * observations. - * - * @param data Data - * @return Assumed distribution - */ - public static GammaDistribution estimate(double[] data) { - return estimate(data, data.length); - } - - /** - * Mean least squares estimation of Gamma distribution to a set of - * observations. - * - * Reference: - * <p> - * Maximum likelihood estimation of the parameters of the gamma distribution - * and their bias<br /> - * S. C. Choi, R. Wette<br /> - * in: Technometrics - * </p> - * - * @param data Data - * @param len Length of array - * @return Assumed distribution - */ - @Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. 
Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892") - public static GammaDistribution estimate(double[] data, int len) { - double meanx = 0, meanlogx = 0; - for(int i = 0; i < len; i++) { - final double logx = Math.log(data[i]); - final double deltax = data[i] - meanx; - final double deltalogx = logx - meanlogx; - meanx += deltax / (i + 1.); - meanlogx += deltalogx / (i + 1.); - } - // Initial approximation - final double logmeanx = Math.log(meanx); - final double diff = logmeanx - meanlogx; - double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff); - - // Refine via newton iteration, based on Choi and Wette equation - while(true) { - double kdelta = (Math.log(k) - digamma(k) - diff) / (1 / k - trigamma(k)); - if(Math.abs(kdelta) < 1E-8 || Double.isNaN(kdelta)) { - break; - } - k += kdelta; - } - // Estimate theta: - final double theta = k / meanx; - return new GammaDistribution(k, theta); - } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java new file mode 100644 index 00000000..9cd6cb4e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedExtremeValueDistribution.java @@ -0,0 +1,167 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +import java.util.Random; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Generalized Extreme Value (GEV) distribution, also known as Fisher–Tippett + * distribution. + * + * This is a generalization of the Frechnet, Gumbel and (reversed) Weibull + * distributions. + * + * Implementation notice: apparently (see unit tests), our definition differs + * from the scipy definition by having the negative shape. + * + * @author Erich Schubert + */ +public class GeneralizedExtremeValueDistribution implements Distribution { + /** + * Parameters (location, scale, shape) + */ + final double mu, sigma, k; + + /** + * Random number generator. 
+ */ + Random random; + + /** + * Constructor. + * + * @param mu Location parameter mu + * @param sigma Scale parameter sigma + * @param k Shape parameter k + */ + public GeneralizedExtremeValueDistribution(double mu, double sigma, double k) { + this(mu, sigma, k, null); + } + + /** + * Constructor. + * + * @param mu Location parameter mu + * @param sigma Scale parameter sigma + * @param k Shape parameter k + * @param random Random number generator + */ + public GeneralizedExtremeValueDistribution(double mu, double sigma, double k, Random random) { + super(); + this.mu = mu; + this.sigma = sigma; + this.k = k; + this.random = random; + } + + /** + * PDF of GEV distribution + * + * @param x Value + * @param mu Location parameter mu + * @param sigma Scale parameter sigma + * @param k Shape parameter k + * @return PDF at position x. + */ + public static double pdf(double x, double mu, double sigma, double k) { + x = (x - mu) / sigma; + if (k > 0 || k < 0) { + if (k * x < -1) { + return 0.; + } + final double tx = Math.pow(1 + k * x, -1. / k); + return Math.pow(tx, k + 1) * Math.exp(-tx) / sigma; + } else { // Gumbel case: + return Math.exp(-x - Math.exp(-x)) / sigma; + } + } + + @Override + public double pdf(double x) { + return pdf(x, mu, sigma, k); + } + + /** + * CDF of GEV distribution + * + * @param val Value + * @param mu Location parameter mu + * @param sigma Scale parameter sigma + * @param k Shape parameter k + * @return CDF at position x. + */ + public static double cdf(double val, double mu, double sigma, double k) { + final double x = (val - mu) / sigma; + if (k > 0 || k < 0) { + if (k * x <= -1) { + return (k > 0) ? 0 : 1; + } + return Math.exp(-Math.pow(1 + k * x, -1. 
/ k)); + } else { // Gumbel case: + return Math.exp(-Math.exp(-x)); + } + } + + @Override + public double cdf(double val) { + return cdf(val, mu, sigma, k); + } + + /** + * Quantile function of GEV distribution + * + * @param val Value + * @param mu Location parameter mu + * @param sigma Scale parameter sigma + * @param k Shape parameter k + * @return Quantile function at position x. + */ + public static double quantile(double val, double mu, double sigma, double k) { + if (val < 0.0 || val > 1.0) { + return Double.NaN; + } + if (k > 0) { + return mu + sigma * Math.max((Math.pow(-Math.log(val), -k) - 1.) / k, -1. / k); + } else if (k < 0) { + return mu + sigma * Math.min((Math.pow(-Math.log(val), -k) - 1.) / k, -1. / k); + } else { // Gumbel + return mu + sigma * Math.log(1. / Math.log(1. / val)); + } + } + + @Override + public double quantile(double val) { + return quantile(val, mu, sigma, k); + } + + @Override + public double nextRandom() { + return quantile(random.nextDouble()); + } + + @Override + public String toString() { + return "GeneralizedExtremeValueDistribution(sigma=" + sigma + ", mu=" + mu + ", k=" + k + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java new file mode 100644 index 00000000..467d6aae --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticAlternateDistribution.java @@ -0,0 +1,162 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as 
published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Random; + +/** + * Generalized logistic distribution. + * + * One of multiple ways of generalizing the logistic distribution. + * + * Where {@code shape=0} yields the regular logistic distribution. + * + * @author Erich Schubert + */ +public class GeneralizedLogisticAlternateDistribution implements Distribution { + /** + * Parameters: location and scale + */ + double location, scale; + + /** + * Shape parameter, for generalized logistic distribution. + */ + double shape; + + /** + * Random number generator + */ + Random random; + + /** + * Constructor. + * + * @param location Location + * @param scale Scale + * @param shape Shape parameter + */ + public GeneralizedLogisticAlternateDistribution(double location, double scale, double shape) { + this(location, scale, shape, null); + } + + /** + * Constructor. + * + * @param location Location + * @param scale Scale + * @param shape Shape parameter + * @param random Random number generator + */ + public GeneralizedLogisticAlternateDistribution(double location, double scale, double shape, Random random) { + super(); + this.location = location; + this.scale = scale; + this.shape = shape; + this.random = random; + if (!(shape > -1.) || !(shape < 1.)) { + throw new ArithmeticException("Invalid shape parameter - must be -1 to +1, is: " + shape); + } + } + + /** + * Probability density function. 
+ * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return PDF + */ + public static double pdf(double val, double loc, double scale, double shape) { + val = (val - loc) / scale; + if (shape != 0.) { + val = -Math.log(1 - shape * val) / shape; + } + double f = 1. + Math.exp(-val); + return Math.exp(-val * (1 - shape)) / (scale * f * f); + } + + @Override + public double pdf(double val) { + return pdf(val, location, scale, shape); + } + + /** + * Cumulative density function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return CDF + */ + public static double cdf(double val, double loc, double scale, double shape) { + val = (val - loc) / scale; + if (shape != 0.) { + final double tmp = 1 - shape * val; + if (tmp < 1e-15) { + return (shape < 0) ? 0 : 1; + } + val = -Math.log(tmp) / shape; + } + return 1. / (1. + Math.exp(-val)); + } + + @Override + public double cdf(double val) { + return cdf(val, location, scale, shape); + } + + /** + * Quantile function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return Quantile + */ + public static double quantile(double val, double loc, double scale, double shape) { + if (shape == 0.) 
{ + return loc - scale * Math.log((1 - val) / val); + } + return loc + scale * (1 - Math.pow((1 - val) / val, shape)) / shape; + } + + @Override + public double quantile(double val) { + return quantile(val, location, scale, shape); + } + + @Override + public double nextRandom() { + double u = random.nextDouble(); + return quantile(u, location, scale, shape); + } + + @Override + public String toString() { + return "GeneralizedLogisticAlternateDistribution(location=" + location + ", scale=" + scale + ", shape=" + shape + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java new file mode 100644 index 00000000..76f71107 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GeneralizedLogisticDistribution.java @@ -0,0 +1,184 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Random; + +/** + * Generalized logistic distribution. 
(Type I, Skew-logistic distribution) + * + * One of multiple ways of generalizing the logistic distribution. + * + * {@code pdf(x) = shape * Math.exp(-x) / (1 + Math.exp(-x))**(shape+1)} + * + * {@code cdf(x) = Math.pow(1+Math.exp(-x), -shape)} + * + * Where {@code shape=1} yields the regular logistic distribution. + * + * @author Erich Schubert + */ +public class GeneralizedLogisticDistribution implements Distribution { + /** + * Parameters: location and scale + */ + double location, scale; + + /** + * Shape parameter, for generalized logistic distribution. + */ + double shape; + + /** + * Random number generator + */ + Random random; + + /** + * Constructor. + * + * @param location Location + * @param scale Scale + * @param shape Shape parameter + */ + public GeneralizedLogisticDistribution(double location, double scale, double shape) { + this(location, scale, shape, null); + } + + /** + * Constructor. + * + * @param location Location + * @param scale Scale + * @param shape Shape parameter + * @param random Random number generator + */ + public GeneralizedLogisticDistribution(double location, double scale, double shape, Random random) { + super(); + this.location = location; + this.scale = scale; + this.shape = shape; + this.random = random; + } + + /** + * Probability density function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return PDF + */ + public static double pdf(double val, double loc, double scale, double shape) { + val = (val - loc) / scale; + double e = Math.exp(-val); + double f = 1. + e; + return shape * e / (scale * Math.pow(f, shape + 1.)); + } + + /** + * log Probability density function. + * + * TODO: untested. 
+ * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return log PDF + */ + public static double logpdf(double val, double loc, double scale, double shape) { + val = (val - loc) / scale; + double e = Math.exp(-val); + return -(val + (shape + 1.0) * Math.log1p(e)) + Math.log(shape); + } + + @Override + public double pdf(double val) { + return pdf(val, location, scale, shape); + } + + /** + * Cumulative density function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return CDF + */ + public static double cdf(double val, double loc, double scale, double shape) { + val = (val - loc) / scale; + return Math.pow(1. + Math.exp(-val), -shape); + } + + /** + * log Cumulative density function. + * + * TODO: untested. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return log PDF + */ + public static double logcdf(double val, double loc, double scale, double shape) { + val = (val - loc) / scale; + return Math.log1p(Math.exp(-val)) * -shape; + } + + @Override + public double cdf(double val) { + return cdf(val, location, scale, shape); + } + + /** + * Quantile function. 
+ * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape Shape + * @return Quantile + */ + public static double quantile(double val, double loc, double scale, double shape) { + return loc + scale * -Math.log(Math.pow(val, -1.0 / shape) - 1); + } + + @Override + public double quantile(double val) { + return quantile(val, location, scale, shape); + } + + @Override + public double nextRandom() { + double u = random.nextDouble(); + return location + scale * -Math.log(Math.pow(u, -1.0 / shape) - 1); + } + + @Override + public String toString() { + return "GeneralizedLogisticDistribution(location=" + location + ", scale=" + scale + ", shape=" + shape + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java new file mode 100644 index 00000000..15b4ca24 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/GumbelDistribution.java @@ -0,0 +1,134 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.Random; + +/** + * Gumbel distribution, also known as Log-Weibull distribution. + * + * @author Erich Schubert + */ +public class GumbelDistribution implements Distribution { + /** + * Mode parameter mu. + */ + double mu; + + /** + * Shape parameter beta. + */ + double beta; + + /** + * Random number generator. + */ + Random random; + + /** + * Constructor. + * + * @param mu Mode + * @param beta Shape + */ + public GumbelDistribution(double mu, double beta) { + this(mu, beta, null); + } + + /** + * Constructor. + * + * @param mu Mode + * @param beta Shape + * @param random Random number generator + */ + public GumbelDistribution(double mu, double beta, Random random) { + super(); + this.mu = mu; + this.beta = beta; + this.random = random; + } + + /** + * PDF of Weibull distribution + * + * @param x Value + * @param mu Mode + * @param beta Shape + * @return PDF at position x. + */ + public static double pdf(double x, double mu, double beta) { + final double z = (x - mu) / beta; + return Math.exp(-z - Math.exp(-z)) / beta; + } + + @Override + public double pdf(double x) { + return pdf(x, mu, beta); + } + + /** + * CDF of Weibull distribution + * + * @param val Value + * @param mu Mode + * @param beta Shape + * @return CDF at position x. + */ + public static double cdf(double val, double mu, double beta) { + return Math.exp(-Math.exp(-(val - mu) / beta)); + } + + @Override + public double cdf(double val) { + return cdf(val, mu, beta); + } + + /** + * Quantile function of Weibull distribution + * + * @param val Value + * @param mu Mode + * @param beta Shape + * @return Quantile function at position x. 
+ */ + public static double quantile(double val, double mu, double beta) { + return mu + beta * Math.log(1 / Math.log(1 / val)); + } + + @Override + public double quantile(double val) { + return quantile(val, mu, beta); + } + + @Override + public double nextRandom() { + return mu + beta * Math.log(1 / Math.log(1 / random.nextDouble())); + } + + @Override + public String toString() { + return "GumbelDistribution(mu=" + mu + ", beta=" + beta + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java index df8fecda..145744db 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -63,7 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * @author Erich Schubert */ @Reference(title = "Randomized halton sequences", authors = "Wang, X. and Hickernell, F.J.", booktitle = "Mathematical and Computer Modelling Vol. 
32 (7)", url = "http://dx.doi.org/10.1016/S0895-7177(00)00178-3") -public class HaltonUniformDistribution implements DistributionWithRandom { +public class HaltonUniformDistribution implements Distribution { /** * Minimum */ diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java new file mode 100644 index 00000000..9414767c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/KappaDistribution.java @@ -0,0 +1,223 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Random; + +/** + * Kappa distribution, by Hosking. + * + * TODO: add references. + * + * @author Erich Schubert + */ +public class KappaDistribution implements Distribution { + /** + * Parameters: location and scale + */ + double location, scale; + + /** + * Shape parameters. + */ + double shape1, shape2; + + /** + * Random number generator + */ + Random random; + + /** + * Constructor. 
+ * + * @param location Location + * @param scale Scale + * @param shape1 Shape parameter + * @param shape2 Shape parameter + */ + public KappaDistribution(double location, double scale, double shape1, double shape2) { + this(location, scale, shape1, shape2, null); + } + + /** + * Constructor. + * + * @param location Location + * @param scale Scale + * @param shape1 Shape parameter + * @param shape2 Shape parameter + * @param random Random number generator + */ + public KappaDistribution(double location, double scale, double shape1, double shape2, Random random) { + super(); + this.location = location; + this.scale = scale; + this.shape1 = shape1; + this.shape2 = shape2; + this.random = random; + if(shape2 >= 0.) { + if(shape1 < -1.) { + throw new ArithmeticException("Invalid shape1 parameter - must be greater than -1 if shape2 >= 0.!"); + } + } + else { + if(shape1 < 1. || shape1 > 1. / shape2) { + throw new ArithmeticException("Invalid shape1 parameter - must be -1 to +1/shape2 if shape2 < 0.!"); + } + } + } + + /** + * Probability density function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape1 Shape parameter + * @param shape2 Shape parameter + * @return PDF + */ + public static double pdf(double val, double loc, double scale, double shape1, double shape2) { + final double c = cdf(val, loc, scale, shape1, shape2); + val = (val - loc) / scale; + if(shape1 != 0.) { + val = 1 - shape1 * val; + if(val < 1e-15) { + return 0.; + } + val = (1. - 1. / shape1) * Math.log(val); + } + val = Math.exp(-val); + return val / scale * Math.pow(c, 1. - shape2); + } + + @Override + public double pdf(double val) { + return pdf(val, location, scale, shape1, shape2); + } + + /** + * Cumulative density function. 
+ * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape1 Shape parameter + * @param shape2 Shape parameter + * @return CDF + */ + public static double cdf(double val, double loc, double scale, double shape1, double shape2) { + val = (val - loc) / scale; + if(shape1 != 0.) { + double tmp = 1. - shape1 * val; + if(tmp < 1e-15) { + return (shape1 < 0.) ? 0. : 1.; + } + val = Math.exp(Math.log(tmp) / shape1); + } + else { + val = Math.exp(-val); + } + if(shape2 != 0.) { + double tmp = 1. - shape2 * val; + if(tmp < 1e-15) { + return 0.; + } + val = Math.exp(Math.log(tmp) / shape2); + } + else { + val = Math.exp(-val); + } + return val; + } + + @Override + public double cdf(double val) { + return cdf(val, location, scale, shape1, shape2); + } + + /** + * Quantile function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @param shape1 Shape parameter + * @param shape2 Shape parameter + * @return Quantile + */ + public static double quantile(double val, double loc, double scale, double shape1, double shape2) { + if(!(val >= 0.) || !(val <= 1.)) { + return Double.NaN; + } + if(val == 0.) { + if(shape2 <= 0.) { + if(shape1 < 0.) { + return loc + scale / shape1; + } + else { + return Double.NEGATIVE_INFINITY; + } + } + else { + if(shape1 != 0.) { + return loc + scale / shape1 * (1. - Math.pow(shape2, -shape1)); + } + else { + return loc + scale * Math.log(shape2); + } + } + } + if(val == 1.) { + if(shape1 <= 0.) { + return Double.NEGATIVE_INFINITY; + } + return loc + scale / shape1; + } + val = -Math.log(val); + if(shape2 != 0.) { + val = (1 - Math.exp(-shape2 * val)) / shape2; + } + val = -Math.log(val); + if(shape1 != 0.) 
{ + val = (1 - Math.exp(-shape1 * val)) / shape1; + } + return loc + scale * val; + } + + @Override + public double quantile(double val) { + return quantile(val, location, scale, shape1, shape2); + } + + @Override + public double nextRandom() { + double u = random.nextDouble(); + return quantile(u, location, scale, shape1, shape2); + } + + @Override + public String toString() { + return "KappaDistribution(location=" + location + ", scale=" + scale + ", shape1=" + shape1 + ", shape2=" + shape2 + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java new file mode 100644 index 00000000..eb238a20 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LaplaceDistribution.java @@ -0,0 +1,172 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.utilities.Alias; + +/** + * Laplace distribution also known as double exponential distribution + * + * @author Erich Schubert + */ +@Alias("DoubleExponentialDistribution") +public class LaplaceDistribution implements Distribution { + /** + * Random generator. + */ + Random rnd; + + /** + * Rate, inverse of mean + */ + double rate; + + /** + * Location parameter. + */ + double location; + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + */ + public LaplaceDistribution(double rate) { + this(rate, 0.0, null); + } + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + * @param location Location parameter + */ + public LaplaceDistribution(double rate, double location) { + this(rate, location, null); + } + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + * @param random Random generator + */ + public LaplaceDistribution(double rate, Random random) { + this(rate, 0.0, random); + } + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + * @param location Location parameter + * @param random Random generator + */ + public LaplaceDistribution(double rate, double location, Random random) { + super(); + this.rate = rate; + this.location = location; + this.rnd = random; + } + + @Override + public double pdf(double val) { + return .5 * rate * Math.exp(-rate * Math.abs(val - location)); + } + + /** + * PDF, static version + * + * @param val Value to compute PDF at + * @param rate Rate parameter (1/scale) + * @return probability density + */ + public static double pdf(double val, double rate) { + return .5 * rate * Math.exp(-rate * Math.abs(val)); + } + + @Override + public double cdf(double val) { + final double v = .5 * Math.exp(-rate * (val - location)); + return (val < location) ? 
v : 1 - v; + } + + /** + * Cumulative density, static version + * + * @param val Value to compute CDF at + * @param rate Rate parameter (1/scale) + * @return cumulative density + */ + public static double cdf(double val, double rate) { + final double v = .5 * Math.exp(-rate * val); + return (val < 0.) ? v : 1 - v; + } + + @Override + public double quantile(double val) { + if (val < .5) { + return Math.log(2 * val) / rate + location; + } else { + return -Math.log(2. - 2. * val) / rate + location; + } + } + + /** + * Quantile function, static version + * + * @param val Value to compute quantile for + * @param rate Rate parameter + * @param location Location parameter + * @return Quantile + */ + public static double quantile(double val, double rate, double location) { + if (val < .5) { + return Math.log(2 * val) / rate + location; + } else { + return -Math.log(2. - 2. * val) / rate + location; + } + } + + /** + * This method currently uses the naive approach of returning + * <code>-log(uniform)</code>. + */ + @Override + public double nextRandom() { + double val = rnd.nextDouble(); + if (val < .5) { + return Math.log(2 * val) / rate + location; + } else { + return -Math.log(2. - 2. 
* val) / rate + location; + } + } + + @Override + public String toString() { + return "LaplaceDistribution(rate=" + rate + ", location=" + location + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java new file mode 100644 index 00000000..496e6867 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaAlternateDistribution.java @@ -0,0 +1,209 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Alternate Log-Gamma Distribution, with random generation and density + * functions. + * + * This distribution can be outlined as Y=log X with X Gamma distributed. + * + * Note: this matches the loggamma of SciPy. + * + * @author Erich Schubert + */ +public class LogGammaAlternateDistribution implements Distribution { + /** + * Alpha == k. + */ + private final double k; + + /** + * Theta == 1 / Beta. 
+ */ + private final double theta; + + /** + * Translation offset. + */ + private final double shift; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param shift Location offset + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @param random Random generator + */ + public LogGammaAlternateDistribution(double k, double theta, double shift, Random random) { + super(); + if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs! + throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta); + } + + this.k = k; + this.theta = theta; + this.shift = shift; + this.random = random; + } + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @param shift Location offset + */ + public LogGammaAlternateDistribution(double k, double theta, double shift) { + this(k, theta, shift, null); + } + + @Override + public double pdf(double val) { + return pdf(val, k, theta, shift); + } + + @Override + public double cdf(double val) { + return cdf(val, k, theta, shift); + } + + @Override + public double quantile(double val) { + return quantile(val, k, theta, shift); + } + + @Override + public double nextRandom() { + return Math.log(GammaDistribution.nextRandom(k, 1., random)) / theta + shift; + } + + /** + * Simple toString explaining the distribution parameters. + * + * Used in producing a model description. + */ + @Override + public String toString() { + return "LogGammaAlternateDistribution(k=" + k + ", theta=" + theta + ", shift=" + shift + ")"; + } + + /** + * @return the value of k + */ + public double getK() { + return k; + } + + /** + * @return the standard deviation + */ + public double getTheta() { + return theta; + } + + /** + * The CDF, static version. 
+ * + * @param x Value + * @param k Shape k + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @return cdf value + */ + public static double cdf(double x, double k, double theta, double shift) { + if (x <= shift) { + return 0.; + } + x = (x - shift) * theta; + return GammaDistribution.regularizedGammaP(k, Math.exp(x)); + } + + /** + * The log CDF, static version. + * + * @param x Value + * @param k Shape k + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @return cdf value + */ + public static double logcdf(double x, double k, double theta, double shift) { + if (x <= shift) { + return 0.; + } + x = (x - shift) * theta; + return GammaDistribution.logregularizedGammaP(k, Math.exp(x)); + } + + /** + * LogGamma distribution PDF (with 0.0 for x < 0) + * + * @param x query value + * @param k Alpha + * @param theta Theta = 1 / Beta + * @return probability density + */ + public static double pdf(double x, double k, double theta, double shift) { + if (x <= shift) { + return 0.; + } + x = (x - shift) * theta; + return theta * Math.exp(k * x - Math.exp(x) - GammaDistribution.logGamma(k)); + } + + /** + * LogGamma distribution PDF (with 0.0 for x < 0) + * + * @param x query value + * @param k Alpha + * @param theta Theta = 1 / Beta + * @return probability density + */ + public static double logpdf(double x, double k, double theta, double shift) { + if (x <= shift) { + return 0.0; + } + x = (x - shift) * theta; + return Math.log(theta) + k * x - Math.exp(x) - GammaDistribution.logGamma(k); + } + + /** + * Compute probit (inverse cdf) for LogGamma distributions. + * + * @param p Probability + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. 
"scaling" parameter + * @return Probit for Gamma distribution + */ + public static double quantile(double p, double k, double theta, double shift) { + return Math.log(GammaDistribution.quantile(p, k, 1.)) / theta + shift; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java new file mode 100644 index 00000000..db3a2b3f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogGammaDistribution.java @@ -0,0 +1,194 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Log-Gamma Distribution, with random generation and density functions. + * + * This distribution can be outlined as Y=e^X with X Gamma distributed. + * + * Note: this is a different loggamma than scipy uses. + * + * @author Erich Schubert + */ +public class LogGammaDistribution implements Distribution { + /** + * Alpha == k. + */ + private final double k; + + /** + * Theta == 1 / Beta. 
+ */ + private final double theta; + + /** + * Translation offset. + */ + private final double shift; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param shift Location offset + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @param random Random generator + */ + public LogGammaDistribution(double k, double theta, double shift, Random random) { + super(); + if (!(k > 0.0) || !(theta > 0.0)) { // Note: also tests for NaNs! + throw new IllegalArgumentException("Invalid parameters for Gamma distribution: " + k + " " + theta); + } + + this.k = k; + this.theta = theta; + this.shift = shift; + this.random = random; + } + + /** + * Constructor for Gamma distribution. + * + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @param shift Location offset + */ + public LogGammaDistribution(double k, double theta, double shift) { + this(k, theta, shift, null); + } + + @Override + public double pdf(double val) { + return pdf(val, k, theta, shift); + } + + @Override + public double cdf(double val) { + return cdf(val, k, theta, shift); + } + + @Override + public double quantile(double val) { + return quantile(val, k, theta, shift); + } + + @Override + public double nextRandom() { + return Math.exp(GammaDistribution.nextRandom(k, theta, random)) + shift; + } + + /** + * Simple toString explaining the distribution parameters. + * + * Used in producing a model description. + */ + @Override + public String toString() { + return "LogGammaDistribution(k=" + k + ", theta=" + theta + ", shift=" + shift + ")"; + } + + /** + * @return the value of k + */ + public double getK() { + return k; + } + + /** + * @return the standard deviation + */ + public double getTheta() { + return theta; + } + + /** + * The CDF, static version. 
+ * + * @param x Value + * @param k Shape k + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @return cdf value + */ + public static double cdf(double x, double k, double theta, double shift) { + x = (x - shift); + if (x <= 1.) { + return 0.; + } + return GammaDistribution.regularizedGammaP(k, Math.log(x)); + } + + /** + * The log CDF, static version. + * + * @param x Value + * @param k Shape k + * @param theta Theta = 1.0/Beta aka. "scaling" parameter + * @return cdf value + */ + public static double logcdf(double x, double k, double theta, double shift) { + x = (x - shift); + if (x <= 1.) { + return 0.; + } + return GammaDistribution.logregularizedGammaP(k, Math.log(x)); + } + + /** + * LogGamma distribution PDF (with 0.0 for x < 0) + * + * @param x query value + * @param k Alpha + * @param theta Theta = 1 / Beta + * @return probability density + */ + public static double pdf(double x, double k, double theta, double shift) { + x = (x - shift); + if (x <= 1.) { + return 0.; + } + return Math.pow(theta, -k) / GammaDistribution.gamma(k) * Math.pow(x, -(1. / theta + 1.)) * Math.pow(Math.log(x), k - 1.); + } + + // TODO: logpdf + + /** + * Compute probit (inverse cdf) for LogGamma distributions. + * + * @param p Probability + * @param k k, alpha aka. "shape" parameter + * @param theta Theta = 1.0/Beta aka. 
"scaling" parameter + * @return Probit for Gamma distribution + */ + public static double quantile(double p, double k, double theta, double shift) { + return Math.exp(GammaDistribution.quantile(p, k, theta)) + shift; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java new file mode 100644 index 00000000..cb75561d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogLogisticDistribution.java @@ -0,0 +1,137 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Random; + +/** + * Log-Logistic distribution also known as Fisk distribution. + * + * @author Erich Schubert + */ +public class LogLogisticDistribution implements Distribution { + /** + * Parameters: scale and shape + */ + double scale, shape; + + /** + * Random number generator + */ + Random random; + + /** + * Constructor. 
+ * + * @param scale Scale + * @param shape Shape + */ + public LogLogisticDistribution(double scale, double shape) { + this(scale, shape, null); + } + + /** + * Constructor. + * + * @param scale Scale + * @param shape Shape + * @param random Random number generator + */ + public LogLogisticDistribution(double scale, double shape, Random random) { + super(); + this.scale = scale; + this.shape = shape; + this.random = random; + } + + /** + * Probability density function. + * + * @param val Value + * @param scale Scale + * @param shape Shape + * @return PDF + */ + public static double pdf(double val, double scale, double shape) { + if(val < 0) { + return 0; + } + val = Math.abs(val / scale); + double f = shape / scale * Math.pow(val, shape - 1.); + double d = 1. + Math.pow(val, shape); + return f / (d * d); + } + + @Override + public double pdf(double val) { + return pdf(val, scale, shape); + } + + /** + * Cumulative density function. + * + * @param val Value + * @param scale Scale + * @param shape Shape + * @return CDF + */ + public static double cdf(double val, double scale, double shape) { + if(val < 0) { + return 0; + } + return 1. / (1. + Math.pow(val / scale, -shape)); + } + + @Override + public double cdf(double val) { + return cdf(val, scale, shape); + } + + /** + * Quantile function. + * + * @param val Value + * @param scale Scale + * @param shape Shape + * @return Quantile + */ + public static double quantile(double val, double scale, double shape) { + return scale * Math.pow(val / (1. - val), 1. / shape); + } + + @Override + public double quantile(double val) { + return quantile(val, scale, shape); + } + + @Override + public double nextRandom() { + double u = random.nextDouble(); + return scale * Math.pow(u / (1. - u), 1. 
/ shape); + } + + @Override + public String toString() { + return "LogLogisticDistribution(scale=" + scale + ", shape=" + shape + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java new file mode 100644 index 00000000..4c3d9aa0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogNormalDistribution.java @@ -0,0 +1,165 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.Alias; + +/** + * Log-Normal distribution. + * + * The parameterization of this class is somewhere inbetween of GNU R and SciPy. + * Similar to GNU R we use the logmean and logstddev. Similar to Scipy, we also + * have a location parameter that shifts the distribution. 
+ * + * Our implementation maps to SciPy's as follows: + * <tt>scipy.stats.lognorm(logstddev, shift, math.exp(logmean))</tt> + * + * @author Erich Schubert + */ +@Alias({ "lognormal" }) +public class LogNormalDistribution implements Distribution { + /** + * Mean value for the generator + */ + private double logmean; + + /** + * Standard deviation + */ + private double logstddev; + + /** + * Additional shift factor + */ + private double shift = 0.; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for Log-Normal distribution + * + * @param logmean Mean + * @param logstddev Standard Deviation + * @param shift Shifting offset + * @param random Random generator + */ + public LogNormalDistribution(double logmean, double logstddev, double shift, Random random) { + super(); + this.logmean = logmean; + this.logstddev = logstddev; + this.shift = shift; + this.random = random; + } + + /** + * Constructor. + * + * @param logmean Mean + * @param logstddev Standard deviation + * @param shift Shifting offset + */ + public LogNormalDistribution(double logmean, double logstddev, double shift) { + this(logmean, logstddev, shift, null); + } + + @Override + public double pdf(double val) { + return pdf(val - shift, logmean, logstddev); + } + + @Override + public double cdf(double val) { + return cdf(val - shift, logmean, logstddev); + } + + @Override + public double quantile(double val) { + return quantile(val, logmean, logstddev) + shift; + } + + /** + * Probability density function of the normal distribution. + * + * <pre> + * 1/(SQRT(2*pi)*sigma*x) * e^(-log(x-mu)^2/2sigma^2) + * </pre> + * + * + * @param x The value. + * @param mu The mean. + * @param sigma The standard deviation. + * @return PDF of the given normal distribution at x. + */ + public static double pdf(double x, double mu, double sigma) { + if (x <= 0.) 
{ + return 0.; + } + final double x_mu = Math.log(x) - mu; + final double sigmasq = sigma * sigma; + return 1 / (MathUtil.SQRTTWOPI * sigma * x) * Math.exp(-.5 * x_mu * x_mu / sigmasq); + } + + /** + * Cumulative probability density function (CDF) of a normal distribution. + * + * @param x value to evaluate CDF at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The CDF of the given normal distribution at x. + */ + public static double cdf(double x, double mu, double sigma) { + if (x <= 0.) { + return 0.; + } + return .5 * (1 + NormalDistribution.erf((Math.log(x) - mu) / (MathUtil.SQRT2 * sigma))); + } + + /** + * Inverse cumulative probability density function (probit) of a normal + * distribution. + * + * @param x value to evaluate probit function at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The probit of the given normal distribution at x. + */ + public static double quantile(double x, double mu, double sigma) { + return Math.exp(mu + sigma * NormalDistribution.standardNormalQuantile(x)); + } + + @Override + public double nextRandom() { + return Math.exp(logmean + random.nextGaussian() * logstddev) + shift; + } + + @Override + public String toString() { + return "LogNormalDistribution(logmean=" + logmean + ", logstddev=" + logstddev + ", shift=" + shift + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java new file mode 100644 index 00000000..052847d6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/LogisticDistribution.java @@ -0,0 +1,186 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + 
This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Logistic distribution. + * + * @author Erich Schubert + */ +public class LogisticDistribution implements Distribution { + /** + * Parameters: location and scale + */ + double location, scale; + + /** + * Random number generator + */ + Random random; + + /** + * Constructor. + * + * @param location Location + * @param scale Scale + */ + public LogisticDistribution(double location, double scale) { + this(location, scale, null); + } + + /** + * Constructor. + * + * @param location Location + * @param scale Scale + * @param random Random number generator + */ + public LogisticDistribution(double location, double scale, Random random) { + super(); + this.location = location; + this.scale = scale; + this.random = random; + } + + /** + * Probability density function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @return PDF + */ + public static double pdf(double val, double loc, double scale) { + val = Math.abs((val - loc) / scale); + double e = Math.exp(-val); + double f = 1.0 + e; + return e / (scale * f * f); + } + + /** + * log Probability density function. + * + * TODO: untested. 
+ * + * @param val Value + * @param loc Location + * @param scale Scale + * @return log PDF + */ + public static double logpdf(double val, double loc, double scale) { + val = Math.abs((val - loc) / scale); + double e = Math.exp(-val); + double f = 1.0 + e; + return -(val + Math.log(scale * f * f)); + } + + @Override + public double pdf(double val) { + return pdf(val, location, scale); + } + + /** + * Cumulative density function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @return CDF + */ + public static double cdf(double val, double loc, double scale) { + val = (val - loc) / scale; + return 1. / (1. + Math.exp(-val)); + } + + /** + * log Cumulative density function. + * + * TODO: untested. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @return log CDF + */ + public static double logcdf(double val, double loc, double scale) { + val = (val - loc) / scale; + if (val >= -18.) { + return -Math.log1p(Math.exp(-val)); + } else if (val < -33.3) { + return val; + } else { + return val - Math.exp(val); + } + } + + @Override + public double cdf(double val) { + return cdf(val, location, scale); + } + + /** + * Quantile function. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @return Quantile + */ + public static double quantile(double val, double loc, double scale) { + return loc + scale * Math.log(val / (1. - val)); + } + + /** + * log Quantile function. + * + * TODO: untested. + * + * @param val Value + * @param loc Location + * @param scale Scale + * @return Quantile + */ + public static double logquantile(double val, double loc, double scale) { + return loc + scale * (val - MathUtil.log1mexp(-val)); + } + + @Override + public double quantile(double val) { + return quantile(val, location, scale); + } + + @Override + public double nextRandom() { + double u = random.nextDouble(); + return location + scale * Math.log(u / (1.
- u)); + } + + @Override + public String toString() { + return "LogisticDistribution(location=" + location + ", scale=" + scale + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java index 1845dec1..c4ae7b6c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -26,13 +26,15 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; import java.util.Random; import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.Alias; /** * Gaussian distribution aka normal distribution * * @author Erich Schubert */ -public class NormalDistribution implements DistributionWithRandom { +@Alias({ "GaussianDistribution", "normal", "gauss" }) +public class NormalDistribution implements Distribution { /** * Coefficients for erf approximation. 
* @@ -106,6 +108,11 @@ public class NormalDistribution implements DistributionWithRandom { static final double ERFINV_D[] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 }; /** + * 1 / CDFINV(0.75) + */ + public static final double ONEBYPHIINV075 = 1.48260221850560186054; + + /** * Mean value for the generator */ private double mean; @@ -194,28 +201,28 @@ public class NormalDistribution implements DistributionWithRandom { * @return erfc(x) */ public static double erfc(double x) { - if(Double.isNaN(x)) { + if (Double.isNaN(x)) { return Double.NaN; } - if(Double.isInfinite(x)) { + if (Double.isInfinite(x)) { return (x < 0.0) ? 2 : 0; } double result = Double.NaN; double absx = Math.abs(x); // First approximation interval - if(absx < 0.46875) { + if (absx < 0.46875) { double z = x * x; result = 1 - x * ((((ERFAPP_A[0] * z + ERFAPP_A[1]) * z + ERFAPP_A[2]) * z + ERFAPP_A[3]) * z + ERFAPP_A[4]) / ((((ERFAPP_B[0] * z + ERFAPP_B[1]) * z + ERFAPP_B[2]) * z + ERFAPP_B[3]) * z + ERFAPP_B[4]); } // Second approximation interval - else if(absx < 4.0) { + else if (absx < 4.0) { double z = absx; result = ((((((((ERFAPP_C[0] * z + ERFAPP_C[1]) * z + ERFAPP_C[2]) * z + ERFAPP_C[3]) * z + ERFAPP_C[4]) * z + ERFAPP_C[5]) * z + ERFAPP_C[6]) * z + ERFAPP_C[7]) * z + ERFAPP_C[8]) / ((((((((ERFAPP_D[0] * z + ERFAPP_D[1]) * z + ERFAPP_D[2]) * z + ERFAPP_D[3]) * z + ERFAPP_D[4]) * z + ERFAPP_D[5]) * z + ERFAPP_D[6]) * z + ERFAPP_D[7]) * z + ERFAPP_D[8]); double rounded = Math.round(result * 16.0) / 16.0; double del = (absx - rounded) * (absx + rounded); result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; - if(x < 0.0) { + if (x < 0.0) { result = 2.0 - result; } } @@ -227,7 +234,7 @@ public class NormalDistribution implements DistributionWithRandom { double rounded = Math.round(result * 16.0) / 16.0; double del = (absx - rounded) * (absx + rounded); result = Math.exp(-rounded * rounded) * Math.exp(-del) * result; - if(x < 0.0) { + 
if (x < 0.0) { result = 2.0 - result; } } @@ -254,50 +261,24 @@ public class NormalDistribution implements DistributionWithRandom { * @return erfinv(x) */ public static double erfinv(double x) { - return standardNormalQuantile(0.5 * (x + 1)) / MathUtil.SQRT2; + return standardNormalQuantile(0.5 * (x + 1)) * MathUtil.SQRTHALF; } /** - * Approximate the inverse error function for normal distributions. - * - * Largely based on: - * <p> - * http://www.math.uio.no/~jacklam/notes/invnorm/index.html <br> - * by Peter John Acklam - * </p> + * Probability density function of the normal distribution. * - * FIXME: precision of this seems to be rather low, compared to our other - * functions. Only about 8-9 digits agree with SciPy/GNU R. + * <pre> + * 1/(SQRT(2*pi*sigma^2)) * e^(-(x-mu)^2/2sigma^2) + * </pre> * - * @param d Quantile. Must be in [0:1], obviously. - * @return Inverse erf. + * @param x The value. + * @param mu The mean. + * @param sigma The standard deviation. + * @return PDF of the given normal distribution at x. 
*/ - public static double standardNormalQuantile(double d) { - if(d == 0) { - return Double.NEGATIVE_INFINITY; - } - else if(d == 1) { - return Double.POSITIVE_INFINITY; - } - else if(Double.isNaN(d) || d < 0 || d > 1) { - return Double.NaN; - } - else if(d < P_LOW) { - // Rational approximation for lower region: - double q = Math.sqrt(-2 * Math.log(d)); - return (((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); - } - else if(P_HIGH < d) { - // Rational approximation for upper region: - double q = Math.sqrt(-2 * Math.log(1 - d)); - return -(((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); - } - else { - // Rational approximation for central region: - double q = d - 0.5D; - double r = q * q; - return (((((ERFINV_A[0] * r + ERFINV_A[1]) * r + ERFINV_A[2]) * r + ERFINV_A[3]) * r + ERFINV_A[4]) * r + ERFINV_A[5]) * q / (((((ERFINV_B[0] * r + ERFINV_B[1]) * r + ERFINV_B[2]) * r + ERFINV_B[3]) * r + ERFINV_B[4]) * r + 1); - } + public static double pdf(double x, double mu, double sigma) { + x = (x - mu) / sigma; + return MathUtil.ONE_BY_SQRTTWOPI / sigma * Math.exp(-.5 * x * x); } /** @@ -308,14 +289,10 @@ public class NormalDistribution implements DistributionWithRandom { * </pre> * * @param x The value. - * @param mu The mean. - * @param sigma The standard deviation. * @return PDF of the given normal distribution at x. 
*/ - public static double pdf(double x, double mu, double sigma) { - final double x_mu = x - mu; - final double sigmasq = sigma * sigma; - return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-.5 * x_mu * x_mu / sigmasq); + public static double standardNormalPDF(double x) { + return Math.exp(-.5 * x * x) * MathUtil.ONE_BY_SQRTTWOPI; } /** @@ -324,10 +301,21 @@ public class NormalDistribution implements DistributionWithRandom { * @param x value to evaluate CDF at * @param mu Mean value * @param sigma Standard deviation. - * @return The CDF of the normal given distribution at x. + * @return The CDF of the given normal distribution at x. */ public static double cdf(double x, double mu, double sigma) { - return .5 * (1 + erf((x - mu) / (MathUtil.SQRT2 * sigma))); + x = (x - mu) / sigma; + return .5 + .5 * erf(x * MathUtil.SQRTHALF); + } + + /** + * Cumulative probability density function (CDF) of a normal distribution. + * + * @param x value to evaluate CDF at + * @return The CDF of the given normal distribution at x. + */ + public static double standardNormalCDF(double x) { + return .5 + .5 * erf(x * MathUtil.SQRTHALF); } /** @@ -337,9 +325,47 @@ public class NormalDistribution implements DistributionWithRandom { * @param x value to evaluate probit function at * @param mu Mean value * @param sigma Standard deviation. - * @return The probit of the normal given distribution at x. + * @return The probit of the given normal distribution at x. */ public static double quantile(double x, double mu, double sigma) { return mu + sigma * standardNormalQuantile(x); } + + /** + * Approximate the inverse error function for normal distributions. + * + * Largely based on: + * <p> + * http://www.math.uio.no/~jacklam/notes/invnorm/index.html <br> + * by Peter John Acklam + * </p> + * + * FIXME: precision of this seems to be rather low, compared to our other + * functions. Only about 8-9 digits agree with SciPy/GNU R. + * + * @param d Quantile. Must be in [0:1], obviously.
+ * @return Inverse erf. + */ + public static double standardNormalQuantile(double d) { + if (d == 0) { + return Double.NEGATIVE_INFINITY; + } else if (d == 1) { + return Double.POSITIVE_INFINITY; + } else if (Double.isNaN(d) || d < 0 || d > 1) { + return Double.NaN; + } else if (d < P_LOW) { + // Rational approximation for lower region: + double q = Math.sqrt(-2 * Math.log(d)); + return (((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); + } else if (P_HIGH < d) { + // Rational approximation for upper region: + double q = Math.sqrt(-2 * Math.log(1 - d)); + return -(((((ERFINV_C[0] * q + ERFINV_C[1]) * q + ERFINV_C[2]) * q + ERFINV_C[3]) * q + ERFINV_C[4]) * q + ERFINV_C[5]) / ((((ERFINV_D[0] * q + ERFINV_D[1]) * q + ERFINV_D[2]) * q + ERFINV_D[3]) * q + 1); + } else { + // Rational approximation for central region: + double q = d - 0.5D; + double r = q * q; + return (((((ERFINV_A[0] * r + ERFINV_A[1]) * r + ERFINV_A[2]) * r + ERFINV_A[3]) * r + ERFINV_A[4]) * r + ERFINV_A[5]) * q / (((((ERFINV_B[0] * r + ERFINV_B[1]) * r + ERFINV_B[2]) * r + ERFINV_B[3]) * r + ERFINV_B[4]) * r + 1); + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java index a4ea9402..f6b2e0ca 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java @@ -1,15 +1,10 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; -import de.lmu.ifi.dbs.elki.math.MathUtil; -import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; -import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; -import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; - /* This file is part of ELKI: Environment for 
Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,6 +22,10 @@ import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; +import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException; /** * INCOMPLETE implementation of the poisson distribution. @@ -129,30 +128,28 @@ public class PoissonDistribution implements Distribution { @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf") public double pmf(int x) { // Invalid values - if(x < 0 || x > n) { + if (x < 0 || x > n) { return 0.0; } // Extreme probabilities - if(p <= 0d) { + if (p <= 0d) { return x == 0 ? 1.0 : 0.0; } - if(p >= 1d) { + if (p >= 1d) { return x == n ? 1.0 : 0.0; } // Extreme values of x - if(x == 0) { - if(p < 0.1) { + if (x == 0) { + if (p < 0.1) { return Math.exp(-devianceTerm(n, n * (1.0 - p)) - n * p); - } - else { + } else { return Math.exp(n * Math.log(1.0 - p)); } } - if(x == n) { - if(p > 0.9) { + if (x == n) { + if (p > 0.9) { return Math.exp(-devianceTerm(n, n * p) - n * (1 - p)); - } - else { + } else { return Math.exp(n * Math.log(p)); } } @@ -166,33 +163,31 @@ public class PoissonDistribution implements Distribution { @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. 
Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf") public double pdf(double x) { // Invalid values - if(x < 0 || x > n) { + if (x < 0 || x > n) { return 0.0; } // Extreme probabilities - if(p <= 0d) { + if (p <= 0d) { return x == 0 ? 1.0 : 0.0; } - if(p >= 1d) { + if (p >= 1d) { return x == n ? 1.0 : 0.0; } final double q = 1 - p; // FIXME: check for x to be integer, return 0 otherwise? // Extreme values of x - if(x == 0) { - if(p < 0.1) { + if (x == 0) { + if (p < 0.1) { return Math.exp(-devianceTerm(n, n * q) - n * p); - } - else { + } else { return Math.exp(n * Math.log(q)); } } - if(x == n) { - if(p > 0.9) { + if (x == n) { + if (p > 0.9) { return Math.exp(-devianceTerm(n, n * p) - n * q); - } - else { + } else { return Math.exp(n * Math.log(p)); } } @@ -204,13 +199,19 @@ public class PoissonDistribution implements Distribution { // FIXME: implement! @Override public double cdf(double val) { - throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET); + throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET); } // FIXME: implement! @Override public double quantile(double val) { - throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET); + throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET); + } + + // FIXME: implement! 
+ @Override + public double nextRandom() { + throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET); } /** @@ -223,16 +224,15 @@ public class PoissonDistribution implements Distribution { * @return pdf */ public static double poissonPDFm1(double x_plus_1, double lambda) { - if(Double.isInfinite(lambda)) { + if (Double.isInfinite(lambda)) { return 0.; } - if(x_plus_1 > 1) { + if (x_plus_1 > 1) { return rawProbability(x_plus_1 - 1, lambda); } - if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) { + if (lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) { return Math.exp(-lambda - GammaDistribution.logGamma(x_plus_1)); - } - else { + } else { return rawProbability(x_plus_1, lambda) * (x_plus_1 / lambda); } } @@ -247,16 +247,15 @@ public class PoissonDistribution implements Distribution { * @return pdf */ public static double logpoissonPDFm1(double x_plus_1, double lambda) { - if(Double.isInfinite(lambda)) { + if (Double.isInfinite(lambda)) { return Double.NEGATIVE_INFINITY; } - if(x_plus_1 > 1) { + if (x_plus_1 > 1) { return rawLogProbability(x_plus_1 - 1, lambda); } - if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) { + if (lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) { return -lambda - GammaDistribution.logGamma(x_plus_1); - } - else { + } else { return rawLogProbability(x_plus_1, lambda) + Math.log(x_plus_1 / lambda); } } @@ -272,18 +271,18 @@ public class PoissonDistribution implements Distribution { @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. 
Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf") private static double stirlingError(int n) { // Try to use a table value: - if(n < 16) { + if (n < 16) { return STIRLING_EXACT_ERROR[n << 1]; } final double nn = n * n; // Use the appropriate number of terms - if(n > 500) { + if (n > 500) { return (S0 - S1 / nn) / n; } - if(n > 80) { + if (n > 80) { return ((S0 - (S1 - S2 / nn)) / nn) / n; } - if(n > 35) { + if (n > 35) { return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n); } return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n); @@ -299,24 +298,23 @@ public class PoissonDistribution implements Distribution { */ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf") private static double stirlingError(double n) { - if(n < 16.0) { + if (n < 16.0) { // Our table has a step size of 0.5 final double n2 = 2.0 * n; - if(Math.floor(n2) == n2) { // Exact match + if (Math.floor(n2) == n2) { // Exact match return STIRLING_EXACT_ERROR[(int) n2]; - } - else { + } else { return GammaDistribution.logGamma(n + 1.0) - (n + 0.5) * Math.log(n) + n - MathUtil.LOGSQRTTWOPI; } } final double nn = n * n; - if(n > 500.0) { + if (n > 500.0) { return (S0 - S1 / nn) / n; } - if(n > 80.0) { + if (n > 80.0) { return ((S0 - (S1 - S2 / nn)) / nn) / n; } - if(n > 35.0) { + if (n > 35.0) { return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n); } return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n); @@ -333,15 +331,15 @@ public class PoissonDistribution implements Distribution { */ @Reference(title = "Fast and accurate computation of binomial probabilities", authors = "C. 
Loader", booktitle = "", url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf") private static double devianceTerm(double x, double np) { - if(Math.abs(x - np) < 0.1 * (x + np)) { + if (Math.abs(x - np) < 0.1 * (x + np)) { final double v = (x - np) / (x + np); double s = (x - np) * v; double ej = 2.0d * x * v; - for(int j = 1;; j++) { + for (int j = 1;; j++) { ej *= v * v; final double s1 = s + ej / (2 * j + 1); - if(s1 == s) { + if (s1 == s) { return s1; } s = s1; @@ -361,17 +359,17 @@ public class PoissonDistribution implements Distribution { */ public static double rawProbability(double x, double lambda) { // Extreme lambda - if(lambda == 0) { + if (lambda == 0) { return ((x == 0) ? 1. : 0.); } // Extreme values - if(Double.isInfinite(lambda) || x < 0) { + if (Double.isInfinite(lambda) || x < 0) { return 0.; } - if(x <= lambda * Double.MIN_NORMAL) { + if (x <= lambda * Double.MIN_NORMAL) { return Math.exp(-lambda); } - if(lambda < x * Double.MIN_NORMAL) { + if (lambda < x * Double.MIN_NORMAL) { double r = -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1); return Math.exp(r); } @@ -391,21 +389,26 @@ public class PoissonDistribution implements Distribution { */ public static double rawLogProbability(double x, double lambda) { // Extreme lambda - if(lambda == 0) { + if (lambda == 0) { return ((x == 0) ? 1. : Double.NEGATIVE_INFINITY); } // Extreme values - if(Double.isInfinite(lambda) || x < 0) { + if (Double.isInfinite(lambda) || x < 0) { return Double.NEGATIVE_INFINITY; } - if(x <= lambda * Double.MIN_NORMAL) { + if (x <= lambda * Double.MIN_NORMAL) { return -lambda; } - if(lambda < x * Double.MIN_NORMAL) { + if (lambda < x * Double.MIN_NORMAL) { return -lambda + x * Math.log(lambda) - GammaDistribution.logGamma(x + 1); } final double f = MathUtil.TWOPI * x; final double y = -stirlingError(x) - devianceTerm(x, lambda); return -0.5 * Math.log(f) + y; } -}
\ No newline at end of file + + @Override + public String toString() { + return "PoissonDistribution(n=" + n + ", p=" + p + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java new file mode 100644 index 00000000..31faf8ed --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/RayleighDistribution.java @@ -0,0 +1,165 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Rayleigh distribution. + * + * @author Erich Schubert + */ +public class RayleighDistribution implements Distribution { + /** + * Position parameter. + */ + double mu = 0.0; + + /** + * Scale parameter. + */ + double sigma; + + /** + * Random number generator. + */ + Random random; + + /** + * Constructor. + * + * @param sigma Scale parameter + */ + public RayleighDistribution(double sigma) { + this(0., sigma, null); + } + + /** + * Constructor. 
+ * + * @param mu Position parameter + * @param sigma Scale parameter + */ + public RayleighDistribution(double mu, double sigma) { + this(mu, sigma, null); + } + + /** + * Constructor. + * + * @param sigma Scale parameter + * @param random Random number generator + */ + public RayleighDistribution(double sigma, Random random) { + this(0., sigma, random); + } + + /** + * Constructor. + * + * @param mu Position parameter + * @param sigma Scale parameter + * @param random Random number generator + */ + public RayleighDistribution(double mu, double sigma, Random random) { + super(); + this.mu = mu; + this.sigma = sigma; + this.random = random; + } + + @Override + public double pdf(double x) { + return pdf(x - mu, sigma); + } + + /** + * PDF of Rayleigh distribution + * + * @param x Value + * @param sigma Scale + * @return PDF at position x. + */ + public static double pdf(double x, double sigma) { + if (x <= 0.) { + return 0.; + } + final double xs = x / sigma; + return xs / sigma * Math.exp(-.5 * xs * xs); + } + + @Override + public double cdf(double val) { + return cdf(val - mu, sigma); + } + + /** + * CDF of Rayleigh distribution + * + * @param x Value + * @param sigma Scale parameter + * @return CDF at position x. + */ + public static double cdf(double x, double sigma) { + if (x <= 0.) { + return 0.; + } + final double xs = x / sigma; + return 1. - Math.exp(-.5 * xs * xs); + } + + @Override + public double quantile(double val) { + return mu + quantile(val, sigma); + } + + /** + * Quantile function of Rayleigh distribution + * + * @param val Value + * @param sigma Scale parameter + * @return Quantile function at position x. + */ + public static double quantile(double val, double sigma) { + if (!(val >= 0.) || !(val <= 1.)) { + return Double.NaN; + } else if (val == 0.) { + return 0.; + } else if (val == 1.) { + return Double.POSITIVE_INFINITY; + } else { + return sigma * Math.sqrt(-2. 
* Math.log1p(-val)); + } + } + + @Override + public double nextRandom() { + return mu + sigma * Math.sqrt(-2. * Math.log(random.nextDouble())); + } + + @Override + public String toString() { + return "RayleighDistribution(mu=" + mu + ", sigma=" + sigma + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java new file mode 100644 index 00000000..f04e776b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/SkewGeneralizedNormalDistribution.java @@ -0,0 +1,169 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; + +/** + * Generalized Gaussian distribution by adding a skew term, similar to lognormal + * distributions. + * + * This is one kind of generalized normal distributions. Note that there are + * multiple that go by the name of a "Generalized Normal Distribution".
+ * + * @author Erich Schubert + */ +public class SkewGeneralizedNormalDistribution implements Distribution { + /** + * Mean value for the generator + */ + private double mean; + + /** + * Standard deviation + */ + private double stddev; + + /** + * Skew. + */ + private double skew; + + /** + * The random generator. + */ + private Random random; + + /** + * Constructor for Gaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + * @param skew Skew + * @param random Random generator + */ + public SkewGeneralizedNormalDistribution(double mean, double stddev, double skew, Random random) { + super(); + this.mean = mean; + this.stddev = stddev; + this.skew = skew; + this.random = random; + } + + /** + * Constructor for Gaussian distribution + * + * @param mean Mean + * @param stddev Standard Deviation + * @param skew Skew + */ + public SkewGeneralizedNormalDistribution(double mean, double stddev, double skew) { + this(mean, stddev, skew, null); + } + + @Override + public double pdf(double val) { + return pdf(val, mean, stddev, skew); + } + + @Override + public double cdf(double val) { + return cdf(val, mean, stddev, skew); + } + + @Override + public double quantile(double q) { + return quantile(q, mean, stddev, skew); + } + + @Override + public double nextRandom() { + double y = random.nextGaussian(); + if (Math.abs(skew) > 0.) { + y = (1. - Math.exp(-skew * y)) / skew; + } + return mean + stddev * y; + + } + + @Override + public String toString() { + return "SkewNormalDistribution(mean=" + mean + ", stddev=" + stddev + ", skew=" + skew + ")"; + } + + /** + * Probability density function of the skewed normal distribution. + * + * @param x The value. + * @param mu The mean. + * @param sigma The standard deviation. + * @return PDF of the given normal distribution at x. + */ + public static double pdf(double x, double mu, double sigma, double skew) { + x = (x - mu) / sigma; + if (Math.abs(skew) > 0.) { + x = -Math.log(1. 
- skew * x) / skew; + } + return MathUtil.ONE_BY_SQRTTWOPI / sigma * Math.exp(x * (skew - .5 * x)); // Jacobian 1 / (1 - skew * z) equals exp(skew * x) for the transformed x + } + + /** + * Cumulative probability density function (CDF) of a normal distribution. + * + * @param x value to evaluate CDF at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The CDF of the given normal distribution at x. + */ + public static double cdf(double x, double mu, double sigma, double skew) { + x = (x - mu) / sigma; + if (Math.abs(skew) > 0.) { + double tmp = 1 - skew * x; + if (tmp < 1e-15) { + return (skew < 0.) ? 0. : 1.; + } + x = -Math.log(tmp) / skew; + } + return .5 + .5 * NormalDistribution.erf(x * MathUtil.SQRTHALF); + } + + /** + * Inverse cumulative probability density function (probit) of a normal + * distribution. + * + * @param x value to evaluate probit function at + * @param mu Mean value + * @param sigma Standard deviation. + * @return The probit of the given normal distribution at x. + */ + public static double quantile(double x, double mu, double sigma, double skew) { + x = NormalDistribution.standardNormalQuantile(x); + if (Math.abs(skew) > 0.) { + x = (1. - Math.exp(-skew * x)) / skew; + } + return mu + sigma * x; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java index fcb96c12..442df2e2 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java @@ -23,8 +23,8 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
/**
* Student's t distribution.
@@ -61,7 +61,13 @@ public class StudentsTDistribution implements Distribution { // FIXME: implement!
@Override
public double quantile(double val) {
- throw new AbortException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET);
+ }
+
+ // FIXME: implement!
+ @Override
+ public double nextRandom() {
+ throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET);
}
/**
@@ -87,4 +93,9 @@ public class StudentsTDistribution implements Distribution { double x = v / (val * val + v);
return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v * .5, 0.5));
}
-}
\ No newline at end of file +
+ @Override
+ public String toString() {
+ return "StudentsTDistribution(v=" + v + ")";
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java index 4f54fbf9..efae5080 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/UniformDistribution.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,7 +30,7 @@ import java.util.Random; * * @author Erich Schubert */ -public class UniformDistribution implements DistributionWithRandom { +public class UniformDistribution implements Distribution { /** * Minimum */ @@ -60,8 +60,14 @@ public class UniformDistribution implements DistributionWithRandom { */ public UniformDistribution(double min, double max, Random random) { super(); + if (Double.isInfinite(min) || Double.isInfinite(max)) { + throw new ArithmeticException("Infinite values given for uniform distribution."); + } + if (Double.isNaN(min) || Double.isNaN(max)) { + throw new ArithmeticException("NaN values given for uniform distribution."); + } // Swap parameters if they were given incorrectly. - if(min > max) { + if (min > max) { double tmp = min; min = max; max = tmp; @@ -79,31 +85,31 @@ public class UniformDistribution implements DistributionWithRandom { * @param max Maximum value */ public UniformDistribution(double min, double max) { - this(min, max, new Random()); + this(min, max, null); } @Override public double pdf(double val) { - if(val < min || val >= max) { + if (!(val >= min) || val >= max) { return 0.0; } - return 1.0 / len; + return (len > 0.) ? 
1.0 / len : Double.POSITIVE_INFINITY; } @Override public double cdf(double val) { - if(val < min) { - return 0.0; + if (!(val > min)) { + return 0.; } - if(val > max) { - return 1.0; + if (val >= max) { + return 1.; } - return (val - min) / len; + return (len > 0.) ? (val - min) / len : .5; } - + @Override public double quantile(double val) { - return min + len * val; + return min + len * val; } @Override @@ -129,4 +135,4 @@ public class UniformDistribution implements DistributionWithRandom { public double getMax() { return max; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java new file mode 100644 index 00000000..ec0ea712 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WaldDistribution.java @@ -0,0 +1,173 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.exceptions.ExceptionMessages; +import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException; + +/** + * Inverse Gaussian distribution aka Wald distribution + * + * @author Erich Schubert + */ +@Alias({ "InverseGaussianDistribution", "invgauss" }) +public class WaldDistribution implements Distribution { + /** + * Mean value + */ + private double mean; + + /** + * Shape parameter + */ + private double shape; + + /** + * The random generator. 
+ */ + private Random random; + + /** + * Constructor for wald distribution + * + * @param mean Mean + * @param shape Shape parameter + * @param random Random generator + */ + public WaldDistribution(double mean, double shape, Random random) { + super(); + this.mean = mean; + this.shape = shape; + this.random = random; + } + + /** + * Constructor for Gaussian distribution + * + * @param mean Mean + * @param shape Shape parameter + */ + public WaldDistribution(double mean, double shape) { + this(mean, shape, null); + } + + @Override + public double pdf(double val) { + return pdf(val, mean, shape); + } + + @Override + public double cdf(double val) { + return cdf(val, mean, shape); + } + + /** + * @deprecated NOT YET IMPLEMENTED. + */ + @Override + @Deprecated + public double quantile(double q) { + return quantile(q, mean, shape); + } + + @Override + public double nextRandom() { + double v = random.nextGaussian(); + v *= v; + double x = mean + mean * .5 / shape * (mean * v - Math.sqrt(4. * mean * shape * v + mean * mean * v * v)); + double u = random.nextDouble(); + if (u * (mean + x) <= mean) { + return x; + } else { + return mean * mean / x; + } + } + + @Override + public String toString() { + return "WaldDistribution(mean=" + mean + ", shape=" + shape + ")"; + } + + /** + * Probability density function of the Wald distribution. + * + * + * @param x The value. + * @param mu The mean. + * @param shape Shape parameter + * @return PDF of the given Wald distribution at x. + */ + public static double pdf(double x, double mu, double shape) { + if (!(x > 0)) { + return 0; + } + final double v = (x - mu); + return Math.sqrt(shape / (MathUtil.TWOPI * x * x * x)) * Math.exp(-shape * v * v / (2. * mu * mu * x)); + } + + /** + * Cumulative probability density function (CDF) of a Wald distribution. + * + * @param x value to evaluate CDF at + * @param mu Mean value + * @param shape Shape parameter + * @return The CDF of the given Wald distribution at x. 
+ */ + public static double cdf(double x, double mu, double shape) { + if (!(x > 0.)) { + return 0.; + } + // TODO: accelerate by caching exp(2 * shape / mu). + final double v0 = x / mu; + final double v1 = Math.sqrt(shape / x); + double c1 = NormalDistribution.standardNormalCDF(v1 * (v0 - 1.)); + double c2 = NormalDistribution.standardNormalCDF(-v1 * (v0 + 1.)); + if (c2 > 0.) { + return c1 + Math.exp(2 * shape / mu) * c2; + } else { + return c1; + } + } + + /** + * Inverse cumulative probability density function (probit) of a Wald + * distribution. + * + * @param x value to evaluate probit function at + * @param mu Mean value + * @param shape Shape parameter + * @return The probit of the given Wald distribution at x. + * + * @deprecated NOT YET IMPLEMENTED. + */ + @Deprecated + public static double quantile(double x, double mu, double shape) { + // FIXME: implement! + throw new NotImplementedException(ExceptionMessages.UNSUPPORTED_NOT_YET); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java new file mode 100644 index 00000000..165f536a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/WeibullDistribution.java @@ -0,0 +1,182 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Weibull distribution. + * + * @author Erich Schubert + */ +public class WeibullDistribution implements Distribution { + /** + * Shift offset. + */ + double theta = 0.0; + + /** + * Shape parameter k. + */ + double k; + + /** + * Lambda parameter. + */ + double lambda; + + /** + * Random number generator. + */ + Random random; + + /** + * Constructor. + * + * @param k Shape parameter + * @param lambda Scale parameter + */ + public WeibullDistribution(double k, double lambda) { + this(k, lambda, 0.0, null); + } + + /** + * Constructor. + * + * @param k Shape parameter + * @param lambda Scale parameter + * @param theta Shift offset parameter + */ + public WeibullDistribution(double k, double lambda, double theta) { + this(k, lambda, theta, null); + } + + /** + * Constructor. + * + * @param k Shape parameter + * @param lambda Scale parameter + * @param random Random number generator + */ + public WeibullDistribution(double k, double lambda, Random random) { + this(k, lambda, 0.0, random); + } + + /** + * Constructor. 
+ * + * @param k Shape parameter + * @param lambda Scale parameter + * @param theta Shift offset parameter + * @param random Random number generator + */ + public WeibullDistribution(double k, double lambda, double theta, Random random) { + super(); + this.k = k; + this.lambda = lambda; + this.theta = theta; + this.random = random; + } + + @Override + public double pdf(double x) { + return pdf(x, k, lambda, theta); + } + + /** + * PDF of Weibull distribution + * + * @param x Value + * @param k Shape parameter + * @param lambda Scale parameter + * @param theta Shift offset parameter + * @return PDF at position x. + */ + public static double pdf(double x, double k, double lambda, double theta) { + if (x > theta) { + double xl = (x - theta) / lambda; + return k / lambda * Math.pow(xl, k - 1) * Math.exp(-Math.pow(xl, k)); + } else { + return 0.; + } + } + + /** + * CDF of Weibull distribution + * + * @param val Value + * @param k Shape parameter + * @param lambda Scale parameter + * @param theta Shift offset parameter + * @return CDF at position x. + */ + public static double cdf(double val, double k, double lambda, double theta) { + if (val > theta) { + return 1.0 - Math.exp(-Math.pow((val - theta) / lambda, k)); + } else { + return 0.0; + } + } + + @Override + public double cdf(double val) { + return cdf(val, k, lambda, theta); + } + + /** + * Quantile function of Weibull distribution + * + * @param val Value + * @param k Shape parameter + * @param lambda Scale parameter + * @param theta Shift offset parameter + * @return Quantile function at position x. 
+ */ + public static double quantile(double val, double k, double lambda, double theta) { + if (val < 0.0 || val > 1.0) { + return Double.NaN; + } else if (val == 0) { + return 0.0; + } else if (val == 1) { + return Double.POSITIVE_INFINITY; + } else { + return theta + lambda * Math.pow(-Math.log(1.0 - val), 1.0 / k); + } + } + + @Override + public double quantile(double val) { + return quantile(val, k, lambda, theta); + } + + @Override + public double nextRandom() { + return theta + lambda * Math.pow(-Math.log(1 - random.nextDouble()), 1. / k); + } + + @Override + public String toString() { + return "WeibullDistribution(k=" + k + ", lambda=" + lambda + ", theta=" + theta + ")"; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractExpMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractExpMADEstimator.java new file mode 100644 index 00000000..6f9dc541 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractExpMADEstimator.java @@ -0,0 +1,96 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the median and MAD. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractExpMADEstimator<D extends Distribution> implements ExpMADDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractExpMADEstimator() { + super(); + } + + @Override + public abstract D estimateFromExpMedianMAD(double median, double mad); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // TODO: detect pre-sorted data? + final int len = adapter.size(data); + // Modifiable copy: + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + x[i] = Math.exp(adapter.getDouble(data, i)); + } + double median = QuickSelect.median(x); + double mad = computeMAD(x, median); + return estimateFromExpMedianMAD(median, mad); + } + + /** + * Compute the median absolute deviation from median. + * + * @param x Input data <b>will be modified</b> + * @param median Median value. + * @return Median absolute deviation from median. + */ + public static double computeMAD(double[] x, double median) { + // Compute deviations: + for (int i = 0; i < x.length; i++) { + x[i] = Math.abs(x[i] - median); + } + double mad = QuickSelect.median(x); + // Fallback if we have more than 50% ties to next largest. + if (!(mad > 0.)) { + double min = Double.POSITIVE_INFINITY; + for (double xi : x) { + if (xi > 0. && xi < min) { + min = xi; + } + } + if (min < Double.POSITIVE_INFINITY) { + mad = min; + } else { + mad = 1.0; // Maybe all constant. No real value. 
+ } + } + return mad; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLMMEstimator.java new file mode 100644 index 00000000..3d77a1e6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLMMEstimator.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.statistics.ProbabilityWeightedMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for L-Moments based estimators (LMM). + * + * @author Erich Schubert + * + * @param <D> Distribution class. 
+ */ +public abstract class AbstractLMMEstimator<D extends Distribution> implements LMMDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractLMMEstimator() { + super(); + } + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // Sort: + final int size = adapter.size(data); + double[] sorted = new double[size]; + for (int i = 0; i < size; i++) { + sorted[i] = adapter.getDouble(data, i); + } + Arrays.sort(sorted); + double[] xmom = ProbabilityWeightedMoments.samLMR(sorted, ArrayLikeUtil.DOUBLEARRAYADAPTER, getNumMoments()); + return estimateFromLMoments(xmom); + } + + @Override + abstract public D estimateFromLMoments(double[] xmom); + + @Override + abstract public int getNumMoments(); + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMADEstimator.java new file mode 100644 index 00000000..b4e4e095 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMADEstimator.java @@ -0,0 +1,101 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the median and MAD. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractLogMADEstimator<D extends Distribution> implements LogMADDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractLogMADEstimator() { + super(); + } + + @Override + public abstract D estimateFromLogMedianMAD(double median, double mad, double shift); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // TODO: detect pre-sorted data? + final int len = adapter.size(data); + double min = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10); + // Modifiable copy: + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i) - min; + x[i] = val > 0. ? Math.log(val) : Double.NEGATIVE_INFINITY; + if (Double.isNaN(x[i])) { + throw new ArithmeticException("NaN value."); + } + } + double median = QuickSelect.median(x); + double mad = computeMAD(x, median); + return estimateFromLogMedianMAD(median, mad, min); + } + + /** + * Compute the median absolute deviation from median. + * + * @param x Input data <b>will be modified</b> + * @param median Median value. + * @return Median absolute deviation from median. 
+ */ + public static double computeMAD(double[] x, double median) { + // Compute deviations: + for (int i = 0; i < x.length; i++) { + x[i] = Math.abs(x[i] - median); + } + double mad = QuickSelect.median(x); + // Fallback if we have more than 50% ties to next largest. + if (!(mad > 0.)) { + double min = Double.POSITIVE_INFINITY; + for (double xi : x) { + if (xi > 0. && xi < min) { + min = xi; + } + } + if (min < Double.POSITIVE_INFINITY) { + mad = min; + } else { + mad = 1.0; // Maybe all constant. No real value. + } + } + return mad; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMOMEstimator.java new file mode 100644 index 00000000..8ae86d4a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMOMEstimator.java @@ -0,0 +1,94 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the statistical moments. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractLogMOMEstimator<D extends Distribution> implements LogMOMDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractLogMOMEstimator() { + super(); + } + + @Override + public abstract D estimateFromLogStatisticalMoments(StatisticalMoments moments, double shift); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double min = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10); + StatisticalMoments mv = new StatisticalMoments(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i) - min; + if (Double.isInfinite(val) || Double.isNaN(val) || val <= 0.) { + continue; + } + mv.put(Math.log(val)); + } + return estimateFromLogStatisticalMoments(mv, min); + } + + /** + * Utility function to find minimum and maximum values. + * + * @param <A> array type + * @param data Data array + * @param adapter Array adapter + * @param minmin Minimum value for minimum. + * @return Minimum + */ + public static <A> double min(A data, NumberArrayAdapter<?, A> adapter, double minmin, double margin) { + final int len = adapter.size(data); + double min = adapter.getDouble(data, 0), max = min; + for (int i = 1; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (val < min) { + min = val; + } else if (val > max) { + max = val; + } + } + if (min > minmin) { + return minmin; + } + // Add some extra margin, to not have 0s. 
+ return min - (max - min) * margin; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMeanVarianceEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMeanVarianceEstimator.java new file mode 100644 index 00000000..a21186db --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractLogMeanVarianceEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Estimators that work on Mean and Variance only (i.e. the first two moments + * only). + * + * @author Erich Schubert + * + * @param <D> Distribution to estimate. 
+ */ +public abstract class AbstractLogMeanVarianceEstimator<D extends Distribution> extends AbstractLogMOMEstimator<D> { + /** + * Constructor. + */ + public AbstractLogMeanVarianceEstimator() { + super(); + } + + @Override + public D estimateFromLogStatisticalMoments(StatisticalMoments moments, double shift) { + if (!(moments.getCount() > 1.)) { + throw new ArithmeticException("Too small sample size to estimate variance."); + } + return estimateFromLogMeanVariance(moments, shift); + } + + /** + * Estimate the distribution from mean and variance. + * + * @param mv Mean and variance. + * @param shift Shift that was applied to avoid negative values. + * @return Distribution + */ + public abstract D estimateFromLogMeanVariance(MeanVariance mv, double shift); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double min = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10); + MeanVariance mv = new MeanVariance(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i) - min; + if (Double.isInfinite(val) || Double.isNaN(val) || val <= 0.) 
{ + continue; + } + mv.put(Math.log(val)); + } + if (!(mv.getCount() > 1.)) { + throw new ArithmeticException("Too small sample size to estimate variance."); + } + return estimateFromLogMeanVariance(mv, min); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMADEstimator.java new file mode 100644 index 00000000..54009592 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMADEstimator.java @@ -0,0 +1,109 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the median and MAD. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. 
+ */ +public abstract class AbstractMADEstimator<D extends Distribution> implements MADDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractMADEstimator() { + super(); + } + + @Override + public abstract D estimateFromMedianMAD(double median, double mad); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // TODO: detect pre-sorted data? + final int len = adapter.size(data); + // Modifiable copy: + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + x[i] = adapter.getDouble(data, i); + } + double median = QuickSelect.median(x); + double mad = computeMAD(x, median); + return estimateFromMedianMAD(median, mad); + } + + /** + * Compute the median absolute deviation from median. + * + * @param x Input data <b>will be modified</b> + * @param median Median value. + * @return Median absolute deviation from median. + */ + public static double computeMAD(double[] x, double median) { + // Compute deviations: + for (int i = 0; i < x.length; i++) { + x[i] = Math.abs(x[i] - median); + } + double mad = QuickSelect.median(x); + // Fallback if we have more than 50% ties to next largest. + if (!(mad > 0.)) { + double min = Double.POSITIVE_INFINITY; + for (double xi : x) { + if (xi > 0. && xi < min) { + min = xi; + } + } + if (min < Double.POSITIVE_INFINITY) { + mad = min; + } else { + mad = 1.0; // Maybe all constant. No real value. + } + } + if (mad == Double.POSITIVE_INFINITY) { + double max = 0.; + for (double xi : x) { + if (xi < Double.POSITIVE_INFINITY && xi > max) { + max = xi; + } + } + if (max < Double.POSITIVE_INFINITY) { + mad = max; + } else { + mad = 1.0; // No reasonable value. Give up. 
+ } + } + return mad; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMOMEstimator.java new file mode 100644 index 00000000..30bd0802 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMOMEstimator.java @@ -0,0 +1,66 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Abstract base class for estimators based on the statistical moments. + * + * @author Erich Schubert + * + * @param <D> Distribution to generate. + */ +public abstract class AbstractMOMEstimator<D extends Distribution> implements MOMDistributionEstimator<D> { + /** + * Constructor. 
+ */ + public AbstractMOMEstimator() { + super(); + } + + @Override + public abstract D estimateFromStatisticalMoments(StatisticalMoments moments); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + StatisticalMoments mv = new StatisticalMoments(); + int size = adapter.size(data); + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(data, i); + if (Double.isInfinite(val) || Double.isNaN(val)) { + continue; + } + mv.put(val); + } + return estimateFromStatisticalMoments(mv); + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMeanVarianceEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMeanVarianceEstimator.java new file mode 100644 index 00000000..73a2e6d2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/AbstractMeanVarianceEstimator.java @@ -0,0 +1,67 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Estimators that work on Mean and Variance only (i.e. the first two moments + * only). + * + * @author Erich Schubert + * + * @param <D> Distribution to estimate. + */ +public abstract class AbstractMeanVarianceEstimator<D extends Distribution> extends AbstractMOMEstimator<D> implements MeanVarianceDistributionEstimator<D> { + /** + * Constructor. + */ + public AbstractMeanVarianceEstimator() { + super(); + } + + @Override + public D estimateFromStatisticalMoments(StatisticalMoments moments) { + return estimateFromMeanVariance(moments); + } + + @Override + public abstract D estimateFromMeanVariance(MeanVariance mv); + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + MeanVariance mv = new MeanVariance(); + int size = adapter.size(data); + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(data, i); + if (Double.isInfinite(val) || Double.isNaN(val)) { + continue; + } + mv.put(val); + } + return estimateFromMeanVariance(mv); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/CauchyMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/CauchyMADEstimator.java new file mode 100644 index 00000000..e1cfb20c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/CauchyMADEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute 
it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.CauchyDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Cauchy distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has CauchyDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class CauchyMADEstimator extends AbstractMADEstimator<CauchyDistribution> { + /** + * Static instance. + */ + public static final CauchyMADEstimator STATIC = new CauchyMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private CauchyMADEstimator() { + // Do not instantiate + } + + @Override + public CauchyDistribution estimateFromMedianMAD(double median, double mad) { + return new CauchyDistribution(median, mad); + } + + @Override + public Class<? super CauchyDistribution> getDistributionClass() { + return CauchyDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected CauchyMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/DistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/DistributionEstimator.java new file mode 100644 index 00000000..953fcff8 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/DistributionEstimator.java @@ -0,0 +1,52 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; + +/** + * Estimate distribution parameters from a sample. 
+ * + * @author Erich Schubert + * + * @param <D> Distribution type + */ +public interface DistributionEstimator<D extends Distribution> { + /** + * General form of the parameter estimation + * + * @param data Data set + * @param adapter Number array adapter + * @return Estimated distribution + */ + <A> D estimate(A data, NumberArrayAdapter<?, A> adapter); + + /** + * Get the class that is produced by the estimator. + * + * @return Distribution class + */ + Class<? super D> getDistributionClass(); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/EMGOlivierNorbergEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/EMGOlivierNorbergEstimator.java new file mode 100644 index 00000000..65c89c83 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/EMGOlivierNorbergEstimator.java @@ -0,0 +1,82 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentiallyModifiedGaussianDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate an exponentially modified Gaussian distribution from the mean, sample standard deviation and sample skewness. + * + * @author Erich Schubert + * + * @apiviz.has ExponentiallyModifiedGaussianDistribution - - estimates + */ +@Reference(authors = "J. Olivier, M. M. Norberg", title = "Positively skewed data: Revisiting the Box-Cox power transformation", booktitle = "International Journal of Psychological Research Vol. 3 No. 1") +public class EMGOlivierNorbergEstimator extends AbstractMOMEstimator<ExponentiallyModifiedGaussianDistribution> { + /** + * Static instance. + */ + public static final EMGOlivierNorbergEstimator STATIC = new EMGOlivierNorbergEstimator(); + + /** + * Private constructor, use static instance! + */ + private EMGOlivierNorbergEstimator() { + // Do not instantiate + } + + @Override + public ExponentiallyModifiedGaussianDistribution estimateFromStatisticalMoments(StatisticalMoments moments) { + // Avoid NaN by disallowing negative skewness. + final double halfsk13 = Math.pow(Math.max(0., moments.getSampleSkewness() * .5), 1. / 3.); + final double st = moments.getSampleStddev(); + final double mu = moments.getMean() - st * halfsk13; + // Note: we added "abs" here, to avoid even more NaNs. + final double si = st * Math.sqrt(Math.abs((1. + halfsk13) * (1. - halfsk13))); + // One more workaround to ensure finite lambda... + final double la = (halfsk13 > 0) ? 1 / (st * halfsk13) : 1; + return new ExponentiallyModifiedGaussianDistribution(mu, si, la); + } + + @Override + public Class<? super ExponentiallyModifiedGaussianDistribution> getDistributionClass() { + return ExponentiallyModifiedGaussianDistribution.class; + } + + /** + * Parameterization class.
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected EMGOlivierNorbergEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExpMADDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExpMADDistributionEstimator.java new file mode 100644 index 00000000..70a16f3e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExpMADDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribution estimators that use the median and the median absolute + * deviation (MAD) of exponentiated data. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated.
+ */ +public interface ExpMADDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * General form of the parameter estimation + * + * @param median Median of exp values. + * @param mad Median absolute deviation from median (in expspace). + * @return Estimated distribution + */ + D estimateFromExpMedianMAD(double median, double mad); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialLMMEstimator.java new file mode 100644 index 00000000..66176545 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialLMMEstimator.java @@ -0,0 +1,92 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of an Exponential Distribution, using the methods of + * L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * + * @author Erich Schubert + * + * @apiviz.has ExponentialDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class ExponentialLMMEstimator extends AbstractLMMEstimator<ExponentialDistribution> { + /** + * Static instance. + */ + public static final ExponentialLMMEstimator STATIC = new ExponentialLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private ExponentialLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public ExponentialDistribution estimateFromLMoments(double[] xmom) { + double scale = 2. * xmom[1]; + if (!(scale > 0.)) { + throw new ArithmeticException("Data with non-positive scale cannot be exponential distributed."); + } + return new ExponentialDistribution(1. / scale, xmom[0] - scale); + } + + @Override + public Class<? super ExponentialDistribution> getDistributionClass() { + return ExponentialDistribution.class; + } + + /** + * Parameterization class.
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected ExponentialLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMADEstimator.java new file mode 100644 index 00000000..208fc72b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMADEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Exponential distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has ExponentialDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class ExponentialMADEstimator extends AbstractMADEstimator<ExponentialDistribution> { + /** + * Static instance. + */ + public static final ExponentialMADEstimator STATIC = new ExponentialMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private ExponentialMADEstimator() { + // Do not instantiate + } + + @Override + public ExponentialDistribution estimateFromMedianMAD(double median, double mad) { + final double location = median - 1.440 * mad; + final double scale = 2.0781 * mad; + if (!(scale > 0.)) { + throw new ArithmeticException("Data with non-positive MAD cannot be exponential distributed."); + } + return new ExponentialDistribution(1./scale, location); + } + + @Override + public Class<? super ExponentialDistribution> getDistributionClass() { + return ExponentialDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected ExponentialMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMOMEstimator.java new file mode 100644 index 00000000..4c3f93aa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMOMEstimator.java @@ -0,0 +1,77 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Exponential distribution parameters using the mean, which is the + * maximum-likelihood estimate (MLE), but not very robust. 
+ * + * @author Erich Schubert + * + * @apiviz.has ExponentialDistribution + */ +public class ExponentialMOMEstimator extends AbstractMeanVarianceEstimator<ExponentialDistribution> { + /** + * Static instance. + */ + public static final ExponentialMOMEstimator STATIC = new ExponentialMOMEstimator(); + + /** + * Private constructor, use static instance! + */ + private ExponentialMOMEstimator() { + // Do not instantiate + } + + @Override + public ExponentialDistribution estimateFromMeanVariance(MeanVariance mv) { + final double scale = mv.getMean(); + if (!(scale > 0.)) { + throw new ArithmeticException("Data with non-positive mean cannot be exponential distributed."); + } + return new ExponentialDistribution(1. / scale); + } + + @Override + public Class<? super ExponentialDistribution> getDistributionClass() { + return ExponentialDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected ExponentialMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMedianEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMedianEstimator.java new file mode 100644 index 00000000..19ce63c7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/ExponentialMedianEstimator.java @@ -0,0 +1,83 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software 
Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ExponentialDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Exponential distribution parameters using the median only (the MAD is ignored). + * + * Reference: + * <p> + * Robust Estimators for Transformed Location Scale Families<br /> + * D. J. Olive + * </p> + * + * @author Erich Schubert + * + * @apiviz.has ExponentialDistribution + */ +@Reference(title = "Robust Estimators for Transformed Location Scale Families", authors = "D. J. Olive", booktitle = "") +public class ExponentialMedianEstimator extends AbstractMADEstimator<ExponentialDistribution> { + /** + * Static instance. + */ + public static final ExponentialMedianEstimator STATIC = new ExponentialMedianEstimator(); + + /** + * Private constructor, use static instance! + */ + private ExponentialMedianEstimator() { + // Do not instantiate + } + + @Override + public ExponentialDistribution estimateFromMedianMAD(double median, double mad) { + final double scale = 1.441 * median; + if (!(scale > 0.)) { + throw new ArithmeticException("Data with non-positive median cannot be exponential distributed."); + } + return new ExponentialDistribution(1. / scale); + } + + @Override + public Class<? super ExponentialDistribution> getDistributionClass() { + return ExponentialDistribution.class; + } + + /** + * Parameterization class.
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected ExponentialMedianEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java new file mode 100644 index 00000000..d41881f0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaChoiWetteEstimator.java @@ -0,0 +1,118 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate distribution parameters using the method by Choi and Wette. 
+ * + * Reference: + * <p> + * Maximum likelihood estimation of the parameters of the gamma distribution and + * their bias<br /> + * S. C. Choi, R. Wette<br /> + * in: Technometrics + * </p> + * + * @author Erich Schubert + * + * @apiviz.has GammaDistribution - - estimates + */ +@Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892") +public class GammaChoiWetteEstimator implements DistributionEstimator<GammaDistribution> { + /** + * Static estimation, using iterative refinement. + */ + public static final GammaChoiWetteEstimator STATIC = new GammaChoiWetteEstimator(); + + /** + * Private constructor. + */ + private GammaChoiWetteEstimator() { + // Do not instantiate - use static class + } + + @Override + public <A> GammaDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double meanx = 0, meanlogx = 0; + // Count only the values actually used: dividing by the loop index would + // bias both running means whenever a value is skipped below. + int n = 0; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + // Non-positive, infinite and NaN values are ignored. + if (val <= 0 || Double.isInfinite(val) || Double.isNaN(val)) { + continue; + } + n++; + final double logx = Math.log(val);
+ final double deltax = val - meanx; + final double deltalogx = logx - meanlogx; + meanx += deltax / n; + meanlogx += deltalogx / n; + } + // Initial approximation + final double logmeanx = Math.log(meanx); + final double diff = logmeanx - meanlogx; + double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff); + + // Refine via newton iteration, based on Choi and Wette equation + while (true) { + double kdelta = (Math.log(k) - GammaDistribution.digamma(k) - diff) / (1 / k - GammaDistribution.trigamma(k)); + if (Math.abs(kdelta) / k < 1E-8 || !(kdelta < Double.POSITIVE_INFINITY)) { + break; + } + k += kdelta; + } + // Estimate theta: + final double theta = k / meanx; + if (!(k > 0.0) || !(theta > 0.0)) { + throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new GammaDistribution(k, theta); + } + + @Override + public Class<? super GammaDistribution> getDistributionClass() { + return GammaDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class.
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GammaChoiWetteEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaLMMEstimator.java new file mode 100644 index 00000000..edfc3f51 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaLMMEstimator.java @@ -0,0 +1,113 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a Gamma Distribution, using the methods of + * L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. 
Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has GammaDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class GammaLMMEstimator extends AbstractLMMEstimator<GammaDistribution> { + /** + * Static instance. + */ + public static final GammaLMMEstimator STATIC = new GammaLMMEstimator(); + + /** Coefficients for polynomial approximation, used when cv is below 0.5 */ + private static final double // + A1 = -0.3080, // + A2 = -0.05812, // + A3 = 0.01765; + + /** Coefficients for polynomial approximation, used when cv is 0.5 or above */ + private static final double // + B1 = 0.7213, // + B2 = -0.5947, // + B3 = -2.1817, // + B4 = 1.2113; + + /** + * Constructor. Private: use static instance. + */ + private GammaLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public GammaDistribution estimateFromLMoments(double[] xmom) { + double cv = xmom[1] / xmom[0]; + double alpha; + if (cv < .5) { + double t = Math.PI * cv * cv; + alpha = (1. + A1 * t) / (t * (1. + t * (A2 + t * A3))); + } else { + double t = 1. - cv; + alpha = t * (B1 + t * B2) / (1. + t * (B3 + t * B4)); + } + final double theta = alpha / xmom[0]; + if (!(alpha > 0.0) || !(theta > 0.0)) { + throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + alpha + " theta=" + theta); + } + return new GammaDistribution(alpha, theta); + } + + @Override + public Class<? super GammaDistribution> getDistributionClass() { + return GammaDistribution.class; + } + + /** + * Parameterization class.
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GammaLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMADEstimator.java new file mode 100644 index 00000000..54b0d38b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMADEstimator.java @@ -0,0 +1,96 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Robust parameter estimation for the Gamma distribution. + * + * Based on the Median and Median absolute deviation from Median (MAD). + * + * Reference: + * <p> + * J. Chen and H. 
Rubin<br /> + * Bounds for the difference between median and mean of Gamma and Poisson + * distributions<br /> + * In: Statist. Probab. Lett., 4 , 281–283. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has GammaDistribution - - estimates + */ +@Reference(authors = "J. Chen. H. Rubin", title = "Bounds for the difference between median and mean of Gamma and Poisson distributions", booktitle = "Statist. Probab. Lett., 4") +public class GammaMADEstimator extends AbstractMADEstimator<GammaDistribution> { + /** + * Static instance. + */ + public static final GammaMADEstimator STATIC = new GammaMADEstimator(); + + /** + * Private constructor. + */ + private GammaMADEstimator() { + // Do not instantiate - use static class + } + + /** + * Estimate Gamma parameters from the median and the MAD. + * + * Computes theta = median / (mad * mad) and k = median * theta, and passes + * (k, theta) to the GammaDistribution constructor. + * + * @param median Median value + * @param mad Median absolute deviation from the median + * @return Gamma distribution constructed from (k, theta) + * @throws ArithmeticException if median or mad is below Double.MIN_NORMAL, + * or if k or theta is not strictly positive (this also catches NaN) + */ + @Override + public GammaDistribution estimateFromMedianMAD(double median, double mad) { + if (median < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero median."); + } + if (mad < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero MAD."); + } + + final double theta = median / (mad * mad); + final double k = median * theta; + if (!(k > 0.) || !(theta > 0.)) { + throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new GammaDistribution(k, theta); + + } + + /** + * Distribution class produced by this estimator. + * + * @return GammaDistribution.class + */ + @Override + public Class<? super GammaDistribution> getDistributionClass() { + return GammaDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GammaMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMOMEstimator.java new file mode 100644 index 00000000..0ff0cf47 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GammaMOMEstimator.java @@ -0,0 +1,93 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Simple parameter estimation for the Gamma distribution. 
+ * + * This is a very naive estimation, based on the mean and variance only, + * sometimes referred to as the "Method of Moments" (MOM). + * + * Reference: + * <p> + * G. Casella, R. L. Berger<br /> + * Statistical inference. Vol. 70 + * </p> + * + * @author Erich Schubert + * + * @apiviz.has GammaDistribution - - estimates + */ +@Reference(authors = "G. Casella, R. L. Berger", title = "Statistical inference. Vol. 70", booktitle = "Statistical inference. Vol. 70") +public class GammaMOMEstimator extends AbstractMeanVarianceEstimator<GammaDistribution> { + /** + * Static estimation using just the mean and variance. + */ + public static final GammaMOMEstimator STATIC = new GammaMOMEstimator(); + + /** + * Private constructor. + */ + private GammaMOMEstimator() { + // Do not instantiate - use static class + } + + /** + * Estimate Gamma parameters from a mean and a sample variance. + * + * Reads mu = mv.getMean() and var = mv.getSampleVariance(), computes + * theta = mu / var and k = mu * theta, and passes (k, theta) to the + * GammaDistribution constructor. + * + * @param mv Mean and variance accumulator + * @return Gamma distribution constructed from (k, theta) + * @throws ArithmeticException if the mean or the sample variance is below + * Double.MIN_NORMAL, or if k or theta is not strictly positive + */ + @Override + public GammaDistribution estimateFromMeanVariance(MeanVariance mv) { + final double mu = mv.getMean(); + final double var = mv.getSampleVariance(); + if (mu < Double.MIN_NORMAL || var < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero mean or variance: " + mv.toString()); + } + final double theta = mu / var; + final double k = mu * theta; + if (!(k > 0.) || !(theta > 0.)) { + throw new ArithmeticException("Gamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new GammaDistribution(k, theta); + } + + /** + * Distribution class produced by this estimator. + * + * @return GammaDistribution.class + */ + @Override + public Class<? super GammaDistribution> getDistributionClass() { + return GammaDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GammaMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedExtremeValueLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedExtremeValueLMMEstimator.java new file mode 100644 index 00000000..cdadf47d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedExtremeValueLMMEstimator.java @@ -0,0 +1,161 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GeneralizedExtremeValueDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a Generalized Extreme Value Distribution, using + * the methods of L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. Hosking, J. R. Wallis, and E. F. Wood<br /> + * Estimation of the generalized extreme-value distribution by the method of + * probability-weighted moments.<br /> + * Technometrics 27.3 + * </p> + * + * @author Erich Schubert + * + * @apiviz.has GeneralizedExtremeValueDistribution + */ +@Reference(authors = "J.R.M. Hosking, J. R. Wallis, and E. F. Wood", title = "Estimation of the generalized extreme-value distribution by the method of probability-weighted moments.", booktitle = "Technometrics 27.3", url = "http://dx.doi.org/10.1080/00401706.1985.10488049") +public class GeneralizedExtremeValueLMMEstimator extends AbstractLMMEstimator<GeneralizedExtremeValueDistribution> { + /** + * Static instance. + */ + public static final GeneralizedExtremeValueLMMEstimator STATIC = new GeneralizedExtremeValueLMMEstimator(); + + /** + * Constants for fast rational approximations. + * + * A0..A4: numerator coefficients of the approximation that + * estimateFromLMoments evaluates when xmom[2] is not positive. + */ + private static final double // + A0 = 0.28377530, // + A1 = -1.21096399, // + A2 = -2.50728214, // + A3 = -1.13455566, // + A4 = -0.07138022; + + /** + * B1..B3: denominator coefficients of the same approximation (xmom[2] not + * positive). + */ + private static final double // + B1 = 2.06189696, // + B2 = 1.31912239, // + B3 = 0.25077104; + + /** + * C1..C3 (numerator) and D1..D2 (denominator): coefficients of the + * approximation evaluated at z = 1 - xmom[2] when xmom[2] is positive. + */ + private static final double // + C1 = 1.59921491, // + C2 = -0.48832213, // + C3 = 0.01573152, // + D1 = -0.64363929, // + D2 = 0.08985247; + + /** + * Maximum number of Newton-Raphson iterations in estimateFromLMoments; when + * it is reached without convergence an ArithmeticException is thrown. + * + * NOTE(review): declared non-final and package-visible; confirm that nothing + * reassigns it before turning it into a constant. + */ + static int MAXIT = 20; + + /** + * Constructor. Private: use static instance. 
+ */ + private GeneralizedExtremeValueLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + @Override + public GeneralizedExtremeValueDistribution estimateFromLMoments(double[] xmom) { + double t3 = xmom[2]; + if (Math.abs(t3) < 1e-50 || (t3 >= 1.)) { + throw new ArithmeticException("Invalid moment estimation."); + } + // Approximation for t3 between 0 and 1: + double g; + if (t3 > 0.) { + double z = 1. - t3; + g = (-1. + z * (C1 + z * (C2 + z * C3))) / (1. + z * (D1 + z * D2)); + // g: Almost zero? + if (Math.abs(g) < 1e-50) { + double k = 0; + double sigma = xmom[1] / MathUtil.LOG2; + double mu = xmom[0] - Math.E * sigma; + return new GeneralizedExtremeValueDistribution(mu, sigma, k); + } + } else { + // Approximation for t3 between -.8 and 0L: + g = (A0 + t3 * (A1 + t3 * (A2 + t3 * (A3 + t3 * A4)))) / (1. + t3 * (B1 + t3 * (B2 + t3 * B3))); + if (t3 < -.8) { + // Newton-Raphson iteration for t3 < -.8 + if (t3 <= -.97) { + g = 1. - Math.log(1. + t3) / MathUtil.LOG2; + } + double t0 = .5 * (t3 + 3.); + for (int it = 1;; it++) { + double x2 = Math.pow(2., -g), xx2 = 1. - x2; + double x3 = Math.pow(3., -g), xx3 = 1. - x3; + double t = xx3 / xx2; + double deriv = (xx2 * x3 * MathUtil.LOG3 - xx3 * x2 * MathUtil.LOG2) / (xx2 * x2); + double oldg = g; + g -= (t - t0) / deriv; + if (Math.abs(g - oldg) < 1e-20 * g) { + break; + } + if (it >= MAXIT) { + throw new ArithmeticException("Newton-Raphson did not converge."); + } + } + } + } + double gam = Math.exp(GammaDistribution.logGamma(1. + g)); + final double mu, sigma, k; + k = g; + sigma = xmom[1] * g / (gam * (1. - Math.pow(2., -g))); + mu = xmom[0] - sigma * (1. - gam) / g; + return new GeneralizedExtremeValueDistribution(mu, sigma, k); + } + + @Override + public Class<? super GeneralizedExtremeValueDistribution> getDistributionClass() { + return GeneralizedExtremeValueDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GeneralizedExtremeValueLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedLogisticAlternateLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedLogisticAlternateLMMEstimator.java new file mode 100644 index 00000000..dfcbcd52 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GeneralizedLogisticAlternateLMMEstimator.java @@ -0,0 +1,98 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GeneralizedLogisticAlternateDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a Generalized Logistic Distribution, using the + * methods of L-Moments (LMM). 
+ * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has GeneralizedLogisticAlternateDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class GeneralizedLogisticAlternateLMMEstimator extends AbstractLMMEstimator<GeneralizedLogisticAlternateDistribution> { + /** + * Static instance. + */ + public static final GeneralizedLogisticAlternateLMMEstimator STATIC = new GeneralizedLogisticAlternateLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private GeneralizedLogisticAlternateLMMEstimator() { + super(); + } + + /** + * Number of L-moments read by estimateFromLMoments. + * + * @return 3 + */ + @Override + public int getNumMoments() { + return 3; + } + + /** + * Estimate the parameters from three L-moments. + * + * The shape is -xmom[2]. For |shape| below 1e-6 the distribution is built + * directly from (xmom[0], xmom[1], 0). Otherwise, with + * tmp = shape * PI / sin(shape * PI), the scale is xmom[1] / tmp and the + * location is xmom[0] - scale * (1 - tmp) / shape. + * + * NOTE(review): the range check is inclusive, so shape = -1 and shape = 1 are + * accepted although sin(shape * PI) is then close to zero; confirm whether + * the bounds should be exclusive. + * + * @param xmom L-moments; xmom[0], xmom[1] and xmom[2] are read + * @return Distribution constructed from (location, scale, shape) + * @throws ArithmeticException if the shape is NaN or outside [-1, 1] + */ + @Override + public GeneralizedLogisticAlternateDistribution estimateFromLMoments(double[] xmom) { + double shape = -xmom[2]; + if (!(shape >= -1 && shape <= 1)) { + throw new ArithmeticException("Invalid moment estimation."); + } + if (Math.abs(shape) < 1e-6) { + // Effectively zero, so non-generalized. + return new GeneralizedLogisticAlternateDistribution(xmom[0], xmom[1], 0.); + } + double tmp = shape * Math.PI / Math.sin(shape * Math.PI); + double scale = xmom[1] / tmp; + double location = xmom[0] - scale * (1. - tmp) / shape; + return new GeneralizedLogisticAlternateDistribution(location, scale, shape); + } + + /** + * Distribution class produced by this estimator. + * + * @return GeneralizedLogisticAlternateDistribution.class + */ + @Override + public Class<? super GeneralizedLogisticAlternateDistribution> getDistributionClass() { + return GeneralizedLogisticAlternateDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GeneralizedLogisticAlternateLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelLMMEstimator.java new file mode 100644 index 00000000..c0f64006 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelLMMEstimator.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GumbelDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a Gumbel Distribution, using the methods of + * L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. 
Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * + * @author Erich Schubert + * + * @apiviz.has GumbelDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class GumbelLMMEstimator extends AbstractLMMEstimator<GumbelDistribution> { + /** + * Static instance. + */ + public static final GumbelLMMEstimator STATIC = new GumbelLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private GumbelLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public GumbelDistribution estimateFromLMoments(double[] xmom) { + double scale = xmom[1] / MathUtil.LOG2; + return new GumbelDistribution(xmom[0] - Math.E * scale, scale); + } + + @Override + public Class<? super GumbelDistribution> getDistributionClass() { + return GumbelDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GumbelLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelMADEstimator.java new file mode 100644 index 00000000..ebf6354a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/GumbelMADEstimator.java @@ -0,0 +1,82 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GumbelDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Parameter estimation via median and median absolute deviation from median + * (MAD). + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has GumbelDistribution - - estimates + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class GumbelMADEstimator extends AbstractMADEstimator<GumbelDistribution> { + /** + * Static instance. + */ + public static final GumbelMADEstimator STATIC = new GumbelMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private GumbelMADEstimator() { + // Do not instantiate + } + + /** + * Estimate Gumbel parameters from the median and the MAD. + * + * Passes (median + 0.4778 * mad, 1.3037 * mad) to the GumbelDistribution + * constructor. No input validation is performed; mad = 0 yields a second + * argument of 0. + * + * NOTE(review): the first argument lies above the median, whereas + * GumbelLMMEstimator places its first argument below the mean. Confirm the + * sign of the 0.4778 term against the parameterization that + * GumbelDistribution implements. + * + * @param median Median value + * @param mad Median absolute deviation from the median + * @return Gumbel distribution constructed from the two values above + */ + @Override + public GumbelDistribution estimateFromMedianMAD(double median, double mad) { + // TODO: Work around degenerate cases? + return new GumbelDistribution(median + 0.4778 * mad, 1.3037 * mad); + } + + /** + * Distribution class produced by this estimator. + * + * @return GumbelDistribution.class + */ + @Override + public Class<? super GumbelDistribution> getDistributionClass() { + return GumbelDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GumbelMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LMMDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LMMDistributionEstimator.java new file mode 100644 index 00000000..f3d8d1b2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LMMDistributionEstimator.java @@ -0,0 +1,51 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Interface for distribution estimators based on the methods of L-Moments + * (LMM). + * + * @author Erich Schubert + * + * @param <D> Distribution class. + */ +public interface LMMDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * Estimate from the L-Moments. 
+ * + * @param moments L-Moments + * @return Distribution + */ + D estimateFromLMoments(double[] moments); + + /** + * The number of moments needed. + * + * @return Moments needed. + */ + int getNumMoments(); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java new file mode 100644 index 00000000..1e31af28 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceLMMEstimator.java @@ -0,0 +1,78 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LaplaceDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Laplace distribution parameters using the method of L-Moments (LMM). + * + * @author Erich Schubert + * + * @apiviz.has LaplaceDistribution + */ +public class LaplaceLMMEstimator extends AbstractLMMEstimator<LaplaceDistribution> { + /** + * Static instance. 
+ */ + public static final LaplaceLMMEstimator STATIC = new LaplaceLMMEstimator(); + + /** + * Private constructor, use static instance! + */ + private LaplaceLMMEstimator() { + // Do not instantiate + } + + /** + * Estimate Laplace parameters from the first two L-moments. + * + * The location is xmom[0] and the scale is 4/3 * xmom[1]. The constructor + * receives (1 / scale, location); xmom[1] = 0 therefore yields an infinite + * first argument, and no guard is applied here. + * + * @param xmom L-moments; only xmom[0] and xmom[1] are read + * @return Laplace distribution constructed from (1 / scale, location) + */ + @Override + public LaplaceDistribution estimateFromLMoments(double[] xmom) { + final double location = xmom[0]; + final double scale = 4. / 3. * xmom[1]; + return new LaplaceDistribution(1. / scale, location); + } + + /** + * Number of L-moments read by estimateFromLMoments. + * + * @return 2 + */ + @Override + public int getNumMoments() { + return 2; + } + + /** + * Distribution class produced by this estimator. + * + * @return LaplaceDistribution.class + */ + @Override + public Class<? super LaplaceDistribution> getDistributionClass() { + return LaplaceDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LaplaceLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java new file mode 100644 index 00000000..d4671362 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMADEstimator.java @@ -0,0 +1,82 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LaplaceDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Laplace distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LaplaceDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class LaplaceMADEstimator extends AbstractMADEstimator<LaplaceDistribution> { + /** + * Static instance. + */ + public static final LaplaceMADEstimator STATIC = new LaplaceMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private LaplaceMADEstimator() { + // Do not instantiate + } + + /** + * Estimate Laplace parameters from the median and the MAD. + * + * The location is the median and the scale is 1.443 * mad. The constructor + * receives (1 / scale, location); mad = 0 therefore yields an infinite first + * argument, and no guard is applied here. + * + * @param median Median value, used as location + * @param mad Median absolute deviation from the median + * @return Laplace distribution constructed from (1 / scale, location) + */ + @Override + public LaplaceDistribution estimateFromMedianMAD(double median, double mad) { + final double location = median; + final double scale = 1.443 * mad; + return new LaplaceDistribution(1. / scale, location); + } + + /** + * Distribution class produced by this estimator. + * + * @return LaplaceDistribution.class + */ + @Override + public Class<? super LaplaceDistribution> getDistributionClass() { + return LaplaceDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LaplaceMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java new file mode 100644 index 00000000..f44e2b3a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LaplaceMLEEstimator.java @@ -0,0 +1,94 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LaplaceDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Laplace distribution parameters using Median and mean deviation from + * median. + * + * Reference: + * <p> + * R. M. Norton<br /> + * The Double Exponential Distribution: Using Calculus to Find a Maximum + * Likelihood Estimator<br /> + * The American Statistician 38 (2) + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LaplaceDistribution + */ +@Reference(title = "The Double Exponential Distribution: Using Calculus to Find a Maximum Likelihood Estimator", authors = "R. M. Norton", booktitle = "The American Statistician 38 (2)", url = "http://dx.doi.org/10.2307%2F2683252") +public class LaplaceMLEEstimator implements DistributionEstimator<LaplaceDistribution> { + /** + * Static instance. + */ + public static final LaplaceMLEEstimator STATIC = new LaplaceMLEEstimator(); + + /** + * Private constructor, use static instance! + */ + private LaplaceMLEEstimator() { + // Do not instantiate + } + + /** + * Estimate Laplace parameters from a data array. + * + * Copies all values into a temporary double array, takes the median of that + * array as location, and sums the absolute deviations from the location. + * Despite its name, the local variable meandev holds this sum; the + * constructor receives (len / meandev, location). + * + * NOTE(review): when all values are equal the sum is 0 and the first + * constructor argument is infinite; the behaviour of QuickSelect.median on + * an empty array is not visible here. Neither case is guarded. + * + * @param data Data collection + * @param adapter Adapter used to read the size and the values of data + * @param <A> Data collection type + * @return Laplace distribution constructed from (len / meandev, location) + */ + @Override + public <A> LaplaceDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + int len = adapter.size(data); + double[] temp = new double[len]; + for (int i = 0; i < len; i++) { + temp[i] = adapter.getDouble(data, i); + } + double location = QuickSelect.median(temp); + double meandev = 0.; + for (int i = 0; i < len; i++) { + meandev += Math.abs(temp[i] - location); + } + return new LaplaceDistribution(len / meandev, location); + } + + /** + * Distribution class produced by this estimator. + * + * @return LaplaceDistribution.class + */ + @Override + public Class<? super LaplaceDistribution> getDistributionClass() { + return LaplaceDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LaplaceMLEEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaAlternateExpMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaAlternateExpMADEstimator.java new file mode 100644 index 00000000..7ea0a6be --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaAlternateExpMADEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaAlternateDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Robust parameter estimation for the LogGamma distribution. + * + * A modified algorithm for LogGamma distributions. 
+ * + * @author Erich Schubert + * + * @apiviz.has LogGammaAlternateDistribution - - estimates + */ +public class LogGammaAlternateExpMADEstimator extends AbstractExpMADEstimator<LogGammaAlternateDistribution> { + /** + * Static estimator, more robust to outliers by using the median. + */ + public static final LogGammaAlternateExpMADEstimator STATIC = new LogGammaAlternateExpMADEstimator(); + + /** + * Private constructor. + */ + private LogGammaAlternateExpMADEstimator() { + // Do not instantiate - use static class + } + + @Override + public LogGammaAlternateDistribution estimateFromExpMedianMAD(double median, double mad) { + if (median < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero median."); + } + if (mad < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero MAD."); + } + + final double b = median / (mad * mad); + final double k = median * b; + if (!(k > 0.) || !(b > 0.)) { + throw new ArithmeticException("LogGammaAlternate estimation produced non-positive parameter values: k=" + k + " b=" + b + " median=" + median + " mad=" + mad); + } + return new LogGammaAlternateDistribution(k, Math.log(b), 0.); + } + + @Override + public Class<? super LogGammaAlternateDistribution> getDistributionClass() { + return LogGammaAlternateDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogGammaAlternateExpMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaChoiWetteEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaChoiWetteEstimator.java new file mode 100644 index 00000000..ba3a899d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaChoiWetteEstimator.java @@ -0,0 +1,126 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate distribution parameters using the method by Choi and Wette. + * + * A modified algorithm for LogGamma distributions. + * + * Reference: + * <p> + * Maximum likelihood estimation of the parameters of the gamma distribution and + * their bias<br /> + * S. C. Choi, R. Wette<br /> + * in: Technometrics + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogGammaDistribution - - estimates + */ +@Reference(title = "Maximum likelihood estimation of the parameters of the gamma distribution and their bias", authors = "S. C. Choi, R. Wette", booktitle = "Technometrics", url = "http://www.jstor.org/stable/10.2307/1266892") +public class LogGammaChoiWetteEstimator implements DistributionEstimator<LogGammaDistribution> { + /** + * Static estimation, using iterative refinement. + */ + public static final LogGammaChoiWetteEstimator STATIC = new LogGammaChoiWetteEstimator(); + + /** + * Private constructor. + */ + private LogGammaChoiWetteEstimator() { + // Do not instantiate - use static class + } + + @Override + public <A> LogGammaDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double shift = AbstractLogMOMEstimator.min(data, adapter, 0., 1e-10); + double meanx = 0, meanlogx = 0; + for (int i = 0; i < len; i++) { + double val = adapter.getDouble(data, i) - shift; + if (val <= 0 || Double.isInfinite(val) || Double.isNaN(val)) { + continue; + } + val = Math.log(val); + final double logx = (val > 0) ? 
Math.log(val) : meanlogx; + final double deltax = val - meanx; + final double deltalogx = logx - meanlogx; + meanx += deltax / (i + 1.); + meanlogx += deltalogx / (i + 1.); + } + if (!(meanx > 0)) { + throw new ArithmeticException("Cannot estimate LogGamma distribution with mean " + meanx); + } + // Initial approximation + final double logmeanx = Math.log(meanx); + final double diff = logmeanx - meanlogx; + double k = (3 - diff + Math.sqrt((diff - 3) * (diff - 3) + 24 * diff)) / (12 * diff); + + // Refine via newton iteration, based on Choi and Wette equation + while (true) { + double kdelta = (Math.log(k) - GammaDistribution.digamma(k) - diff) / (1 / k - GammaDistribution.trigamma(k)); + if (Math.abs(kdelta) / k < 1E-8 || !(kdelta < Double.POSITIVE_INFINITY) || !(kdelta > Double.NEGATIVE_INFINITY)) { + break; + } + k += kdelta; + } + // Estimate theta: + final double theta = k / meanx; + if (!(k > 0.0) || !(theta > 0.0)) { + throw new ArithmeticException("LogGamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new LogGammaDistribution(k, theta, shift - 1); + } + + @Override + public Class<? super LogGammaDistribution> getDistributionClass() { + return LogGammaDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogGammaChoiWetteEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMADEstimator.java new file mode 100644 index 00000000..ed34870c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMADEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Robust parameter estimation for the LogGamma distribution. + * + * A modified algorithm for LogGamma distributions. 
+ * + * @author Erich Schubert + * + * @apiviz.has LogGammaDistribution - - estimates + */ +public class LogGammaLogMADEstimator extends AbstractLogMADEstimator<LogGammaDistribution> { + /** + * Static estimator, more robust to outliers by using the median. + */ + public static final LogGammaLogMADEstimator STATIC = new LogGammaLogMADEstimator(); + + /** + * Private constructor. + */ + private LogGammaLogMADEstimator() { + // Do not instantiate - use static class + } + + @Override + public LogGammaDistribution estimateFromLogMedianMAD(double median, double mad, double shift) { + if (median < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero median."); + } + if (mad < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero MAD."); + } + + final double theta = median / (mad * mad); + final double k = median * theta; + if (!(k > 0.) || !(theta > 0.)) { + throw new ArithmeticException("LogGamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new LogGammaDistribution(k, theta, shift - 1); + } + + @Override + public Class<? super LogGammaDistribution> getDistributionClass() { + return LogGammaDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogGammaLogMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMOMEstimator.java new file mode 100644 index 00000000..ddc6cbb9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogGammaLogMOMEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogGammaDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Simple parameter estimation for the Gamma distribution. + * + * This is a very naive estimation, based on the mean and variance of the log + * transformed values. 
+ * + * @author Erich Schubert + * + * @apiviz.has LogGammaDistribution - - estimates + */ +public class LogGammaLogMOMEstimator extends AbstractLogMeanVarianceEstimator<LogGammaDistribution> { + /** + * Static estimation using just the mean and variance. + */ + public static final LogGammaLogMOMEstimator STATIC = new LogGammaLogMOMEstimator(); + + /** + * Private constructor: use static instance. + */ + private LogGammaLogMOMEstimator() { + // Do not instantiate - use static class + } + + @Override + public LogGammaDistribution estimateFromLogMeanVariance(MeanVariance mv, double shift) { + final double mu = mv.getMean(); + final double var = mv.getSampleVariance(); + if (mu < Double.MIN_NORMAL || var < Double.MIN_NORMAL) { + throw new ArithmeticException("Cannot estimate Gamma parameters on a distribution with zero mean or variance: " + mv.toString()); + } + final double theta = mu / var; + final double k = mu * theta; + if (!(k > 0.) || !(theta > 0.)) { + throw new ArithmeticException("LogGamma estimation produced non-positive parameter values: k=" + k + " theta=" + theta); + } + return new LogGammaDistribution(k, theta, shift - 1); + } + + @Override + public Class<? super LogGammaDistribution> getDistributionClass() { + return LogGammaDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogGammaLogMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogLogisticMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogLogisticMADEstimator.java new file mode 100644 index 00000000..61b111c9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogLogisticMADEstimator.java @@ -0,0 +1,81 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogLogisticDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Logistic distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogLogisticDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class LogLogisticMADEstimator extends AbstractMADEstimator<LogLogisticDistribution> { + /** + * Static instance. + */ + public static final LogLogisticMADEstimator STATIC = new LogLogisticMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private LogLogisticMADEstimator() { + // Do not instantiate + } + + @Override + public LogLogisticDistribution estimateFromMedianMAD(double median, double mad) { + return new LogLogisticDistribution(1. / median, MathUtil.LOG3 / mad); + } + + @Override + public Class<? super LogLogisticDistribution> getDistributionClass() { + return LogLogisticDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogLogisticMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMADDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMADDistributionEstimator.java new file mode 100644 index 00000000..9c281952 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMADDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribution estimators that use the median and the median absolute + * deviation (MAD) in logspace. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated. 
+ */ +public interface LogMADDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * General form of the parameter estimation + * + * @param median Median of log values. + * @param mad Median absolute deviation from median (in logspace). + * @param shift Shift offset that was used to avoid negative values. + * @return Estimated distribution + */ + D estimateFromLogMedianMAD(double median, double mad, double shift); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMOMDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMOMDistributionEstimator.java new file mode 100644 index 00000000..5a589faa --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogMOMDistributionEstimator.java @@ -0,0 +1,46 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribution estimators that use the method of moments (MOM) in logspace, + * i.e. that only need the statistical moments of a data set after logarithms. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated. + */ +public interface LogMOMDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * General form of the parameter estimation + * + * @param moments Statistical moments + * @param shift Shifting offset that was used + * @return Estimated distribution + */ + D estimateFromLogStatisticalMoments(StatisticalMoments moments, double shift); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalBilkovaLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalBilkovaLMMEstimator.java new file mode 100644 index 00000000..e8fab89f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalBilkovaLMMEstimator.java @@ -0,0 +1,104 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Alternate estimate of the parameters of a log Normal Distribution, using the + * methods of L-Moments (LMM) for the Generalized Normal Distribution. + * + * Reference: + * <p> + * D. Bílková<br /> + * Lognormal distribution and using L-moment method for estimating its + * parameters<br /> + * Int. Journal of Mathematical Models and Methods in Applied Sciences (NAUN) + * </p> + * + * See also {@link LogNormalLMMEstimator} for a similar estimator, based on the + * generalized normal distribution, as used by Hosking. + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution + */ +@Reference(authors = "D. Bílková", title = "Lognormal distribution and using L-moment method for estimating its parameters", booktitle = "Int. Journal of Mathematical Models and Methods in Applied Sciences (NAUN)", url = "http://www.naun.org/multimedia/NAUN/m3as/17-079.pdf") +public class LogNormalBilkovaLMMEstimator extends AbstractLMMEstimator<LogNormalDistribution> { + /** + * Static instance. + */ + public static final LogNormalBilkovaLMMEstimator STATIC = new LogNormalBilkovaLMMEstimator(); + + /** + * Scaling constant. + */ + private static final double SQRT8_3 = Math.sqrt(8. / 3.); + + /** + * Constructor. Private: use static instance. + */ + private LogNormalBilkovaLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + @Override + public LogNormalDistribution estimateFromLMoments(double[] xmom) { + if (!(xmom[1] > 0.) 
|| !(Math.abs(xmom[2]) < 1.0) || !(xmom[2] > 0.)) { + throw new ArithmeticException("L-Moments invalid"); + } + final double z = SQRT8_3 * NormalDistribution.standardNormalQuantile(.5 * (1. + xmom[2])), z2 = z * z; + final double sigma = 0.999281 * z - 0.006118 * z * z2 + 0.000127 * z * z2 * z2; + final double sigmasqhalf = sigma * sigma * .5; + final double logmu = Math.log(xmom[1] / NormalDistribution.erf(.5 * sigma)) - sigmasqhalf; + return new LogNormalDistribution(logmu, Math.max(sigma, Double.MIN_NORMAL), xmom[0] - Math.exp(logmu + sigmasqhalf)); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalBilkovaLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLMMEstimator.java new file mode 100644 index 00000000..48865d3f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLMMEstimator.java @@ -0,0 +1,130 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a log Normal Distribution, using the methods of + * L-Moments (LMM) for the Generalized Normal Distribution. + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class LogNormalLMMEstimator extends AbstractLMMEstimator<LogNormalDistribution> { + /** + * Static instance. + */ + public static final LogNormalLMMEstimator STATIC = new LogNormalLMMEstimator(); + + /** Polynomial approximation */ + private static final double // + A0 = 0.20466534e+01, // + A1 = -0.36544371e+01, // + A2 = 0.18396733e+01, // + A3 = -0.20360244; + + /** Polynomial approximation */ + private static final double // + B1 = -0.20182173e+01, // + B2 = 0.12420401e+01, // + B3 = -0.21741801; + + /** + * Constructor. Private: use static instance. 
+ */ + private LogNormalLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + @Override + public LogNormalDistribution estimateFromLMoments(double[] xmom) { + // Note: the third condition probably is okay for Generalized Normal, but + // not for lognormal estimation. + if (!(xmom[1] > 0.) || !(Math.abs(xmom[2]) < 1.0) || !(xmom[2] > 0.0)) { + throw new ArithmeticException("L-Moments invalid"); + } + // Generalized Normal Distribution estimation: + double t3 = xmom[2]; + final double location, scale, shape; + if (Math.abs(t3) >= .95) { + // Extreme skewness + location = 0.; + scale = -1; + shape = 0.; + } else if (Math.abs(t3) < 1e-8) { + // t3 effectively zero. + location = xmom[0]; + scale = xmom[1] * MathUtil.SQRTPI; + shape = 0.; + } else { + final double tt = t3 * t3; + shape = -t3 * (A0 + tt * (A1 + tt * (A2 + tt * A3))) / (1. + tt * (B1 + tt * (B2 + tt * B3))); + final double e = Math.exp(.5 * shape * shape); + scale = xmom[1] * shape / (e * NormalDistribution.erf(.5 * shape)); + location = xmom[0] + scale * (e - 1.) / shape; + } + // Estimate logNormal from generalized normal: + final double sigma = -shape; + final double expmu = scale / sigma; + return new LogNormalDistribution(Math.log(expmu), Math.max(sigma, Double.MIN_NORMAL), location - expmu); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLevenbergMarquardtKDEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLevenbergMarquardtKDEEstimator.java new file mode 100644 index 00000000..b4b8ff0d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLevenbergMarquardtKDEEstimator.java @@ -0,0 +1,122 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.GaussianFittingFunction; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod; +import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Distribution parameter estimation using Levenberg-Marquardt iterative + * optimization and a kernel density estimation. + * + * Note: this estimator is rather expensive, and needs optimization in the KDE + * phase, which currently is O(n^2)! + * + * This estimator is primarily attractive when only part of the distribution was + * observed. + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution - - estimates + */ +public class LogNormalLevenbergMarquardtKDEEstimator implements DistributionEstimator<LogNormalDistribution> { + /** + * Static estimator for small sample sizes and <em>partial</em> data. + */ + public static final LogNormalLevenbergMarquardtKDEEstimator STATIC = new LogNormalLevenbergMarquardtKDEEstimator(); + + /** + * Constructor. Private: use static instance! 
+ */ + private LogNormalLevenbergMarquardtKDEEstimator() { + super(); + } + + /** + * Fit a Gaussian to a kernel density estimate of the logarithms of the data, + * using Levenberg-Marquardt optimization. + * + * @param data Data set + * @param adapter Adapter used to read size and values of the data set + * @return Estimated distribution (shift fixed to 0) + * @throws ArithmeticException when the data set is empty or contains + * non-positive values + */ + @Override + public <A> LogNormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + final int len = adapter.size(data); + if (len < 1) { + throw new ArithmeticException("Cannot fit logNormal to an empty data set."); + } + MeanVariance mv = new MeanVariance(); + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (!(val > 0)) { + throw new ArithmeticException("Cannot fit logNormal to a data set which includes non-positive values: " + val); + } + x[i] = Math.log(val); + mv.put(x[i]); + } + // Sort our copy. + Arrays.sort(x); + // Median: the lower and upper middle elements coincide for odd lengths. + double median = (x[(len - 1) >> 1] + x[len >> 1]) * .5; + + // Height = density, via KDE. + KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6); + double[] y = de.getDensity(); + + // Weights: + double[] s = new double[len]; + Arrays.fill(s, 1.0); + + // Initial parameter estimate; the third parameter is held fixed at 1: + double[] params = { median, mv.getSampleStddev(), 1 }; + boolean[] dofit = { true, true, false }; + LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s); + fit.run(); + double[] ps = fit.getParams(); + return new LogNormalDistribution(ps[0], ps[1], 0.); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLevenbergMarquardtKDEEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMADEstimator.java new file mode 100644 index 00000000..6ad1dc33 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMADEstimator.java @@ -0,0 +1,89 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimator using Medians. 
More robust to outliers, and just slightly more + * expensive (needs to copy the data for partial sorting to find the median). + * + * References: + * <p> + * F. R. Hampel<br /> + * The Influence Curve and Its Role in Robust Estimation<br /> + * in: Journal of the American Statistical Association, June 1974, Vol. 69, No. + * 346 + * </p> + * <p> + * P. J. Rousseeuw, C. Croux<br /> + * Alternatives to the Median Absolute Deviation<br /> + * in: Journal of the American Statistical Association, December 1993, Vol. 88, + * No. 424, Theory and Methods + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution - - estimates + */ +@Reference(authors = "F. R. Hampel", title = "The Influence Curve and Its Role in Robust Estimation", booktitle = "Journal of the American Statistical Association, June 1974, Vol. 69, No. 346", url = "http://www.jstor.org/stable/10.2307/2285666") +public class LogNormalLogMADEstimator extends AbstractLogMADEstimator<LogNormalDistribution> { + /** + * Static estimator, more robust to outliers by using the median. + */ + public static final LogNormalLogMADEstimator STATIC = new LogNormalLogMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private LogNormalLogMADEstimator() { + super(); + } + + /** + * Build the distribution from median and MAD. The MAD is scaled by + * NormalDistribution.ONEBYPHIINV075 and floored at Double.MIN_NORMAL, so + * that the second parameter is strictly positive. + * + * @param median Median, used as first distribution parameter + * @param mad Median absolute deviation + * @param shift Shift, passed through as third distribution parameter + * @return Estimated distribution + */ + @Override + public LogNormalDistribution estimateFromLogMedianMAD(double median, double mad, double shift) { + return new LogNormalDistribution(median, Math.max(NormalDistribution.ONEBYPHIINV075 * mad, Double.MIN_NORMAL), shift); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLogMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMOMEstimator.java new file mode 100644 index 00000000..5b753c54 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogNormalLogMOMEstimator.java @@ -0,0 +1,74 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogNormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Naive distribution estimation using mean and sample variance. + * + * This is a maximum-likelihood-estimator (MLE). 
+ * + * @author Erich Schubert + * + * @apiviz.has LogNormalDistribution - - estimates + */ +public class LogNormalLogMOMEstimator extends AbstractLogMeanVarianceEstimator<LogNormalDistribution> { + /** + * Static estimator, using mean and variance. + */ + public static final LogNormalLogMOMEstimator STATIC = new LogNormalLogMOMEstimator(); + + /** + * Private constructor, use static instance! + */ + private LogNormalLogMOMEstimator() { + super(); + } + + /** + * Build the distribution from mean and sample standard deviation. The + * standard deviation is floored at Double.MIN_NORMAL, so that the second + * parameter is strictly positive. + * + * @param mv Mean and variance + * @param shift Shift, passed through as third distribution parameter + * @return Estimated distribution + */ + @Override + public LogNormalDistribution estimateFromLogMeanVariance(MeanVariance mv, double shift) { + return new LogNormalDistribution(mv.getMean(), Math.max(mv.getSampleStddev(), Double.MIN_NORMAL), shift); + } + + @Override + public Class<? super LogNormalDistribution> getDistributionClass() { + return LogNormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogNormalLogMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticLMMEstimator.java new file mode 100644 index 00000000..973a91de --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticLMMEstimator.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogisticDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a Logistic Distribution, using the methods of + * L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogisticDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class LogisticLMMEstimator extends AbstractLMMEstimator<LogisticDistribution> { + /** + * Static instance. + */ + public static final LogisticLMMEstimator STATIC = new LogisticLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private LogisticLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + @Override + public LogisticDistribution estimateFromLMoments(double[] xmom) { + // The original publication would also estimate a shape, but we don't have + // the generalized logistic distribution yet. + // So we continue as if the Type II shape is 0, fairly trivial: + return new LogisticDistribution(xmom[0], xmom[1]); + } + + @Override + public Class<? super LogisticDistribution> getDistributionClass() { + return LogisticDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogisticLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticMADEstimator.java new file mode 100644 index 00000000..45181486 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/LogisticMADEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.LogisticDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Logistic distribution parameters using Median and MAD. + * + * Reference: + * <p> + * Robust Estimators for Transformed Location Scale Families<br /> + * D. 
J. Olive + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LogisticDistribution + */ +@Reference(title = "Robust Estimators for Transformed Location Scale Families", authors = "D. J. Olive", booktitle = "") +public class LogisticMADEstimator extends AbstractMADEstimator<LogisticDistribution> { + /** + * Static instance. + */ + public static final LogisticMADEstimator STATIC = new LogisticMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private LogisticMADEstimator() { + // Only instantiated once, for the static instance. + } + + /** + * Build the distribution from median and MAD: the median is used as first + * parameter, and the MAD divided by MathUtil.LOG3 as second parameter. + * + * @param median Median value + * @param mad Median absolute deviation from median + * @return Estimated distribution + */ + @Override + public LogisticDistribution estimateFromMedianMAD(double median, double mad) { + return new LogisticDistribution(median, mad / MathUtil.LOG3); + } + + @Override + public Class<? super LogisticDistribution> getDistributionClass() { + return LogisticDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected LogisticMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MADDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MADDistributionEstimator.java new file mode 100644 index 00000000..6bf2b3ae --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MADDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + 
(at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribution estimators that use the median and the median absolute + * deviation from the median (MAD), i.e. that only need these two statistics. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated. + */ +public interface MADDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * General form of the parameter estimation + * + * @param median Median value + * @param mad Median absolute deviation from median + * @return Estimated distribution + */ + D estimateFromMedianMAD(double median, double mad); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MOMDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MOMDistributionEstimator.java new file mode 100644 index 00000000..383f68cc --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MOMDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) 
any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; + +/** + * Distribution estimators that use the method of moments (MOM), i.e. that only + * need the statistical moments of a data set. + * + * @author Erich Schubert + * + * @param <D> Distribution estimated. + */ +public interface MOMDistributionEstimator<D extends Distribution> extends DistributionEstimator<D> { + /** + * General form of the parameter estimation + * + * @param moments Statistical moments + * @return Estimated distribution + */ + D estimateFromStatisticalMoments(StatisticalMoments moments); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MeanVarianceDistributionEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MeanVarianceDistributionEstimator.java new file mode 100644 index 00000000..4d84465f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/MeanVarianceDistributionEstimator.java @@ -0,0 +1,45 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the 
License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.MeanVariance; + +/** + * Interface for estimators that only need mean and variance. + * + * These can implicitly (obviously) also handle full statistical moments. + * + * @author Erich Schubert + * + * @param <D> Distribution type + */ +public interface MeanVarianceDistributionEstimator<D extends Distribution> extends MOMDistributionEstimator<D> { + /** + * Estimate the distribution from mean and variance. + * + * @param mv Mean and variance. 
+ * @return Estimated distribution + */ + D estimateFromMeanVariance(MeanVariance mv); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLMMEstimator.java new file mode 100644 index 00000000..67880950 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLMMEstimator.java @@ -0,0 +1,87 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a normal distribution using the method of + * L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. 
+ * </p> + * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class NormalLMMEstimator extends AbstractLMMEstimator<NormalDistribution> { + /** + * Static instance + */ + public static final NormalLMMEstimator STATIC = new NormalLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private NormalLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 2; + } + + /** + * Build the distribution from the first two L-moments: xmom[0] is used as + * first parameter, and xmom[1] times MathUtil.SQRTPI, floored at + * Double.MIN_NORMAL, as second parameter. + * + * @param xmom L-moments; xmom[0] and xmom[1] are read + * @return Estimated distribution + */ + @Override + public NormalDistribution estimateFromLMoments(double[] xmom) { + return new NormalDistribution(xmom[0], Math.max(xmom[1] * MathUtil.SQRTPI, Double.MIN_NORMAL)); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLevenbergMarquardtKDEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLevenbergMarquardtKDEEstimator.java new file mode 100644 index 00000000..bbbcda76 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalLevenbergMarquardtKDEEstimator.java @@ -0,0 +1,118 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU 
Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.GaussianFittingFunction; +import de.lmu.ifi.dbs.elki.math.linearalgebra.fitting.LevenbergMarquardtMethod; +import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Distribution parameter estimation using Levenberg-Marquardt iterative + * optimization and a kernel density estimation. + * + * Note: this estimator is rather expensive, and needs optimization in the KDE + * phase, which currently is O(n^2)! + * + * This estimator is primarily attractive when only part of the distribution was + * observed. + * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution - - estimates + */ +public class NormalLevenbergMarquardtKDEEstimator implements DistributionEstimator<NormalDistribution> { + /** + * Static estimator for small sample sizes and <em>partial</em> data. + */ + public static final NormalLevenbergMarquardtKDEEstimator STATIC = new NormalLevenbergMarquardtKDEEstimator(); + + /** + * Constructor. Private: use static instance. 
+ */ + private NormalLevenbergMarquardtKDEEstimator() { + super(); + } + + /** + * Fit a Gaussian to a kernel density estimate of the data, using + * Levenberg-Marquardt optimization. + * + * @param data Data set + * @param adapter Adapter used to read size and values of the data set + * @return Estimated distribution + * @throws ArithmeticException when the data set is empty + */ + @Override + public <A> NormalDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + final int len = adapter.size(data); + if (len < 1) { + throw new ArithmeticException("Cannot fit normal distribution to an empty data set."); + } + MeanVariance mv = new MeanVariance(); + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + x[i] = adapter.getDouble(data, i); + mv.put(x[i]); + } + // Sort our copy. + Arrays.sort(x); + // Median: the lower and upper middle elements coincide for odd lengths. + double median = (x[(len - 1) >> 1] + x[len >> 1]) * .5; + + // Height = density, via KDE. + KernelDensityEstimator de = new KernelDensityEstimator(x, GaussianKernelDensityFunction.KERNEL, 1e-6); + double[] y = de.getDensity(); + + // Weights: + double[] s = new double[len]; + Arrays.fill(s, 1.0); + + // Initial parameter estimate; the third parameter is held fixed at 1: + double[] params = { median, mv.getSampleStddev(), 1 }; + boolean[] dofit = { true, true, false }; + LevenbergMarquardtMethod fit = new LevenbergMarquardtMethod(GaussianFittingFunction.STATIC, params, dofit, x, y, s); + fit.run(); + double[] ps = fit.getParams(); + return new NormalDistribution(ps[0], ps[1]); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalLevenbergMarquardtKDEEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMADEstimator.java new file mode 100644 index 00000000..2221ad4b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMADEstimator.java @@ -0,0 +1,88 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimator using Medians. More robust to outliers, and just slightly more + * expensive (needs to copy the data for partial sorting to find the median). + * + * References: + * <p> + * F. R. 
Hampel<br /> + * The Influence Curve and Its Role in Robust Estimation<br /> + * in: Journal of the American Statistical Association, June 1974, Vol. 69, No. + * 346 + * </p> + * <p> + * P. J. Rousseeuw, C. Croux<br /> + * Alternatives to the Median Absolute Deviation<br /> + * in: Journal of the American Statistical Association, December 1993, Vol. 88, + * No. 424, Theory and Methods + * </p> + * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution - - estimates + */ +@Reference(authors = "F. R. Hampel", title = "The Influence Curve and Its Role in Robust Estimation", booktitle = "Journal of the American Statistical Association, June 1974, Vol. 69, No. 346", url = "http://www.jstor.org/stable/10.2307/2285666") +public class NormalMADEstimator extends AbstractMADEstimator<NormalDistribution> { + /** + * Static estimator, more robust to outliers by using the median. + */ + public static final NormalMADEstimator STATIC = new NormalMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private NormalMADEstimator() { + super(); + } + + /** + * Build the distribution from median and MAD. The MAD is scaled by + * NormalDistribution.ONEBYPHIINV075 and floored at Double.MIN_NORMAL, so + * that the second parameter is strictly positive. + * + * @param median Median value, used as first distribution parameter + * @param mad Median absolute deviation from median + * @return Estimated distribution + */ + @Override + public NormalDistribution estimateFromMedianMAD(double median, double mad) { + return new NormalDistribution(median, Math.max(NormalDistribution.ONEBYPHIINV075 * mad, Double.MIN_NORMAL)); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMOMEstimator.java new file mode 100644 index 00000000..dae05eb0 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/NormalMOMEstimator.java @@ -0,0 +1,76 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Naive maximum-likelihood estimations for the normal distribution using mean + * and sample variance. + * + * While this is the most commonly used estimator, it is not very robust against + * extreme values. 
+ * + * @author Erich Schubert + * + * @apiviz.has NormalDistribution - - estimates + */ +public class NormalMOMEstimator extends AbstractMeanVarianceEstimator<NormalDistribution> { + /** + * Static estimator, using mean and variance. Declared final so that the + * shared instance cannot be reassigned. + */ + public static final NormalMOMEstimator STATIC = new NormalMOMEstimator(); + + /** + * Private constructor, use static instance! + */ + private NormalMOMEstimator() { + // Do not instantiate + } + + /** + * Build the distribution from the collected statistics: + * {@code mv.getMean()} is the first constructor argument, + * {@code mv.getSampleStddev()} the second, raised to at least + * {@code Double.MIN_NORMAL} so that it is never zero. + * + * @param mv Mean and variance statistics + * @return Estimated distribution + */ + @Override + public NormalDistribution estimateFromMeanVariance(MeanVariance mv) { + return new NormalDistribution(mv.getMean(), Math.max(mv.getSampleStddev(), Double.MIN_NORMAL)); + } + + @Override + public Class<? super NormalDistribution> getDistributionClass() { + return NormalDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected NormalMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighLMMEstimator.java new file mode 100644 index 00000000..a827e1e8 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighLMMEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.RayleighDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a RayleighDistribution using the method of + * L-Moments (LMM). Both {@code mu} and {@code sigma} are computed from the + * L-moments {@code xmom[0]} and {@code xmom[1]} and passed to the + * distribution. + * + * @author Erich Schubert + * + * @apiviz.has RayleighDistribution - - estimates + */ +public class RayleighLMMEstimator extends AbstractLMMEstimator<RayleighDistribution> { + /** + * Static instance. + */ + public static final RayleighLMMEstimator STATIC = new RayleighLMMEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private RayleighLMMEstimator() { + super(); + } + + /** + * Number of L-moments needed; the estimation reads {@code xmom[0]} and + * {@code xmom[1]} only. + * + * @return 2 + */ + @Override + public int getNumMoments() { + return 2; + } + + /** + * Estimate from L-moments: {@code sigma = 2 * xmom[1] / (MathUtil.SQRTPI * + * (MathUtil.SQRT2 - 1))} and {@code mu = xmom[0] - sigma * + * MathUtil.SQRTHALFPI}. + * + * @param xmom L-moments, at least two entries + * @return Estimated distribution, constructed as (mu, sigma) + */ + @Override + public RayleighDistribution estimateFromLMoments(double[] xmom) { + double sigma = 2. * xmom[1] / (MathUtil.SQRTPI * (MathUtil.SQRT2 - 1.)); + double mu = xmom[0] - sigma * MathUtil.SQRTHALFPI; + return new RayleighDistribution(mu, sigma); + } + + @Override + public Class<? super RayleighDistribution> getDistributionClass() { + return RayleighDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RayleighLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMADEstimator.java new file mode 100644 index 00000000..7382ada2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMADEstimator.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.RayleighDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a RayleighDistribution using the MAD. + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has RayleighDistribution - - estimates + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url = "http://lagrange.math.siu.edu/Olive/preprints.htm") +public class RayleighMADEstimator extends AbstractMADEstimator<RayleighDistribution> { + /** + * Static instance. + */ + public static final RayleighMADEstimator STATIC = new RayleighMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private RayleighMADEstimator() { + super(); + } + + /** + * See reference for the derivation of these constants. F1 is the factor + * applied to the MAD for the second constructor argument; F2 (which is + * 1.17741 * F1) times the MAD is subtracted from the median for the first + * constructor argument. + */ + private static final double F1 = 1. / 0.448453, F2 = 1.17741 * F1; + + /** + * Build the distribution from median and MAD as + * {@code (median - F2 * mad, F1 * mad)}. + * + * @param median Median of the data + * @param mad Median absolute deviation from the median + * @return Estimated distribution + */ + @Override + public RayleighDistribution estimateFromMedianMAD(double median, double mad) { + return new RayleighDistribution(median - F2 * mad, F1 * mad); + } + + @Override + public Class<? super RayleighDistribution> getDistributionClass() { + return RayleighDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RayleighMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMLEEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMLEEstimator.java new file mode 100644 index 00000000..aa5dc300 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/RayleighMLEEstimator.java @@ -0,0 +1,84 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.RayleighDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the scale parameter of a (non-shifted) RayleighDistribution using a + * maximum likelihood estimate. 
+ * + * @author Erich Schubert + * + * @apiviz.has RayleighDistribution - - estimates + */ +public class RayleighMLEEstimator implements DistributionEstimator<RayleighDistribution> { + /** + * Static instance. + */ + public static final RayleighMLEEstimator STATIC = new RayleighMLEEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private RayleighMLEEstimator() { + super(); + } + + /** + * Estimate from raw data: sums the squares of all values and constructs the + * distribution with the single argument {@code sqrt(0.5 * sumsq / len)}. + * For an empty input ({@code len == 0}) the expression is 0.0 / 0 and the + * argument becomes NaN. + * + * @param data Data container + * @param adapter Adapter used to read size and double values from data + * @param <A> Data container type + * @return Estimated distribution + */ + @Override + public <A> RayleighDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double sumsq = 0.; + for(int i = 0; i < len; i++) { + double v = adapter.getDouble(data, i); + sumsq += v * v; + } + return new RayleighDistribution(Math.sqrt(.5 * sumsq / len)); + } + + @Override + public Class<? super RayleighDistribution> getDistributionClass() { + return RayleighDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected RayleighMLEEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/SkewGNormalLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/SkewGNormalLMMEstimator.java new file mode 100644 index 00000000..df05eef9 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/SkewGNormalLMMEstimator.java @@ -0,0 +1,125 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU 
Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.SkewGeneralizedNormalDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a skew Normal Distribution (Hosking's Generalized + * Normal Distribution), using the methods of L-Moments (LMM). + * + * Reference: + * <p> + * J. R. M. Hosking<br /> + * Fortran routines for use with the method of L-moments Version 3.03<br /> + * IBM Research. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has SkewGeneralizedNormalDistribution + */ +@Reference(authors = "J.R.M. Hosking", title = "Fortran routines for use with the method of L-moments Version 3.03", booktitle = "IBM Research Technical Report") +public class SkewGNormalLMMEstimator extends AbstractLMMEstimator<SkewGeneralizedNormalDistribution> { + /** + * Static instance. 
+ */ + public static final SkewGNormalLMMEstimator STATIC = new SkewGNormalLMMEstimator(); + + /** Polynomial approximation: numerator coefficients of the rational function in t3 * t3 used for the shape */ + private static final double // + A0 = 0.20466534e+01, // + A1 = -0.36544371e+01, // + A2 = 0.18396733e+01, // + A3 = -0.20360244; + + /** Polynomial approximation: denominator coefficients of the same rational function (the constant term is 1) */ + private static final double // + B1 = -0.20182173e+01, // + B2 = 0.12420401e+01, // + B3 = -0.21741801; + + /** + * Constructor. Private: use static instance. + */ + private SkewGNormalLMMEstimator() { + super(); + } + + /** + * Number of L-moments needed; the estimation reads {@code xmom[0]}, + * {@code xmom[1]} and {@code xmom[2]}. + * + * @return 3 + */ + @Override + public int getNumMoments() { + return 3; + } + + /** + * Estimate location, scale and shape from L-moments. {@code xmom[2]} is + * used directly as {@code t3} (presumably the L-skewness ratio - confirm + * against the caller). Three cases are distinguished: for + * {@code |t3| <= 1e-8} the shape is 0, the location is {@code xmom[0]} and + * the scale is {@code xmom[1] * MathUtil.SQRTPI}; otherwise the shape is + * obtained from the rational approximation with coefficients A0..A3 and + * B1..B3, and scale and location are derived from it; for + * {@code |t3| >= .95} the fixed values (0, -1, 0) are used. + * NOTE(review): the last case passes a scale of -1 to the distribution + * constructor - confirm that callers treat this as a failure marker. + * + * @param xmom L-moments, at least three entries + * @return Estimated distribution + * @throws ArithmeticException if {@code xmom[1] > 0} or + * {@code |xmom[2]| < 1} does not hold (this includes NaN) + */ + @Override + public SkewGeneralizedNormalDistribution estimateFromLMoments(double[] xmom) { + if (!(xmom[1] > 0.) || !(Math.abs(xmom[2]) < 1.0)) { + throw new ArithmeticException("L-Moments invalid"); + } + // Generalized Normal Distribution estimation: + double t3 = xmom[2]; + final double location, scale, shape; + if (Math.abs(t3) >= .95) { + // Extreme skewness + location = 0.; + scale = -1.; + shape = 0.; + } else if (Math.abs(t3) <= 1e-8) { + // t3 effectively zero. + location = xmom[0]; + scale = xmom[1] * MathUtil.SQRTPI; + shape = 0.; + } else { + final double tt = t3 * t3; + shape = -t3 * (A0 + tt * (A1 + tt * (A2 + tt * A3))) / (1. + tt * (B1 + tt * (B2 + tt * B3))); + final double e = Math.exp(.5 * shape * shape); + scale = xmom[1] * shape / (e * NormalDistribution.erf(.5 * shape)); + location = xmom[0] + scale * (e - 1.) / shape; + } + return new SkewGeneralizedNormalDistribution(location, scale, shape); + } + + @Override + public Class<? super SkewGeneralizedNormalDistribution> getDistributionClass() { + return SkewGeneralizedNormalDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected SkewGNormalLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformEnhancedMinMaxEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformEnhancedMinMaxEstimator.java new file mode 100644 index 00000000..834b0d94 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformEnhancedMinMaxEstimator.java @@ -0,0 +1,100 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Slightly improved estimation, that takes sample size into account and + * enhances the interval appropriately. + * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution - - estimates + */ +public class UniformEnhancedMinMaxEstimator implements DistributionEstimator<UniformDistribution> { + /** + * Slightly more refined estimator: takes sample size into account. + */ + public static final UniformEnhancedMinMaxEstimator STATIC = new UniformEnhancedMinMaxEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private UniformEnhancedMinMaxEstimator() { + super(); + } + + /** + * Estimate from raw data: collects minimum and maximum of all values that + * are strictly between negative and positive infinity (NaN fails both + * comparisons and is skipped as well), then delegates to + * {@link #estimate(double, double, int)}. + * NOTE(review): the count passed on is the full input length, including any + * skipped values - confirm this is intended. + * + * @param data Data container + * @param adapter Adapter used to read size and double values from data + * @param <A> Data container type + * @return Estimated distribution + */ + @Override + public <A> UniformDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + DoubleMinMax mm = new DoubleMinMax(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (val > Double.NEGATIVE_INFINITY && val < Double.POSITIVE_INFINITY) { + mm.put(val); + } + } + return estimate(mm.getMin(), mm.getMax(), len); + } + + /** + * Estimate from simple characteristics. The interval is widened on both + * sides by {@code 0.5 * (max - min) / (count - 1)} when count is larger + * than 1 (not at all otherwise), and the resulting bounds are clamped to + * {@code -Double.MAX_VALUE} and {@code Double.MAX_VALUE}. + * + * @param min Minimum + * @param max Maximum + * @param count Number of observations + * @return Distribution + */ + public UniformDistribution estimate(double min, double max, final int count) { + double grow = (count > 1) ? 0.5 * (max - min) / (count - 1) : 0.; + return new UniformDistribution(Math.max(min - grow, -Double.MAX_VALUE), Math.min(max + grow, Double.MAX_VALUE)); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformEnhancedMinMaxEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformLMMEstimator.java new file mode 100644 index 00000000..5b3e868f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformLMMEstimator.java @@ -0,0 +1,77 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the parameters of a uniform distribution using the method of + * L-Moments (LMM). 
+ * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution + */ +public class UniformLMMEstimator extends AbstractLMMEstimator<UniformDistribution> { + /** + * Static instance + */ + public static final UniformLMMEstimator STATIC = new UniformLMMEstimator(); + + /** + * Constructor. Private: use static instance. + */ + private UniformLMMEstimator() { + super(); + } + + /** + * Number of L-moments needed; the estimation reads {@code xmom[0]} and + * {@code xmom[1]} only. + * + * @return 2 + */ + @Override + public int getNumMoments() { + return 2; + } + + /** + * Estimate from L-moments: the bounds are {@code xmom[0] - 3 * xmom[1]} and + * {@code xmom[0] + 3 * xmom[1]}, clamped to {@code -Double.MAX_VALUE} and + * {@code Double.MAX_VALUE} respectively. + * + * @param xmom L-moments, at least two entries + * @return Estimated distribution + */ + @Override + public UniformDistribution estimateFromLMoments(double[] xmom) { + return new UniformDistribution(Math.max(xmom[0] - 3 * xmom[1], -Double.MAX_VALUE), Math.min(xmom[0] + 3 * xmom[1], Double.MAX_VALUE)); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMADEstimator.java new file mode 100644 index 00000000..47dad134 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMADEstimator.java @@ -0,0 +1,80 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate Uniform distribution parameters using Median and MAD. + * + * Reference: + * <p> + * D. J. Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class UniformMADEstimator extends AbstractMADEstimator<UniformDistribution> { + /** + * Static instance. + */ + public static final UniformMADEstimator STATIC = new UniformMADEstimator(); + + /** + * Private constructor, use static instance! + */ + private UniformMADEstimator() { + // Do not instantiate + } + + /** + * Build the distribution from median and MAD: the bounds are + * {@code median - 2 * mad} and {@code median + 2 * mad}, clamped to + * {@code -Double.MAX_VALUE} and {@code Double.MAX_VALUE} respectively. + * + * @param median Median of the data + * @param mad Median absolute deviation from the median + * @return Estimated distribution + */ + @Override + public UniformDistribution estimateFromMedianMAD(double median, double mad) { + return new UniformDistribution(Math.max(median - 2 * mad, -Double.MAX_VALUE), Math.min(median + 2 * mad, Double.MAX_VALUE)); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java new file mode 100644 index 00000000..e9870884 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/UniformMinMaxEstimator.java @@ -0,0 +1,107 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate the uniform distribution by computing min and max. 
+ * + * @author Erich Schubert + * + * @apiviz.has UniformDistribution - - estimates + */ +public class UniformMinMaxEstimator implements DistributionEstimator<UniformDistribution> { + /** + * The most naive estimator possible: uses minimum and maximum. + */ + public static final UniformMinMaxEstimator STATIC = new UniformMinMaxEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private UniformMinMaxEstimator() { + super(); + } + + /** + * Estimate from raw data: collects minimum and maximum of all values that + * are strictly between negative and positive infinity (NaN fails both + * comparisons and is skipped as well), then delegates to + * {@link #estimate(DoubleMinMax)}. + * + * @param data Data container + * @param adapter Adapter used to read size and double values from data + * @param <A> Data container type + * @return Estimated distribution + */ + @Override + public <A> UniformDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + DoubleMinMax mm = new DoubleMinMax(); + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + if (val > Double.NEGATIVE_INFINITY && val < Double.POSITIVE_INFINITY) { + mm.put(val); + } + } + return estimate(mm); + } + + /** + * Estimate parameters from minimum and maximum observed. The bounds are + * clamped to {@code -Double.MAX_VALUE} and {@code Double.MAX_VALUE}. + * + * @param mm Minimum and Maximum + * @return Estimation + */ + public UniformDistribution estimate(DoubleMinMax mm) { + return new UniformDistribution(Math.max(mm.getMin(), -Double.MAX_VALUE), Math.min(mm.getMax(), Double.MAX_VALUE)); + } + + /** + * Estimate parameters from minimum and maximum observed. Unlike + * {@link #estimate(DoubleMinMax)}, the two values are passed on unchanged + * (no clamping), and the declared return type is the general + * {@code Distribution}. + * + * @param min Minimum + * @param max Maximum + * @return Estimation + */ + public Distribution estimate(double min, double max) { + return new UniformDistribution(min, max); + } + + @Override + public Class<? super UniformDistribution> getDistributionClass() { + return UniformDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformMinMaxEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMLEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMLEstimator.java new file mode 100644 index 00000000..16a33f89 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMLEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WaldDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate parameter of the Wald distribution. 
+ * + * @author Erich Schubert + * + * @apiviz.has WaldDistribution + */ +public class WaldMLEstimator implements DistributionEstimator<WaldDistribution> { + /** + * Static instance. + */ + public static final WaldMLEstimator STATIC = new WaldMLEstimator(); + + /** + * Estimate from raw data in two passes. The first pass computes the + * arithmetic mean of all values. The second pass sums + * {@code 1 / v - 1 / mean} over the strictly positive values. The + * distribution is then constructed from the mean and {@code len / sum}. + * NOTE(review): values that are not strictly positive are left out of the + * second sum but still contribute to the mean and to {@code len} - confirm + * this is intended. For an empty input the mean is 0.0 / 0, i.e. NaN. + * + * @param data Data container + * @param adapter Adapter used to read size and double values from data + * @param <A> Data container type + * @return Estimated distribution + */ + @Override + public <A> WaldDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + final int len = adapter.size(data); + double mean = 0.; + for(int i = 0; i < len; i++) { + double v = adapter.getDouble(data, i); + mean += v; + } + mean /= len; + double invmean = 1. / mean; + double invdev = 0.; + for(int i = 0; i < len; i++) { + double v = adapter.getDouble(data, i); + if(v > 0.) { + invdev += 1. / v - invmean; + } + } + return new WaldDistribution(mean, len / invdev); + } + + @Override + public Class<? super WaldDistribution> getDistributionClass() { + return WaldDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WaldMLEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMOMEstimator.java new file mode 100644 index 00000000..82b70936 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WaldMOMEstimator.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General 
Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WaldDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate parameter of the Wald distribution. + * + * @author Erich Schubert + * + * @apiviz.has WaldDistribution + */ +public class WaldMOMEstimator extends AbstractMeanVarianceEstimator<WaldDistribution> { + /** + * Static instance. + */ + public static final WaldMOMEstimator STATIC = new WaldMOMEstimator(); + + @Override + public WaldDistribution estimateFromMeanVariance(MeanVariance mv) { + double mean = mv.getMean(); + return new WaldDistribution(mean, mean * mean * mean / mv.getSampleVariance()); + } + + @Override + public Class<? super WaldDistribution> getDistributionClass() { + return WaldDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WaldMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLMMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLMMEstimator.java new file mode 100644 index 00000000..9d7d8e8e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLMMEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WeibullDistribution; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Estimate parameters of the Weibull distribution using the method of L-Moments + * (LMM). 
+ * + * @author Erich Schubert + * + * @apiviz.has WeibullDistribution + */ +public class WeibullLMMEstimator extends AbstractLMMEstimator<WeibullDistribution> { + /** + * Static instance. + */ + public static final WeibullLMMEstimator STATIC = new WeibullLMMEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private WeibullLMMEstimator() { + super(); + } + + @Override + public int getNumMoments() { + return 3; + } + + @Override + public WeibullDistribution estimateFromLMoments(double[] xmom) { + double l = xmom[2], l2 = l * l, l3 = l2 * l, l4 = l3 * l, l5 = l4 * l, l6 = l5 * l; + double k = 285.3 * l6 - 658.6 * l5 + 622.8 * l4 - 317.2 * l3 + 98.52 * l2 - 21.256 * l + 3.516; + + double gam = GammaDistribution.gamma(1. + 1. / k); + double lambda = xmom[1] / (1. - Math.pow(2., -1. / k) * gam); + double mu = xmom[0] - lambda * gam; + + return new WeibullDistribution(k, lambda, mu); + } + + @Override + public Class<? super WeibullDistribution> getDistributionClass() { + return WeibullDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WeibullLMMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMADEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMADEstimator.java new file mode 100644 index 00000000..aacceae7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMADEstimator.java @@ -0,0 +1,85 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WeibullDistribution; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Parameter estimation via median and median absolute deviation from median + * (MAD). + * + * Reference: + * <p> + * D. J. 
Olive<br /> + * Applied Robust Statistics<br /> + * Preprint of an upcoming book, University of Minnesota + * </p> + * + * @author Erich Schubert + * + * @apiviz.has WeibullDistribution - - estimates + */ +@Reference(title = "Applied Robust Statistics", authors = "D. J. Olive", booktitle = "Applied Robust Statistics", url="http://lagrange.math.siu.edu/Olive/preprints.htm") +public class WeibullLogMADEstimator extends AbstractLogMADEstimator<WeibullDistribution> { + /** + * The more robust median based estimator. + */ + public static final WeibullLogMADEstimator STATIC = new WeibullLogMADEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private WeibullLogMADEstimator() { + super(); + } + + @Override + public WeibullDistribution estimateFromLogMedianMAD(double median, double mad, double shift) { + double isigma = 1.30370 / mad; + double lambda = Math.exp(isigma * median - MathUtil.LOGLOG2); + + return new WeibullDistribution(isigma, lambda); + } + + @Override + public Class<? super WeibullDistribution> getDistributionClass() { + return WeibullDistribution.class; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WeibullLogMADEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMOMEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMOMEstimator.java new file mode 100644 index 00000000..9182a7ce --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/WeibullLogMOMEstimator.java @@ -0,0 +1,101 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.WeibullDistribution; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Naive parameter estimation via least squares. + * + * TODO: this doesn't seem to work very well yet. Buggy? 
+ * + * TODO: the naming is misleading: while it uses some method of moments, it + * doesn't use "the" statistical moments. + * + * @author Erich Schubert + * + * @apiviz.has WeibullDistribution - - estimates + */ +public class WeibullLogMOMEstimator implements DistributionEstimator<WeibullDistribution> { + /** + * The naive least-squares estimator. + */ + public static final WeibullLogMOMEstimator STATIC = new WeibullLogMOMEstimator(); + + /** + * Constructor. Private: use static instance! + */ + private WeibullLogMOMEstimator() { + super(); + } + + @Override + public <A> WeibullDistribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + double beta1 = 0.0, beta3 = 0.0; + MeanVariance mvlogx = new MeanVariance(); + int size = adapter.size(data); + double size1 = size + 1.; + for (int i = 0; i < size; i++) { + final double val = adapter.getDouble(data, i); + if (!(val > 0)) { + throw new ArithmeticException("Cannot least squares fit weibull to a data set which includes non-positive values: " + val); + } + final double yi = Math.log(-Math.log((size - i) / size1)); + final double logxi = Math.log(val); + beta1 += yi * logxi; + beta3 += yi; + mvlogx.put(logxi); + } + double k = (beta1 / size - beta3 / size * mvlogx.getMean()) / mvlogx.getSampleVariance(); + double lambda = 1. / Math.exp(beta3 / size - k * mvlogx.getMean()); + + return new WeibullDistribution(k, lambda); + } + + @Override + public Class<? super WeibullDistribution> getDistributionClass() { + return WeibullDistribution.class; + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected WeibullLogMOMEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java new file mode 100644 index 00000000..dee3cbb3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/BestFitEstimator.java @@ -0,0 +1,472 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.math.StatisticalMoments; +import de.lmu.ifi.dbs.elki.math.statistics.ProbabilityWeightedMoments; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.UniformDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.CauchyMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.EMGOlivierNorbergEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.ExponentialMedianEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GeneralizedExtremeValueLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GeneralizedLogisticAlternateLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GumbelLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GumbelMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LMMDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LaplaceLMMEstimator; +import 
de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LaplaceMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogGammaLogMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogGammaLogMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogLogisticMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogMADDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogMOMDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalBilkovaLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalLogMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogNormalLogMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogisticLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.LogisticMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.MADDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.MOMDistributionEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.NormalLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.NormalMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.NormalMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.RayleighLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.RayleighMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.SkewGNormalLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformEnhancedMinMaxEstimator; +import 
de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.UniformMinMaxEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.WaldMOMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.WeibullLMMEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.WeibullLogMADEstimator; +import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * A meta estimator that will try a number of (inexpensive) estimations, then + * choose whichever works best. + * + * @author Erich Schubert + * + * @apiviz.composedOf MOMDistributionEstimator + * @apiviz.composedOf MADDistributionEstimator + * @apiviz.composedOf LMMDistributionEstimator + * @apiviz.composedOf LogMOMDistributionEstimator + * @apiviz.composedOf LogMADDistributionEstimator + */ +public class BestFitEstimator implements DistributionEstimator<Distribution> { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(BestFitEstimator.class); + + /** + * Static instance. + */ + public static final BestFitEstimator STATIC = new BestFitEstimator(); + + /** + * Mean and variance based estimators. + */ + private Collection<MOMDistributionEstimator<?>> momests; + + /** + * Median average deviation from median estimators. + */ + private Collection<MADDistributionEstimator<?>> madests; + + /** + * L-Moment estimators. + */ + private Collection<LMMDistributionEstimator<?>> lmmests; + + /** + * Logspace Method of Moments estimators. 
+ */ + private Collection<LogMOMDistributionEstimator<?>> logmomests; + + /** + * Logspace Median average deviation from median estimators. + */ + private Collection<LogMADDistributionEstimator<?>> logmadests; + + /** + * Constructor. Use static instance instead! + */ + protected BestFitEstimator() { + super(); + momests = new ArrayList<>(5); + momests.add(NormalMOMEstimator.STATIC); + momests.add(GammaMOMEstimator.STATIC); + momests.add(WaldMOMEstimator.STATIC); + momests.add(ExponentialMOMEstimator.STATIC); + momests.add(EMGOlivierNorbergEstimator.STATIC); + madests = new ArrayList<>(11); + madests.add(NormalMADEstimator.STATIC); + madests.add(GammaMADEstimator.STATIC); + madests.add(ExponentialMADEstimator.STATIC); + madests.add(ExponentialMedianEstimator.STATIC); + madests.add(LaplaceMADEstimator.STATIC); + madests.add(GumbelMADEstimator.STATIC); + madests.add(CauchyMADEstimator.STATIC); + madests.add(LogisticMADEstimator.STATIC); + madests.add(LogLogisticMADEstimator.STATIC); + madests.add(RayleighMADEstimator.STATIC); + madests.add(UniformMADEstimator.STATIC); + lmmests = new ArrayList<>(14); + lmmests.add(NormalLMMEstimator.STATIC); + lmmests.add(GammaLMMEstimator.STATIC); + lmmests.add(ExponentialLMMEstimator.STATIC); + lmmests.add(LaplaceLMMEstimator.STATIC); + lmmests.add(GumbelLMMEstimator.STATIC); + lmmests.add(LogisticLMMEstimator.STATIC); + lmmests.add(GeneralizedLogisticAlternateLMMEstimator.STATIC); + lmmests.add(LogNormalLMMEstimator.STATIC); + lmmests.add(LogNormalBilkovaLMMEstimator.STATIC); + lmmests.add(SkewGNormalLMMEstimator.STATIC); + lmmests.add(GeneralizedExtremeValueLMMEstimator.STATIC); + lmmests.add(RayleighLMMEstimator.STATIC); + lmmests.add(WeibullLMMEstimator.STATIC); + lmmests.add(UniformLMMEstimator.STATIC); + logmomests = new ArrayList<>(2); + logmomests.add(LogNormalLogMOMEstimator.STATIC); + logmomests.add(LogGammaLogMOMEstimator.STATIC); + logmadests = new ArrayList<>(3); + logmadests.add(LogNormalLogMADEstimator.STATIC); + 
logmadests.add(LogGammaLogMADEstimator.STATIC); + logmadests.add(WeibullLogMADEstimator.STATIC); + } + + @Override + public <A> Distribution estimate(A data, NumberArrayAdapter<?, A> adapter) { + int numlmm = 0; + for (LMMDistributionEstimator<?> est : lmmests) { + numlmm = Math.max(numlmm, est.getNumMoments()); + } + + final int len = adapter.size(data); + + // Build various statistics: + StatisticalMoments mom = new StatisticalMoments(), logmom = new StatisticalMoments(); + double[] x = new double[len], scratch = new double[len], logx = new double[len]; + + if (LOG.isDebuggingFine()) { + LOG.debugFine("Computing statistical moments and L-Moments."); + } + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + x[i] = val; + mom.put(val); + } + if (mom.getMax() <= mom.getMin()) { + LOG.warning("Constant distribution detected. Cannot fit."); + return new UniformDistribution(mom.getMin() - .1, mom.getMax() + .1); + } + // Sort: for L-Moments, but getting the median is now also cheap. + Arrays.sort(x); + double[] lmm; + try { + lmm = (numlmm > 0) ? ProbabilityWeightedMoments.samLMR(x, ArrayLikeUtil.DOUBLEARRAYADAPTER, numlmm) : null; + } catch (ArithmeticException e) { + lmm = null; + } + final double min = x[0], median = .5 * (x[len >> 1] + x[(len + 1) >> 1]), max = x[len - 1]; + if (LOG.isDebuggingFine()) { + LOG.debugFine("Computing statistical moments in logspace."); + } + // Build logspace copy: + double shift = Math.min(0., min - (max - min) * 1e-10); + for (int i = 0; i < len; i++) { + double val = x[i] - shift; + val = val > 0. ? 
Math.log(val) : Double.NEGATIVE_INFINITY; + logx[i] = val; + if (!Double.isInfinite(val) && !Double.isNaN(val)) { + logmom.put(val); + } + } + double logmedian = .5 * (logx[len >> 1] + logx[(len + 1) >> 1]); + if (LOG.isDebuggingFine()) { + LOG.debugFine("Computing MADs."); + } + double mad = computeMAD(x, median, scratch, len); + double logmad = computeMAD(logx, logmedian, scratch, len); + + Distribution best = null; + double bestscore = Double.POSITIVE_INFINITY; + DistributionEstimator<?> bestest = null; + + final int numest = momests.size() + madests.size() + lmmests.size() + logmomests.size() + logmadests.size() + 2; + FiniteProgress prog = LOG.isDebuggingFine() ? new FiniteProgress("Finding best matching distribution", numest, LOG) : null; + for (MOMDistributionEstimator<?> est : momests) { + try { + Distribution d = est.estimateFromStatisticalMoments(mom); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (MADDistributionEstimator<?> est : madests) { + try { + Distribution d = est.estimateFromMedianMAD(median, mad); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (LMMDistributionEstimator<?> 
est : lmmests) { + if (lmm != null) { + try { + Distribution d = est.estimateFromLMoments(lmm); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (LogMOMDistributionEstimator<?> est : logmomests) { + try { + Distribution d = est.estimateFromLogStatisticalMoments(logmom, shift); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + for (LogMADDistributionEstimator<?> est : logmadests) { + try { + Distribution d = est.estimateFromLogMedianMAD(logmedian, logmad, shift); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + { // Uniform estimators. 
+ final UniformMinMaxEstimator est = UniformMinMaxEstimator.STATIC; + try { + Distribution d = est.estimate(min, max); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + { // Uniform estimators. + final UniformEnhancedMinMaxEstimator est = UniformEnhancedMinMaxEstimator.STATIC; + try { + Distribution d = est.estimate(min, max, len); + double score = testFit(x, scratch, d); + if (LOG.isDebuggingFine()) { + LOG.debugFine(est.getClass().getSimpleName() + ": " + score + " " + d.toString()); + } + if (score < bestscore) { + best = d; + bestscore = score; + bestest = est; + } + } catch (ArithmeticException e) { + if (LOG.isDebuggingFine()) { + LOG.debugFine("Fitting distribution " + est.getClass().getSimpleName() + " failed: " + e.getMessage()); + } + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + if (prog != null) { + prog.ensureCompleted(LOG); + } + + if (LOG.isVeryVerbose()) { + LOG.veryverbose("Best distribution fit: " + bestscore + " " + best.toString() + " via " + bestest); + } + + return best; + } + + public double computeMAD(double[] data, double median, double[] scratch, final int len) { + // Compute LogMAD: + for (int i = 0; i < len; i++) { + scratch[i] = Math.abs(data[i] - median); + } + double logmad = QuickSelect.median(scratch); + // Adjust LogMAD if 0: + if (!(logmad > 0.)) { + double xmin = Double.POSITIVE_INFINITY; + for (int i = (len >> 1); i < len; i++) { + if (scratch[i] > 0. 
&& scratch[i] < xmin) { + xmin = scratch[i]; + } + } + if (!Double.isInfinite(xmin)) { + logmad = xmin; + } + } + return logmad; + } + + /** + * Test the quality of a fit. + * + * @param x Input data + * @param test Scratch space for testing (will be overwritten!) + * @param dist Distribution + * @return K-S-Test score + * @throws ArithmeticException + */ + private double testFit(double[] x, double[] test, Distribution dist) throws ArithmeticException { + for (int i = 0; i < test.length; i++) { + test[i] = dist.cdf(x[i]); + if (test[i] > 1.) { + test[i] = 1.; + } + if (test[i] < 0.) { + test[i] = 0.; + } + if (Double.isNaN(test[i])) { + throw new ArithmeticException("Got NaN after fitting " + dist.toString()); + } + } + // Should actually be sorted already... + Arrays.sort(test); + return KolmogorovSmirnovTest.simpleTest(test); + } + + @Override + public Class<? super Distribution> getDistributionClass() { + return Distribution.class; // No guarantees, sorry. + } + + @Override + public String toString() { + return this.getClass().getSimpleName(); + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected BestFitEstimator makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java new file mode 100644 index 00000000..5c1cf448 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/TrimmedEstimator.java @@ -0,0 +1,156 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Trimmed wrapper around other estimators. Sorts the data, trims it, then + * analyzes it using another estimator. + * + * @author Erich Schubert + * + * @param <D> Distribution type + */ +public class TrimmedEstimator<D extends Distribution> implements DistributionEstimator<D> { + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to trim. + */ + private double trim; + + /** + * Constructor. + * + * @param inner Inner estimator. + * @param trim Trimming parameter. + */ + public TrimmedEstimator(DistributionEstimator<D> inner, double trim) { + super(); + this.inner = inner; + this.trim = trim; + } + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + int len = adapter.size(data); + final int cut = ((int) (len * trim)) >> 1; + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + x[i] = val; + } + // Sort our copy. 
+ Arrays.sort(x); + { // Trim: + // TODO: is it more efficient to just copy, or instead use a trimmed array + // adapter? + double[] trimmed = new double[len - 2 * cut]; + System.arraycopy(x, cut, trimmed, 0, trimmed.length); + x = trimmed; + len = trimmed.length; + } + return inner.estimate(x, ArrayLikeUtil.DOUBLEARRAYADAPTER); + } + + @Override + public Class<? super D> getDistributionClass() { + return inner.getDistributionClass(); + } + + @Override + public String toString() { + return this.getClass().getSimpleName() + "(" + inner.toString() + ", trim=" + trim + ")"; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <D> Distribution type + */ + public static class Parameterizer<D extends Distribution> extends AbstractParameterizer { + /** + * Option for the class to use on the trimmed sample. + */ + public static final OptionID INNER_ID = new OptionID("trimmedestimate.inner", "Estimator to use on the trimmed data."); + + /** + * Option for specifying the amount of data to trim. + */ + public static final OptionID TRIM_ID = new OptionID("trimmedestimate.trim", "Relative amount of data to trim on each end, must be 0 < trim < 0.5"); + + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to trim. 
+ */ + private double trim; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<DistributionEstimator<D>> innerP = new ObjectParameter<>(INNER_ID, DistributionEstimator.class); + if (config.grab(innerP)) { + inner = innerP.instantiateClass(config); + } + + DoubleParameter trimP = new DoubleParameter(TRIM_ID); + trimP.addConstraint(new GreaterConstraint(0.)); + trimP.addConstraint(new LessConstraint(0.5)); + if (config.grab(trimP)) { + trim = trimP.doubleValue(); + } + } + + @Override + protected TrimmedEstimator<D> makeInstance() { + return new TrimmedEstimator<>(inner, trim); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java new file mode 100644 index 00000000..0ef6318d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/WinsorisingEstimator.java @@ -0,0 +1,166 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.DistributionEstimator; +import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Winsorising or Georgization estimator. Similar to trimming, this is expected + * to be more robust to outliers. However, instead of removing the extreme + * values, they are instead replaced with the cutoff value. This keeps the + * quantity of the data the same, and will have a lower impact on variance and + * similar measures. + * + * Reference: + * <p> + * C. Hastings, F. Mosteller, J. W. Tukey, C. P. Winsor<br /> + * Low moments for small samples: a comparative study of order statistics.<br /> + * The Annals of Mathematical Statistics, 18(3) * + * </p> + * + * @author Erich Schubert + * + * @param <D> Distribution type + */ +@Reference(authors = "C. Hastings, F. Mosteller, J. W. Tukey, C. P. 
Winsor", title = "Low moments for small samples: a comparative study of order statistics", booktitle = "The Annals of Mathematical Statistics, 18(3)", url = "http://dx.doi.org/10.1214/aoms/1177730388") +public class WinsorisingEstimator<D extends Distribution> implements DistributionEstimator<D> { + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to winsorize. + */ + private double winsorize; + + /** + * Constructor. + * + * @param inner Inner estimator. + * @param winsorize Winsorize parameter. + */ + public WinsorisingEstimator(DistributionEstimator<D> inner, double winsorize) { + super(); + this.inner = inner; + this.winsorize = winsorize; + } + + @Override + public <A> D estimate(A data, NumberArrayAdapter<?, A> adapter) { + // We first need the basic parameters: + int len = adapter.size(data); + final int cut = ((int) (len * winsorize)) >> 1; + // X positions of samples + double[] x = new double[len]; + for (int i = 0; i < len; i++) { + final double val = adapter.getDouble(data, i); + x[i] = val; + } + // Partially sort our copy. + double min = QuickSelect.quickSelect(x, 0, len, cut); + double max = QuickSelect.quickSelect(x, cut, len, len - 1 - cut); + // Winsorize by replacing the smallest and largest values. + // QuickSelect ensured that these are correctly in place. + for (int i = 0, j = len - 1; i < cut; i++, j--) { + x[i] = min; + x[j] = max; + } + return inner.estimate(x, ArrayLikeUtil.DOUBLEARRAYADAPTER); + } + + @Override + public Class<? super D> getDistributionClass() { + return inner.getDistributionClass(); + } + + @Override + public String toString() { + return this.getClass().getSimpleName() + "(" + inner.toString() + ", trim=" + winsorize + ")"; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <D> Distribution type + */ + public static class Parameterizer<D extends Distribution> extends AbstractParameterizer { + /** + * Option for the class to use on the winsorized sample. + */ + public static final OptionID INNER_ID = new OptionID("winsorize.inner", "Estimator to use on the winsorized data."); + + /** + * Option for specifying the amount of data to winsorize. + */ + public static final OptionID WINSORIZE_ID = new OptionID("winsorize.winsorize", "Relative amount of data to winsorize on each end, must be 0 < winsorize < 0.5"); + + /** + * Distribution estimator to use. + */ + private DistributionEstimator<D> inner; + + /** + * Amount of data to winsorize. + */ + private double winsorize; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<DistributionEstimator<D>> innerP = new ObjectParameter<>(INNER_ID, DistributionEstimator.class); + if (config.grab(innerP)) { + inner = innerP.instantiateClass(config); + } + + DoubleParameter trimP = new DoubleParameter(WINSORIZE_ID); + trimP.addConstraint(new GreaterConstraint(0.)); + trimP.addConstraint(new LessConstraint(0.5)); + if (config.grab(trimP)) { + winsorize = trimP.doubleValue(); + } + } + + @Override + protected WinsorisingEstimator<D> makeInstance() { + return new WinsorisingEstimator<>(inner, winsorize); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java new file mode 100644 index 00000000..c4b75f2d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/meta/package-info.java @@ -0,0 +1,6 @@ +/** + * Meta estimators: estimators that do not actually estimate themselves, but instead use other estimators, e.g. on a trimmed data set, or as an ensemble. 
+ * + * @author Erich Schubert + */ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.meta;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java index 02f5002f..62c98262 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/estimator/package-info.java @@ -1,9 +1,15 @@ -package de.lmu.ifi.dbs.elki.math.statistics.distribution; +/** + * Estimators for statistical distributions. + * + * @author Erich Schubert + */ +package de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -21,17 +27,3 @@ package de.lmu.ifi.dbs.elki.math.statistics.distribution; You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/** - * Distribution that also has support for generating random numbers. - * - * @author Erich Schubert - */ -public interface DistributionWithRandom extends Distribution { - /** - * Generate a new random value - * - * @return new random value - */ - double nextRandom(); -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java index ed9c0e88..49357049 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/BiweightKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/BiweightKernelDensityFunction.java new file mode 100644 index 00000000..4b6ec7b7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/BiweightKernelDensityFunction.java @@ -0,0 +1,102 @@ +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Biweight (Quartic) kernel density estimator. + * + * @author Erich Schubert + */ +@Alias({ "biweight", "quartic" }) +public final class BiweightKernelDensityFunction implements KernelDensityFunction { + /** + * Static instance. + */ + public static final BiweightKernelDensityFunction KERNEL = new BiweightKernelDensityFunction(); + + /** + * Canonical bandwidth: 35^(1/5) + */ + @Reference(authors = "J.S. Marron, D. Nolan", title = "Canonical kernels for density estimation", booktitle = "Statistics & Probability Letters, Volume 7, Issue 3", url = "http://dx.doi.org/10.1016/0167-7152(88)90050-8") + public static final double CANONICAL_BANDWIDTH = Math.pow(35., .2); + + /** + * Standard deviation. + */ + public static final double STDDEV = 1. / Math.sqrt(7.); + + /** + * R constant. + */ + public static final double R = 5. / 7.; + + /** + * Private, empty constructor. Use the static instance! + */ + private BiweightKernelDensityFunction() { + // Nothing to do. + } + + @Override + public double density(double delta) { + if (delta >= 1.) { + return 0; + } + final double u = 1 - delta * delta; + return 0.9375 * u * u; + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return STDDEV; + } + + @Override + public double getR() { + return R; + } + + /** + * Parameterization stub. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected BiweightKernelDensityFunction makeInstance() { + return KERNEL; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/CosineKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/CosineKernelDensityFunction.java new file mode 100644 index 00000000..230cb404 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/CosineKernelDensityFunction.java @@ -0,0 +1,99 @@ +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Cosine kernel density estimator. + * + * @author Erich Schubert + */ +public final class CosineKernelDensityFunction implements KernelDensityFunction { + /** + * Static instance. 
+ */ + public static final CosineKernelDensityFunction KERNEL = new CosineKernelDensityFunction(); + + /** + * Canonical bandwidth. + * + * Computed as (R / STDDEV^4)^(1/5) + * + * This is approximately: 1.7662654022050532 + */ + public static final double CANONICAL_BANDWIDTH = Math.pow(MathUtil.PISQUARE / (16. * (1. - 8. / MathUtil.PISQUARE) * (1. - 8. / MathUtil.PISQUARE)), .2); + + /** + * Standard deviation. + */ + private static final double STDDEV = Math.sqrt(1. - 8. / MathUtil.PISQUARE); + + /** + * R constant. + */ + private static final double R = Math.PI * Math.PI / 16.; + + /** + * Private, empty constructor. Use the static instance! + */ + private CosineKernelDensityFunction() { + // Nothing to do. + } + + @Override + public double density(double delta) { + return (delta < 1.) ? MathUtil.QUARTERPI * Math.cos(MathUtil.HALFPI * delta) : 0.; + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return STDDEV; + } + + @Override + public double getR() { + return R; + } + + /** + * Parameterization stub. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected CosineKernelDensityFunction makeInstance() { + return KERNEL; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/EpanechnikovKernelDensityFunction.java index 25aa2ea7..bf91e227 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/EpanechnikovKernelDensityFunction.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.math.statistics; +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,8 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** @@ -30,14 +32,28 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * * @author Erich Schubert */ +@Alias({ "epanechnikov" }) public final class EpanechnikovKernelDensityFunction implements KernelDensityFunction { - @Override - public double density(double delta) { - if(delta < 1) { - return 0.75 * (1 - delta * delta); - } - return 0; - } + /** + * Static instance. + */ + public static final EpanechnikovKernelDensityFunction KERNEL = new EpanechnikovKernelDensityFunction(); + + /** + * Canonical bandwidth: 15^(1/5) + */ + @Reference(authors = "J.S. Marron, D. 
Nolan", title = "Canonical kernels for density estimation", booktitle = "Statistics & Probability Letters, Volume 7, Issue 3", url = "http://dx.doi.org/10.1016/0167-7152(88)90050-8") + public static final double CANONICAL_BANDWIDTH = Math.pow(15., .2); + + /** + * Standard deviation. + */ + public static final double STDDEV = 1. / Math.sqrt(5.); + + /** + * R constant. + */ + public static final double R = 3. / 5.; /** * Private, empty constructor. Use the static instance! @@ -46,16 +62,31 @@ public final class EpanechnikovKernelDensityFunction implements KernelDensityFun // Nothing to do. } - /** - * Static instance. - */ - public static final EpanechnikovKernelDensityFunction KERNEL = new EpanechnikovKernelDensityFunction(); + @Override + public double density(double delta) { + return (delta < 1.) ? .75 * (1 - delta * delta) : 0.; + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return STDDEV; + } + + @Override + public double getR() { + return R; + } /** * Parameterization stub. 
* * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/GaussianKernelDensityFunction.java index 2cd15408..2e666871 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/GaussianKernelDensityFunction.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.math.statistics; +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,8 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** @@ -32,14 +34,20 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; */ public final class GaussianKernelDensityFunction implements KernelDensityFunction { /** - * Scaling constant for Gaussian kernel, to make it sum up to 1. + * Static instance. */ - private static final double GSCALE = 1.0 / Math.sqrt(2.0 * Math.PI); + public static final GaussianKernelDensityFunction KERNEL = new GaussianKernelDensityFunction(); - @Override - public double density(double delta) { - return GSCALE * Math.exp(-.5 * delta * delta); - } + /** + * Canonical bandwidth: (1./(4*pi))^(1/10) + */ + @Reference(authors = "J.S. Marron, D. 
Nolan", title = "Canonical kernels for density estimation", booktitle = "Statistics & Probability Letters, Volume 7, Issue 3", url = "http://dx.doi.org/10.1016/0167-7152(88)90050-8") + public static final double CANONICAL_BANDWIDTH = Math.pow(.25 / Math.PI, .1); + + /** + * R constant. + */ + public static final double R = .5 * MathUtil.ONE_BY_SQRTPI; /** * Private, empty constructor. Use the static instance! @@ -48,16 +56,31 @@ public final class GaussianKernelDensityFunction implements KernelDensityFunctio // Nothing to do. } - /** - * Static instance. - */ - public static final GaussianKernelDensityFunction KERNEL = new GaussianKernelDensityFunction(); + @Override + public double density(double delta) { + return MathUtil.ONE_BY_SQRTTWOPI * Math.exp(-.5 * delta * delta); + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return 1.; + } + + @Override + public double getR() { + return R; + } /** * Parameterization stub. 
* * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/KernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/KernelDensityFunction.java new file mode 100644 index 00000000..ce6d5a0d --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/KernelDensityFunction.java @@ -0,0 +1,76 @@ +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Inner function of a kernel density estimator. + * + * Note: as of now, this API does not support asymmetric kernels. + * + * @author Erich Schubert + */ +public interface KernelDensityFunction { + /** + * Density contribution of a point at the given relative distance + * {@code delta >= 0}. + * + * Note that for {@code delta < 0}, in particular for {@code delta < -1}, the + * results may become invalid. 
So usually, you will want to invoke this as: + * + * {@code kernel.density(Math.abs(delta))} + * + * @param delta Relative distance + * @return density contribution + */ + public double density(double delta); + + /** + * Get the canonical bandwidth for this kernel. + * + * Note: R uses a different definition of "canonical bandwidth", and also uses + * differently scaled kernels. + * + * @return Canonical bandwidth + */ + @Reference(authors = "J.S. Marron, D. Nolan", title = "Canonical kernels for density estimation", booktitle = "Statistics & Probability Letters, Volume 7, Issue 3", url = "http://dx.doi.org/10.1016/0167-7152(88)90050-8") + public double canonicalBandwidth(); + + /** + * Get the standard deviation of the kernel function. + * + * @return Standard deviation + */ + public double standardDeviation(); + + /** + * Get the R integral of the kernel, \int K^2(x) dx + * + * TODO: any better name for this? + * + * @return R value + */ + public double getR(); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TriangularKernelDensityFunction.java index aee544de..c6acf031 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TriangularKernelDensityFunction.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.math.statistics; +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,13 +31,27 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * @author Erich Schubert */ public final class TriangularKernelDensityFunction implements KernelDensityFunction { - @Override - public double 
density(double delta) { - if(delta < 1) { - return 1 - delta; - } - return 0; - } + /** + * Static instance. + */ + public static final TriangularKernelDensityFunction KERNEL = new TriangularKernelDensityFunction(); + + /** + * Canonical bandwidth. + * + * Computed as (R / STDDEV^4)^(1/5) + */ + public static final double CANONICAL_BANDWIDTH = Math.pow(24., .2); + + /** + * Standard deviation. + */ + private static final double STDDEV = 1. / Math.sqrt(6.); + + /** + * R constant. + */ + private static final double R = 2. / 3.; /** * Private, empty constructor. Use the static instance! @@ -46,16 +60,31 @@ public final class TriangularKernelDensityFunction implements KernelDensityFunct // Nothing to do. } - /** - * Static instance. - */ - public static final TriangularKernelDensityFunction KERNEL = new TriangularKernelDensityFunction(); + @Override + public double density(double delta) { + return (delta < 1.) ? 1. - delta : 0.; + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return STDDEV; + } + + @Override + public double getR() { + return R; + } /** * Parameterization stub. 
* * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TricubeKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TricubeKernelDensityFunction.java new file mode 100644 index 00000000..933207ed --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TricubeKernelDensityFunction.java @@ -0,0 +1,105 @@ +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Tricube kernel density estimator. + * + * @author Erich Schubert + */ +public final class TricubeKernelDensityFunction implements KernelDensityFunction { + /** + * Static instance. + */ + public static final TricubeKernelDensityFunction KERNEL = new TricubeKernelDensityFunction(); + + /** + * Canonical bandwidth. 
+ * + * Computed as (R / STDDEV^4)^(1/5) = (59049 / 1729)^(1/5), using floating-point division. + */ + public static final double CANONICAL_BANDWIDTH = Math.pow(59049. / 1729., .2); + + /** + * Standard deviation. + */ + private static final double STDDEV = Math.sqrt(35. / 243.); + + /** + * R constant. + */ + private static final double R = 175. / 247.; + + /** + * Scaling factor applied to the density: 70 / 81. + */ + private static final double SCALE = 70. / 81.; + + /** + * Private, empty constructor. Use the static instance! + */ + private TricubeKernelDensityFunction() { + // Nothing to do. + } + + @Override + public double density(double delta) { + if (delta >= 1.) { + return 0; + } + final double u = 1 - delta * delta * delta; + return SCALE * u * u * u; + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return STDDEV; + } + + @Override + public double getR() { + return R; + } + + /** + * Parameterization stub. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected TricubeKernelDensityFunction makeInstance() { + return KERNEL; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TriweightKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TriweightKernelDensityFunction.java new file mode 100644 index 00000000..993ab8bf --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/TriweightKernelDensityFunction.java @@ -0,0 +1,102 @@ +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + 
the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Triweight kernel density estimator. + * + * @author Erich Schubert + */ +@Alias({ "triweight" }) +public final class TriweightKernelDensityFunction implements KernelDensityFunction { + /** + * Static instance. + */ + public static final TriweightKernelDensityFunction KERNEL = new TriweightKernelDensityFunction(); + + /** + * Canonical bandwidth: (9450/143)^(1/5) + */ + @Reference(authors = "J.S. Marron, D. Nolan", title = "Canonical kernels for density estimation", booktitle = "Statistics & Probability Letters, Volume 7, Issue 3", url = "http://dx.doi.org/10.1016/0167-7152(88)90050-8") + public static final double CANONICAL_BANDWIDTH = Math.pow(9450. / 143., .2); + + /** + * Standard deviation. + */ + private static final double STDDEV = 1. / 3.; + + /** + * R constant. + */ + private static final double R = 350. / 429.; + + /** + * Private, empty constructor. Use the static instance! + */ + private TriweightKernelDensityFunction() { + // Nothing to do. + } + + @Override + public double density(double delta) { + if (delta >= 1.) 
{ + return 0; + } + final double u = 1 - delta * delta; + return 1.09375 * u * u * u; + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return STDDEV; + } + + @Override + public double getR() { + return R; + } + + /** + * Parameterization stub. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected TriweightKernelDensityFunction makeInstance() { + return KERNEL; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/UniformKernelDensityFunction.java index 66fe7888..2a820355 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/UniformKernelDensityFunction.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.math.statistics; +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** @@ -31,13 +32,26 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; * @author Erich Schubert */ public final class UniformKernelDensityFunction implements KernelDensityFunction { - @Override - public double density(double delta) { - if(delta < 1) { - return 0.5; - } - return 0; - } + /** + * Static instance. 
+ */ + public static final UniformKernelDensityFunction KERNEL = new UniformKernelDensityFunction(); + + /** + * Canonical bandwidth: (9/2)^(1/5) + */ + @Reference(authors = "J.S. Marron, D. Nolan", title = "Canonical kernels for density estimation", booktitle = "Statistics & Probability Letters, Volume 7, Issue 3", url = "http://dx.doi.org/10.1016/0167-7152(88)90050-8") + public static final double CANONICAL_BANDWIDTH = Math.pow(4.5, .2); + + /** + * Standard deviation. + */ + private static final double STDDEV = 1. / Math.sqrt(3.); + + /** + * R constant. + */ + private static final double R = .5; /** * Private, empty constructor. Use the static instance! @@ -46,16 +60,31 @@ public final class UniformKernelDensityFunction implements KernelDensityFunction // Nothing to do. } - /** - * Static instance. - */ - public static final UniformKernelDensityFunction KERNEL = new UniformKernelDensityFunction(); + @Override + public double density(double delta) { + return (delta < 1.) ? .5 : 0.; + } + + @Override + public double canonicalBandwidth() { + return CANONICAL_BANDWIDTH; + } + + @Override + public double standardDeviation() { + return STDDEV; + } + + @Override + public double getR() { + return R; + } /** * Parameterization stub. * * @author Erich Schubert - * + * * @apiviz.exclude */ public static class Parameterizer extends AbstractParameterizer { diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java new file mode 100644 index 00000000..adcadcaf --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/kernelfunctions/package-info.java @@ -0,0 +1,4 @@ +/** + * Kernel functions from statistics. + */ +package de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java index 142524e6..cc39a615 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java index 236f26b8..a134ab09 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/KolmogorovSmirnovTest.java @@ -64,7 +64,7 @@ public class KolmogorovSmirnovTest implements GoodnessOfFitTest { *
* @param sample1 first data sample positions
* @param sample2 second data sample positions
- * @return the largest distance between both functions
+ * @return the largest difference between both functions
*/
public static double calculateTestStatistic(double[] sample1, double[] sample2) {
double maximum = 0.0;
@@ -74,26 +74,26 @@ public class KolmogorovSmirnovTest implements GoodnessOfFitTest { // Parallel iteration over both curves. We can stop if we reach either end,
// As the difference can then only decrease!
- while(index1 < sample1.length && index2 < sample2.length) {
+ while (index1 < sample1.length && index2 < sample2.length) {
// Next (!) positions
final double x1 = sample1[index1], x2 = sample2[index2];
// Advance on first curve
- if(x1 <= x2) {
+ if (x1 <= x2) {
index1++;
// Handle multiple points with same x:
- while(index1 < sample1.length && sample1[index1] == x1) {
+ while (index1 < sample1.length && sample1[index1] == x1) {
index1++;
}
- cdf1 = ((double) index1) / sample1.length;
+ cdf1 = ((double) index1 + 1.) / (sample1.length + 1.);
}
// Advance on second curve
- if(x1 >= x2) {
+ if (x1 >= x2) {
index2++;
// Handle multiple points with same x:
- while(index2 < sample2.length && sample2[index2] == x2) {
+ while (index2 < sample2.length && sample2[index2] == x2) {
index2++;
}
- cdf2 = ((double) index2) / sample2.length;
+ cdf2 = ((double) index2 + 1.) / (sample2.length + 1.);
}
maximum = Math.max(maximum, Math.abs(cdf1 - cdf2));
}
@@ -114,4 +114,49 @@ public class KolmogorovSmirnovTest implements GoodnessOfFitTest { return STATIC;
}
}
-}
\ No newline at end of file +
+  /**
+   * Quick uniformity check: largest absolute gap between the values of a
+   * sorted sample and their Weibull-style expected positions on [0:1].
+   *
+   * @param test Presorted sample values (the order is not checked here)
+   * @return Largest absolute deviation; positive infinity for an empty array
+   */
+  public static double simpleTest(double[] test) {
+    final int n = test.length;
+    // Plotting positions are k / (n+1) for k = 1..n, i.e. (i+1) / (n+1).
+    final double step = 1. / (n + 1.);
+    double worst = Double.NEGATIVE_INFINITY;
+    int i = 0;
+    while (i < n) {
+      final double gap = Math.abs(test[i] - (i + 1.) * step);
+      // Plain comparison on purpose: NaN gaps never replace the current maximum.
+      worst = (gap > worst) ? gap : worst;
+      i++;
+    }
+    return Math.abs(worst); // maps the untouched sentinel to +infinity
+  }
+
+  /**
+   * Quick uniformity check: largest absolute gap between the values of a
+   * sorted sample and their Weibull-style expected positions on [min:max].
+   *
+   * @param test Presorted sample values (the order is not checked here)
+   * @param min Lower end of the uniform distribution
+   * @param max Upper end of the uniform distribution
+   * @return Largest absolute deviation; positive infinity for an empty array
+   */
+  public static double simpleTest(double[] test, final double min, final double max) {
+    final int n = test.length;
+    // Spacing of the n expected positions inside the interval.
+    final double step = (max - min) / (n + 1.);
+    double worst = Double.NEGATIVE_INFINITY;
+    int i = 0;
+    while (i < n) {
+      final double gap = Math.abs(test[i] - ((i + 1.) * step + min));
+      worst = (gap > worst) ? gap : worst;
+      i++;
+    }
+    return Math.abs(worst); // maps the untouched sentinel to +infinity
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java index 3e39a519..6698f462 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |