diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/math')
87 files changed, 5576 insertions, 3020 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java b/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java index cedfb389..56814649 100644 --- a/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/DoubleMinMax.java @@ -102,7 +102,7 @@ public class DoubleMinMax extends DoubleDoublePair { */ public void put(Collection<Double> data) { for(Double value : data) { - this.put(value); + this.put(value.doubleValue()); } } diff --git a/src/de/lmu/ifi/dbs/elki/math/GeoUtil.java b/src/de/lmu/ifi/dbs/elki/math/GeoUtil.java new file mode 100644 index 00000000..226d8112 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/GeoUtil.java @@ -0,0 +1,670 @@ +package de.lmu.ifi.dbs.elki.math; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Class with utility functions for geographic computations. + * + * The majority of formulas are adapted from: + * <p> + * Ed Williams<br /> + * Aviation Formulary<br /> + * Online: http://williams.best.vwh.net/avform.htm + * </p> + * + * TODO: add ellipsoid version of Vincenty formula. 
+ * + * @author Erich Schubert + * @author Niels Dörre + */ +@Reference(authors = "Ed Williams", title = "Aviation Formulary", booktitle = "", url = "http://williams.best.vwh.net/avform.htm") +public final class GeoUtil { + /** + * Earth radius approximation in km. + */ + public static final double EARTH_RADIUS = 6371.009; // km. + + /** + * Radius of the WGS84 Ellipsoid in km. + */ + public static final double WGS84_RADIUS = 6378.137; // km + + /** + * Flattening of the WGS84 Ellipsoid. + */ + public static final double WGS84_FLATTENING = 0.00335281066474748; + + /** + * Eccentricity squared of the WGS84 Ellipsoid + */ + public static final double WGS84_ECCENTRICITY_SQUARED = 2 * WGS84_FLATTENING - (WGS84_FLATTENING * WGS84_FLATTENING); + + /** + * Dummy constructor. Do not instantiate. + */ + private GeoUtil() { + // Use static methods. Do not instantiate + } + + /** + * Compute the approximate on-earth-surface distance of two points using the + * Haversine formula + * + * Complexity: 5 trigonometric functions, 2 sqrt. + * + * Reference: + * <p> + * R. W. Sinnott,<br/> + * Virtues of the Haversine<br /> + * Sky and telescope, 68-2, 1984 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance in km (approximately) + */ + @Reference(authors = "Sinnott, R. 
W.", title = "Virtues of the Haversine", booktitle = "Sky and telescope, 68-2, 1984") + public static double haversineFormulaDeg(double lat1, double lon1, double lat2, double lon2) { + // Convert to radians: + lat1 = MathUtil.deg2rad(lat1); + lat2 = MathUtil.deg2rad(lat2); + lon1 = MathUtil.deg2rad(lon1); + lon2 = MathUtil.deg2rad(lon2); + return haversineFormulaRad(lat1, lon1, lat2, lon2); + } + + /** + * Compute the approximate on-earth-surface distance of two points using the + * Haversine formula + * + * Complexity: 5 trigonometric functions, 2 sqrt. + * + * Reference: + * <p> + * R. W. Sinnott,<br/> + * Virtues of the Haversine<br /> + * Sky and telescope, 68-2, 1984 + * </p> + * + * @param lat1 Latitude of first point in radians + * @param lon1 Longitude of first point in radians + * @param lat2 Latitude of second point in radians + * @param lon2 Longitude of second point in radians + * @return Distance in km (approximately) + */ + @Reference(authors = "Sinnott, R. W.", title = "Virtues of the Haversine", booktitle = "Sky and telescope, 68-2, 1984") + public static double haversineFormulaRad(double lat1, double lon1, double lat2, double lon2) { + // Haversine formula, higher precision at < 1 meters but maybe issues at + // antipodal points. + final double slat = Math.sin((lat1 - lat2) * .5); + final double slon = Math.sin((lon1 - lon2) * .5); + final double a = slat * slat + slon * slon * Math.cos(lat1) * Math.cos(lat2); + final double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); + return EARTH_RADIUS * c; + } + + /** + * Compute the approximate on-earth-surface distance of two points. + * + * Uses Vincenty's Formula for the spherical case, which does not require + * iterations. + * + * Complexity: 7 trigonometric functions, 1 sqrt. + * + * Reference: + * <p> + * T. 
Vincenty<br /> + * Direct and inverse solutions of geodesics on the ellipsoid with application + * of nested equations<br /> + * Survey review 23 176, 1975 + * </p> + * + * @param lat1 Latitude of first point in degree + * @param lon1 Longitude of first point in degree + * @param lat2 Latitude of second point in degree + * @param lon2 Longitude of second point in degree + * @return Distance in km (approximately) + */ + @Reference(authors = "T. Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") + public static double sphericalVincentyFormulaDeg(double lat1, double lon1, double lat2, double lon2) { + // Work in radians + lat1 = MathUtil.deg2rad(lat1); + lat2 = MathUtil.deg2rad(lat2); + lon1 = MathUtil.deg2rad(lon1); + lon2 = MathUtil.deg2rad(lon2); + return sphericalVincentyFormulaRad(lat1, lon1, lat2, lon2); + } + + /** + * Compute the approximate on-earth-surface distance of two points. + * + * Uses Vincenty's Formula for the spherical case, which does not require + * iterations. + * + * Complexity: 7 trigonometric functions, 1 sqrt. + * + * Reference: + * <p> + * T. Vincenty<br /> + * Direct and inverse solutions of geodesics on the ellipsoid with application + * of nested equations<br /> + * Survey review 23 176, 1975 + * </p> + * + * @param lat1 Latitude of first point in radians + * @param lon1 Longitude of first point in radians + * @param lat2 Latitude of second point in radians + * @param lon2 Longitude of second point in radians + * @return Distance in km (approximately) + */ + @Reference(authors = "T. 
Vincenty", title = "Direct and inverse solutions of geodesics on the ellipsoid with application of nested equations", booktitle = "Survey review 23 176, 1975", url = "http://www.ngs.noaa.gov/PUBS_LIB/inverse.pdf") + public static double sphericalVincentyFormulaRad(double lat1, double lon1, double lat2, double lon2) { + // Full longitude difference: the formula uses sin/cos of the whole delta, not half of it + final double dlon = lon1 - lon2; + + // Spherical special case of Vincenty's formula - no iterations needed + final double slat1 = Math.sin(lat1); + final double slat2 = Math.sin(lat2); + final double slond = Math.sin(dlon); + final double clat1 = Math.cos(lat1); + final double clat2 = Math.cos(lat2); + final double clond = Math.cos(dlon); + final double a = clat2 * slond; + final double b = (clat1 * slat2) - (slat1 * clat2 * clond); + final double d = Math.atan2(Math.sqrt(a * a + b * b), slat1 * slat2 + clat1 * clat2 * clond); + return EARTH_RADIUS * d; + } + + /** + * Compute the cross-track distance. + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @param dist1Q Distance from starting point to query point in km. + * @return Cross-track distance in km. May be negative - this gives the side. + */ + public static double crossTrackDistanceDeg(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q) { + // Convert to radians. + lat1 = MathUtil.deg2rad(lat1); + latQ = MathUtil.deg2rad(latQ); + lat2 = MathUtil.deg2rad(lat2); + lon1 = MathUtil.deg2rad(lon1); + lonQ = MathUtil.deg2rad(lonQ); + lon2 = MathUtil.deg2rad(lon2); + return crossTrackDistanceRad(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q); + } + + /** + * Compute the cross-track distance. + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. 
+ * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @param dist1Q Distance from starting point to query point in km. + * @return Cross-track distance in km. May be negative - this gives the side. + */ + public static double crossTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q) { + final double dlon12 = lon2 - lon1; + final double dlon1Q = lonQ - lon1; + + // Compute trigonometric functions only once. + final double slat1 = Math.sin(lat1); + final double slatQ = Math.sin(latQ); + final double slat2 = Math.sin(lat2); + final double clat1 = Math.cos(lat1); + final double clatQ = Math.cos(latQ); + final double clat2 = Math.cos(lat2); + + // Compute the course + final double crs12, crs1Q; + { + // y = sin(dlon) * cos(lat2) + double yE = Math.sin(dlon12) * clat2; + double yQ = Math.sin(dlon1Q) * clatQ; + + // x = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dlon) + double xE = clat1 * slat2 - slat1 * clat2 * Math.cos(dlon12); + double xQ = clat1 * slatQ - slat1 * clatQ * Math.cos(dlon1Q); + + crs12 = Math.atan2(yE, xE); + crs1Q = Math.atan2(yQ, xQ); + } + + // Calculate cross-track distance + return EARTH_RADIUS * Math.asin(Math.sin(dist1Q / EARTH_RADIUS) * Math.sin(crs1Q - crs12)); + } + + /** + * Compute the cross-track distance. + * + * XTD = asin(sin(dist_1Q)*sin(crs_1Q-crs_12)) + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @return Cross-track distance in km. May be negative - this gives the side. + */ + public static double crossTrackDistance(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { + // Convert to radians. 
+ lat1 = MathUtil.deg2rad(lat1); + latQ = MathUtil.deg2rad(latQ); + lat2 = MathUtil.deg2rad(lat2); + lon1 = MathUtil.deg2rad(lon1); + lonQ = MathUtil.deg2rad(lonQ); + lon2 = MathUtil.deg2rad(lon2); + return crossTrackDistanceRad(lat1, lon1, lat2, lon2, latQ, lonQ); + } + + /** + * Compute the cross-track distance. + * + * XTD = asin(sin(dist_SQ)*sin(crs_SQ-crs_SE)) + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @return Cross-track distance in km. May be negative - this gives the side. + */ + public static double crossTrackDistanceRad(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { + final double dlon12 = lon2 - lon1; + final double dlon1Q = lonQ - lon1; + + // Compute trigonometric functions only once. + final double clat1 = Math.cos(lat1); + final double clatQ = Math.cos(latQ); + final double clat2 = Math.cos(lat2); + final double slat1 = Math.sin(lat1); + final double slatQ = Math.sin(latQ); + final double slat2 = Math.sin(lat2); + + // Haversine formula, higher precision at < 1 meters but maybe issues at + // antipodal points - we do not yet multiply with the radius! 
+ double angDist1Q; + { + final double slat = Math.sin((latQ - lat1) * .5); + final double slon = Math.sin(dlon1Q * .5); + final double a = slat * slat + slon * slon * clat1 * clatQ; + angDist1Q = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); + } + + // Compute the course + final double crs12, crs1Q; + { + // y = sin(dlon) * cos(lat2) + double yE = Math.sin(dlon12) * clat2; + double yQ = Math.sin(dlon1Q) * clatQ; + + // x = cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dlon) + double xE = clat1 * slat2 - slat1 * clat2 * Math.cos(dlon12); + double xQ = clat1 * slatQ - slat1 * clatQ * Math.cos(dlon1Q); + + crs12 = Math.atan2(yE, xE); + crs1Q = Math.atan2(yQ, xQ); + } + + // Calculate cross-track distance + return EARTH_RADIUS * Math.asin(Math.sin(angDist1Q) * Math.sin(crs1Q - crs12)); + } + + /** + * The along track distance, is the distance from S to Q along the track S to + * E. + * + * ATD=acos(cos(dist_1Q)/cos(XTD)) + * + * FIXME: can we get a proper sign into this? + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. + * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @return Along-track distance in km. May be negative - this gives the side. + */ + public static double alongTrackDistance(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ) { + double dist1Q = haversineFormulaDeg(lat1, lon1, latQ, lonQ); + double ctd = crossTrackDistanceDeg(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q); + return alongTrackDistance(lat1, lon1, lat2, lon2, latQ, lonQ, dist1Q, ctd); + } + + /** + * The along track distance, is the distance from S to Q along the track S to + * E. + * + * ATD=acos(cos(dist_SQ)/cos(XTD)) + * + * FIXME: can we get a proper sign into this? + * + * @param lat1 Latitude of starting point. + * @param lon1 Longitude of starting point. 
+ * @param lat2 Latitude of destination point. + * @param lon2 Longitude of destination point. + * @param latQ Latitude of query point. + * @param lonQ Longitude of query point. + * @param dist1Q Distance S to Q + * @param ctd Cross-track-distance + * @return Along-track distance in km. May be negative - this gives the side. + */ + public static double alongTrackDistance(double lat1, double lon1, double lat2, double lon2, double latQ, double lonQ, double dist1Q, double ctd) { + // TODO: optimize the sign computation! + int sign = Math.abs(bearing(lat1, lon1, lat2, lon2) - bearing(lat1, lon1, latQ, lonQ)) < MathUtil.HALFPI ? +1 : -1; + return sign * EARTH_RADIUS * Math.acos(Math.cos(dist1Q / EARTH_RADIUS) / Math.cos(ctd / EARTH_RADIUS)); + // TODO: for short distances, use this instead? + // asin(sqrt( (sin(dist_1Q))^2 - (sin(XTD))^2 )/cos(XTD)) + } + + /** + * Point to rectangle minimum distance. + * + * Complexity: + * <ul> + * <li>Trivial cases (on longitude slice): no trigonometric functions.</li> + * <li>Cross-track case: 10+2 trig</li> + * <li>Corner case: 10+3 trig, 2 sqrt</li> + * </ul> + * + * @param plat Latitude of query point. + * @param plng Longitude of query point. + * @param rminlat Min latitude of rectangle. + * @param rminlng Min longitude of rectangle. + * @param rmaxlat Max latitude of rectangle. + * @param rmaxlng Max longitude of rectangle. + * @return Distance + */ + public static double latlngMinDistDeg(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { + // Convert to radians. + plat = MathUtil.deg2rad(plat); + plng = MathUtil.deg2rad(plng); + rminlat = MathUtil.deg2rad(rminlat); + rminlng = MathUtil.deg2rad(rminlng); + rmaxlat = MathUtil.deg2rad(rmaxlat); + rmaxlng = MathUtil.deg2rad(rmaxlng); + + return latlngMinDistRad(plat, plng, rminlat, rminlng, rmaxlat, rmaxlng); + } + + /** + * Point to rectangle minimum distance. 
+ * + * Complexity: + * <ul> + * <li>Trivial cases (on longitude slice): no trigonometric functions.</li> + * <li>Cross-track case: 10+2 trig</li> + * <li>Corner case: 10+3 trig, 2 sqrt</li> + * </ul> + * + * @param plat Latitude of query point. + * @param plng Longitude of query point. + * @param rminlat Min latitude of rectangle. + * @param rminlng Min longitude of rectangle. + * @param rmaxlat Max latitude of rectangle. + * @param rmaxlng Max longitude of rectangle. + * @return Distance + */ + public static double latlngMinDistRad(double plat, double plng, double rminlat, double rminlng, double rmaxlat, double rmaxlng) { + // FIXME: handle rectangles crossing the +-180 deg boundary correctly! + + // Degenerate rectangles: + if ((rminlat >= rmaxlat) && (rminlng >= rmaxlng)) { + return haversineFormulaRad(rminlat, rminlng, plat, plng); + } + + // The simplest case is when the query point is in the same "slice": + if (rminlng <= plng && plng <= rmaxlng) { + // Inside rectangle: + if (rminlat <= plat && plat <= rmaxlat) { + return 0; + } + // South: + if (plat < rminlat) { + return EARTH_RADIUS * (rminlat - plat); + } else { + // plat > rmaxlat + return EARTH_RADIUS * (plat - rmaxlat); + } + } + + // Determine whether going east or west is shorter. + double lngE = rminlng - plng; + lngE += (lngE < 0) ? MathUtil.TWOPI : 0; + double lngW = rmaxlng - plng; + lngW -= (lngW > 0) ? 
MathUtil.TWOPI : 0; + + // Compute sine and cosine values we will certainly need below: + final double slatQ = Math.sin(plat); + final double clatQ = Math.cos(plat); + final double slatN = Math.sin(rmaxlat); + final double clatN = Math.cos(rmaxlat); + final double slatS = Math.sin(rminlat); + final double clatS = Math.cos(rminlat); + + // East, to min edge: + if (lngE <= -lngW) { + final double slngD = Math.sin(lngE); + final double clngD = Math.cos(lngE); + + // Bearing to south + // Math.atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD); + // Bearing from south + final double bs = Math.atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD); + // Bearing to north + // Math.atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD); + // Bearing from north + final double bn = Math.atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD); + if (bs < MathUtil.HALFPI) { + if (bn > MathUtil.HALFPI) { + // Radians from south pole = abs(ATD) + final double radFromS = -MathUtil.HALFPI - plat; + + // Cross-track-distance to longitude line. + return EARTH_RADIUS * Math.asin(Math.sin(radFromS) * -slngD); + } + } + if (bs - MathUtil.HALFPI < MathUtil.HALFPI - bn) { + // Haversine to north corner. + final double slatN2 = Math.sin((plat - rmaxlat) * .5); + final double slon = Math.sin(lngE * .5); + final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN; + final double distN = 2 * Math.atan2(Math.sqrt(aN), Math.sqrt(1 - aN)); + return EARTH_RADIUS * distN; + } else { + // Haversine to south corner. 
+ final double slatS2 = Math.sin((plat - rminlat) * .5); + final double slon = Math.sin(lngE * .5); + final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS; + final double distS = 2 * Math.atan2(Math.sqrt(aS), Math.sqrt(1 - aS)); + return EARTH_RADIUS * distS; + } + } else { // West, to max edge + final double slngD = Math.sin(lngW); + final double clngD = Math.cos(lngW); + + // Bearing to south + // Math.atan2(slngD * clatS, clatQ * slatS - slatQ * clatS * clngD); + // Bearing from south + final double bs = Math.atan2(slngD * clatQ, clatS * slatQ - slatS * clatQ * clngD); + // Bearing to north + // Math.atan2(slngD * clatN, clatQ * slatN - slatQ * clatN * clngD); + // Bearing from north + final double bn = Math.atan2(slngD * clatQ, clatN * slatQ - slatN * clatQ * clngD); + if (bs > -MathUtil.HALFPI) { + if (bn < -MathUtil.HALFPI) { + // Radians from south = abs(ATD) = distance from pole + final double radFromS = -MathUtil.HALFPI - plat; + // Cross-track-distance to longitude line. + return EARTH_RADIUS * Math.asin(Math.sin(radFromS) * slngD); + } + } + if (-MathUtil.HALFPI - bs < bn + MathUtil.HALFPI) { + // Haversine to north corner. + final double slatN2 = Math.sin((plat - rmaxlat) * .5); + final double slon = Math.sin(lngW * .5); + final double aN = slatN2 * slatN2 + slon * slon * clatQ * clatN; + final double distN = 2 * Math.atan2(Math.sqrt(aN), Math.sqrt(1 - aN)); + return EARTH_RADIUS * distN; + } else { + // Haversine to south corner. + final double slatS2 = Math.sin((plat - rminlat) * .5); + final double slon = Math.sin(lngW * .5); + final double aS = slatS2 * slatS2 + slon * slon * clatQ * clatS; + final double distS = 2 * Math.atan2(Math.sqrt(aS), Math.sqrt(1 - aS)); + return EARTH_RADIUS * distS; + } + } + } + + /** + * Compute the bearing from start to end. 
+ * + * @param latS Start latitude, in degree + * @param lngS Start longitude, in degree + * @param latE End latitude, in degree + * @param lngE End longitude, in degree + * @return Bearing in radians + */ + public static double bearing(double latS, double lngS, double latE, double lngE) { + latS = MathUtil.deg2rad(latS); + latE = MathUtil.deg2rad(latE); + lngS = MathUtil.deg2rad(lngS); + lngE = MathUtil.deg2rad(lngE); + final double slatS = Math.sin(latS); + final double clatS = Math.cos(latS); + final double slatE = Math.sin(latE); + final double clatE = Math.cos(latE); + return Math.atan2(-Math.sin(lngS - lngE) * clatE, clatS * slatE - slatS * clatE * Math.cos(lngS - lngE)); + } + + /** + * Map a latitude,longitude pair to 3D X-Y-Z coordinates, using the WGS84 + * ellipsoid. + * + * The coordinate system is chosen such that the earth rotates around the Z + * axis. + * + * @param lat Latitude in degree + * @param lng Longitude in degree + * @return Coordinate triple + */ + public static double[] latLngDegToXZYWGS84(double lat, double lng) { + // Switch to radians: + lat = Math.toRadians(lat); + lng = Math.toRadians(lng); + // Sine and cosines: + final double clat = Math.cos(lat), slat = Math.sin(lat); + final double clng = Math.cos(lng), slng = Math.sin(lng); + + // Radius of curvature in the prime vertical: a / sqrt(1 - e^2 * sin^2(lat)) + final double v = WGS84_RADIUS / (Math.sqrt(1 - WGS84_ECCENTRICITY_SQUARED * slat * slat)); + + return new double[] { v * clat * clng, v * clat * slng, (1 - WGS84_ECCENTRICITY_SQUARED) * v * slat }; + } + + /** + * Convert Latitude-Longitude pair to X-Y-Z coordinates using a spherical + * approximation of the earth. + * + * The coordinate system is chosen such that the earth rotates around the Z + * axis. + * + * @param lat Latitude in degree + * @param lng Longitude in degree + * @return Coordinate triple + */ + public static double[] latLngDegToXZY(double lat, double lng) { + // Map to radians. 
+ lat = MathUtil.deg2rad(lat); + lng = MathUtil.deg2rad(lng); + // Sine and cosines: + final double clat = Math.cos(lat), slat = Math.sin(lat); + final double clng = Math.cos(lng), slng = Math.sin(lng); + return new double[] { EARTH_RADIUS * clat * clng, EARTH_RADIUS * clat * slng, EARTH_RADIUS * slat }; + } + + /** + * Convert 3D X-Y-Z coordinates to the corresponding WGS84 latitude. + * All three coordinates are needed; the value is refined iteratively. + * + * @param x X value + * @param y Y value + * @param z Z value + * @return Latitude in degree + */ + public static double xyzToLatDegWGS84(double x, double y, double z) { + final double p = Math.sqrt(x * x + y * y); + double lat = Math.atan2(z, p * (1 - WGS84_ECCENTRICITY_SQUARED)); + + // Iteratively improving the lat value + // TODO: instead of a fixed number of iterations, check for convergence. + for (int i = 0; i < 10; i++) { + final double slat = Math.sin(lat); + final double v = WGS84_RADIUS / (Math.sqrt(1 - WGS84_ECCENTRICITY_SQUARED * slat * slat)); + lat = Math.atan2(z + WGS84_ECCENTRICITY_SQUARED * v * slat, p); + } + + return MathUtil.rad2deg(lat); + } + + /** + * Convert a 3D coordinate pair to the corresponding latitude. + * + * Only the z coordinate is required. + * + * @param z Z value + * @return Latitude + */ + public static double xyzToLatDeg(double z) { + return MathUtil.rad2deg(Math.asin(z / EARTH_RADIUS)); + } + + /** + * Convert a 3D coordinate pair to the corresponding longitude. + * + * Only x and y are required - z gives the latitude. 
+ * + * @param x X value + * @param y Y value + * @return Longitude in degree + */ + public static double xyzToLngDeg(double x, double y) { + return MathUtil.rad2deg(Math.atan2(y, x)); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/IntegerMinMax.java b/src/de/lmu/ifi/dbs/elki/math/IntegerMinMax.java new file mode 100644 index 00000000..4c89240b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/IntegerMinMax.java @@ -0,0 +1,176 @@ +package de.lmu.ifi.dbs.elki.math; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Collection; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/** + * Class to find the minimum and maximum int values in data. + * + * @author Erich Schubert + * @author Arthur Zimek + */ +public class IntegerMinMax extends IntIntPair { + /** + * Constructor without starting values. + * + * The minimum will be initialized to {@link Integer#MAX_VALUE}. + * + * The maximum will be initialized to {@link Integer#MIN_VALUE}. + * + * So that the first data added will replace both. 
+ */ + public IntegerMinMax() { + super(Integer.MAX_VALUE, Integer.MIN_VALUE); + } + + /** + * Constructor with predefined minimum and maximum values. + * + * @param min Minimum value + * @param max Maximum value + */ + public IntegerMinMax(int min, int max) { + super(min, max); + } + + /** + * Process a single int value. + * + * If the new value is smaller than the current minimum, it will become the + * new minimum. + * + * If the new value is larger than the current maximum, it will become the new + * maximum. + * + * @param data New value + */ + public void put(int data) { + this.first = Math.min(this.first, data); + this.second = Math.max(this.second, data); + } + + /** + * Process a whole array of int values. + * + * If any of the values is smaller than the current minimum, it will become + * the new minimum. + * + * If any of the values is larger than the current maximum, it will become the + * new maximum. + * + * @param data Data to process + */ + public void put(int[] data) { + for(int value : data) { + this.put(value); + } + } + + /** + * Process a whole collection of Integer values. + * + * If any of the values is smaller than the current minimum, it will become + * the new minimum. + * + * If any of the values is larger than the current maximum, it will become the + * new maximum. + * + * @param data Data to process + */ + public void put(Collection<Integer> data) { + for(Integer value : data) { + this.put(value.intValue()); + } + } + + /** + * Get the current minimum. + * + * @return current minimum. + */ + public int getMin() { + return this.first; + } + + /** + * Get the current maximum. + * + * @return current maximum. + */ + public int getMax() { + return this.second; + } + + /** + * Return the difference between minimum and maximum. + * + * @return Difference of current Minimum and Maximum. + */ + public int getDiff() { + return this.getMax() - this.getMin(); + } + + /** + * Test whether the result is defined. 
+ * + * @return true when at least one value has been added + */ + public boolean isValid() { + return (first <= second); + } + + /** + * Return minimum and maximum as array. + * + * @return Minimum, Maximum + */ + public int[] asIntArray() { + return new int[] { this.getMin(), this.getMax() }; + } + + /** + * Generate a new array of initialized IntegerMinMax objects (with default + * constructor) + * + * @param size Array size + * @return initialized array + */ + public static IntegerMinMax[] newArray(int size) { + IntegerMinMax ret[] = new IntegerMinMax[size]; + for(int i = 0; i < size; i++) { + ret[i] = new IntegerMinMax(); + } + return ret; + } + + /** + * Reset statistics. + */ + public void reset() { + first = Integer.MAX_VALUE; + second = Integer.MIN_VALUE; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java index c44a0203..aaa19f0d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MathUtil.java +++ b/src/de/lmu/ifi/dbs/elki/math/MathUtil.java @@ -28,6 +28,7 @@ import java.util.Random; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; /** @@ -45,6 +46,11 @@ public final class MathUtil { public static final double TWOPI = 2 * Math.PI; /** + * Half the value of Pi. + */ + public static final double HALFPI = 0.5 * Math.PI; + + /** * Square root of two times Pi. */ public static final double SQRTTWOPI = Math.sqrt(TWOPI); @@ -55,17 +61,17 @@ public final class MathUtil { public static final double SQRT2 = Math.sqrt(2); /** - * Square root of 5 + * Square root of 5. */ public static final double SQRT5 = Math.sqrt(5); /** - * Square root of 0.5 == 1 / sqrt(2) + * Square root of 0.5 == 1 / sqrt(2). */ public static final double SQRTHALF = Math.sqrt(.5); /** - * Precomputed value of 1 / sqrt(pi) + * Precomputed value of 1 / sqrt(pi). 
*/ public static final double ONE_BY_SQRTPI = 1 / Math.sqrt(Math.PI); @@ -75,26 +81,36 @@ public final class MathUtil { public static final double LOG2 = Math.log(2); /** - * Natural logarithm of 10 + * Natural logarithm of 10. */ public static final double LOG10 = Math.log(10); /** - * Math.log(Math.PI) + * Math.log(Math.PI). */ public static final double LOGPI = Math.log(Math.PI); /** - * Math.log(Math.PI) / 2 + * Math.log(Math.PI) / 2. */ public static final double LOGPIHALF = LOGPI / 2.; /** - * Math.log(Math.sqrt(2*Math.PI)) + * Math.log(Math.sqrt(2*Math.PI)). */ public static final double LOGSQRTTWOPI = Math.log(SQRTTWOPI); /** + * Constant for degrees to radians. + */ + public static final double DEG2RAD = Math.PI / 180.0; + + /** + * Constant for radians to degrees. + */ + public static final double RAD2DEG = 180 / Math.PI; + + /** * Fake constructor for static class. */ private MathUtil() { @@ -113,21 +129,19 @@ public final class MathUtil { * @return {@code sqrt(a<sup>2</sup> + b<sup>2</sup>)} */ public static double fastHypot(double a, double b) { - if(a < 0) { + if (a < 0) { a = -a; } - if(b < 0) { + if (b < 0) { b = -b; } - if(a > b) { + if (a > b) { final double r = b / a; return a * Math.sqrt(1 + r * r); - } - else if(b != 0) { + } else if (b != 0) { final double r = a / b; return b * Math.sqrt(1 + r * r); - } - else { + } else { return 0.0; } } @@ -145,17 +159,17 @@ public final class MathUtil { * @return {@code sqrt(a<sup>2</sup> + b<sup>2</sup> + c<sup>2</sup>)} */ public static double fastHypot3(double a, double b, double c) { - if(a < 0) { + if (a < 0) { a = -a; } - if(b < 0) { + if (b < 0) { b = -b; } - if(c < 0) { + if (c < 0) { c = -c; } double m = (a > b) ? ((a > c) ? a : c) : ((b > c) ? b : c); - if(m <= 0) { + if (m <= 0) { return 0.0; } a = a / m; @@ -165,7 +179,7 @@ public final class MathUtil { } /** - * Compute the Mahalanobis distance using the given weight matrix + * Compute the Mahalanobis distance using the given weight matrix. 
* * @param weightMatrix Weight Matrix * @param o1_minus_o2 Delta vector @@ -174,7 +188,23 @@ public final class MathUtil { public static double mahalanobisDistance(Matrix weightMatrix, Vector o1_minus_o2) { double sqrDist = o1_minus_o2.transposeTimesTimes(weightMatrix, o1_minus_o2); - if(sqrDist < 0 && Math.abs(sqrDist) < 0.000000001) { + if (sqrDist < 0 && Math.abs(sqrDist) < 0.000000001) { + sqrDist = Math.abs(sqrDist); + } + return Math.sqrt(sqrDist); + } + + /** + * Compute the Mahalanobis distance using the given weight matrix. + * + * @param weightMatrix Weight Matrix + * @param o1_minus_o2 Delta vector + * @return Mahalanobis distance + */ + public static double mahalanobisDistance(double[][] weightMatrix, double[] o1_minus_o2) { + double sqrDist = VMath.transposeTimesTimes(o1_minus_o2, weightMatrix, o1_minus_o2); + + if (sqrDist < 0 && Math.abs(sqrDist) < 0.000000001) { sqrDist = Math.abs(sqrDist); } return Math.sqrt(sqrDist); @@ -190,18 +220,18 @@ public final class MathUtil { * @param y second FeatureVector * @return the Pearson product-moment correlation coefficient for x and y */ - public static double pearsonCorrelationCoefficient(NumberVector<?, ?> x, NumberVector<?, ?> y) { + public static double pearsonCorrelationCoefficient(NumberVector<?> x, NumberVector<?> y) { final int xdim = x.getDimensionality(); final int ydim = y.getDimensionality(); - if(xdim != ydim) { + if (xdim != ydim) { throw new IllegalArgumentException("Invalid arguments: feature vectors differ in dimensionality."); } - if(xdim <= 0) { + if (xdim <= 0) { throw new IllegalArgumentException("Invalid arguments: dimensionality not positive."); } PearsonCorrelation pc = new PearsonCorrelation(); - for(int i = 0; i < xdim; i++) { - pc.put(x.doubleValue(i + 1), y.doubleValue(i + 1), 1.0); + for (int i = 0; i < xdim; i++) { + pc.put(x.doubleValue(i), y.doubleValue(i), 1.0); } return pc.getCorrelation(); } @@ -214,20 +244,21 @@ public final class MathUtil { * * @param x first FeatureVector * 
@param y second FeatureVector + * @param weights Weights * @return the Pearson product-moment correlation coefficient for x and y */ - public static double weightedPearsonCorrelationCoefficient(NumberVector<?, ?> x, NumberVector<?, ?> y, double[] weights) { + public static double weightedPearsonCorrelationCoefficient(NumberVector<?> x, NumberVector<?> y, double[] weights) { final int xdim = x.getDimensionality(); final int ydim = y.getDimensionality(); - if(xdim != ydim) { + if (xdim != ydim) { throw new IllegalArgumentException("Invalid arguments: feature vectors differ in dimensionality."); } - if(xdim != weights.length) { + if (xdim != weights.length) { throw new IllegalArgumentException("Dimensionality doesn't agree to weights."); } PearsonCorrelation pc = new PearsonCorrelation(); - for(int i = 0; i < xdim; i++) { - pc.put(x.doubleValue(i + 1), y.doubleValue(i + 1), weights[i]); + for (int i = 0; i < xdim; i++) { + pc.put(x.doubleValue(i), y.doubleValue(i), weights[i]); } return pc.getCorrelation(); } @@ -240,20 +271,21 @@ public final class MathUtil { * * @param x first FeatureVector * @param y second FeatureVector + * @param weights Weights * @return the Pearson product-moment correlation coefficient for x and y */ - public static double weightedPearsonCorrelationCoefficient(NumberVector<?, ?> x, NumberVector<?, ?> y, NumberVector<?, ?> weights) { + public static double weightedPearsonCorrelationCoefficient(NumberVector<?> x, NumberVector<?> y, NumberVector<?> weights) { final int xdim = x.getDimensionality(); final int ydim = y.getDimensionality(); - if(xdim != ydim) { + if (xdim != ydim) { throw new IllegalArgumentException("Invalid arguments: feature vectors differ in dimensionality."); } - if(xdim != weights.getDimensionality()) { + if (xdim != weights.getDimensionality()) { throw new IllegalArgumentException("Dimensionality doesn't agree to weights."); } PearsonCorrelation pc = new PearsonCorrelation(); - for(int i = 0; i < xdim; i++) { - 
pc.put(x.doubleValue(i + 1), y.doubleValue(i + 1), weights.doubleValue(i + 1)); + for (int i = 0; i < xdim; i++) { + pc.put(x.doubleValue(i), y.doubleValue(i), weights.doubleValue(i)); } return pc.getCorrelation(); } @@ -271,11 +303,11 @@ public final class MathUtil { public static double pearsonCorrelationCoefficient(double[] x, double[] y) { final int xdim = x.length; final int ydim = y.length; - if(xdim != ydim) { + if (xdim != ydim) { throw new IllegalArgumentException("Invalid arguments: feature vectors differ in dimensionality."); } PearsonCorrelation pc = new PearsonCorrelation(); - for(int i = 0; i < xdim; i++) { + for (int i = 0; i < xdim; i++) { pc.put(x[i], y[i], 1.0); } return pc.getCorrelation(); @@ -289,19 +321,20 @@ public final class MathUtil { * * @param x first FeatureVector * @param y second FeatureVector + * @param weights Weights * @return the Pearson product-moment correlation coefficient for x and y */ public static double weightedPearsonCorrelationCoefficient(double[] x, double[] y, double[] weights) { final int xdim = x.length; final int ydim = y.length; - if(xdim != ydim) { + if (xdim != ydim) { throw new IllegalArgumentException("Invalid arguments: feature vectors differ in dimensionality."); } - if(xdim != weights.length) { + if (xdim != weights.length) { throw new IllegalArgumentException("Dimensionality doesn't agree to weights."); } PearsonCorrelation pc = new PearsonCorrelation(); - for(int i = 0; i < xdim; i++) { + for (int i = 0; i < xdim; i++) { pc.put(x[i], y[i], weights[i]); } return pc.getCorrelation(); @@ -309,7 +342,7 @@ public final class MathUtil { /** * Compute the Factorial of n, often written as <code>c!</code> in - * mathematics.</p> + * mathematics. * <p> * Use this method if for large values of <code>n</code>. 
* </p> @@ -320,7 +353,7 @@ public final class MathUtil { */ public static BigInteger factorial(BigInteger n) { BigInteger nFac = BigInteger.valueOf(1); - while(n.compareTo(BigInteger.valueOf(1)) > 0) { + while (n.compareTo(BigInteger.valueOf(1)) > 0) { nFac = nFac.multiply(n); n = n.subtract(BigInteger.valueOf(1)); } @@ -336,7 +369,7 @@ public final class MathUtil { */ public static long factorial(int n) { long nFac = 1; - for(long i = n; i > 0; i--) { + for (long i = n; i > 0; i--) { nFac *= i; } return nFac; @@ -355,7 +388,7 @@ public final class MathUtil { public static long binomialCoefficient(long n, long k) { final long m = Math.max(k, n - k); double temp = 1; - for(long i = n, j = 1; i > m; i--, j++) { + for (long i = n, j = 1; i > m; i--, j++) { temp = temp * i / j; } return (long) temp; @@ -370,16 +403,14 @@ public final class MathUtil { */ public static double approximateFactorial(int n) { double nFac = 1.0; - for(int i = n; i > 0; i--) { + for (int i = n; i > 0; i--) { nFac *= i; } return nFac; } /** - * <p> - * Binomial coefficent, also known as "n choose k") - * </p> + * Binomial coefficent, also known as "n choose k"). * * @param n Total number of samples. n > 0 * @param k Number of elements to choose. <code>n >= k</code>, @@ -389,7 +420,7 @@ public final class MathUtil { public static double approximateBinomialCoefficient(int n, int k) { final int m = Math.max(k, n - k); long temp = 1; - for(int i = n, j = 1; i > m; i--, j++) { + for (int i = n, j = 1; i > m; i--, j++) { temp = temp * i / j; } return temp; @@ -402,11 +433,11 @@ public final class MathUtil { * @return Sum */ public static long sumFirstIntegers(final long i) { - return ((i - 1L) * i) / 2; + return ((i - 1L) * i) >> 1; } /** - * Produce an array of random numbers in [0:1] + * Produce an array of random numbers in [0:1]. 
* * @param len Length * @return Array @@ -416,7 +447,7 @@ public final class MathUtil { } /** - * Produce an array of random numbers in [0:1] + * Produce an array of random numbers in [0:1]. * * @param len Length * @param r Random generator @@ -424,62 +455,36 @@ public final class MathUtil { */ public static double[] randomDoubleArray(int len, Random r) { final double[] ret = new double[len]; - for(int i = 0; i < len; i++) { + for (int i = 0; i < len; i++) { ret[i] = r.nextDouble(); } return ret; } /** - * Convert Degree to Radians + * Convert Degree to Radians. + * + * This is essentially the same as {@link Math#toRadians}, but we keep it for + * now, it might be marginally faster, but certainly not slower. * * @param deg Degree value * @return Radian value */ public static double deg2rad(double deg) { - return deg * Math.PI / 180.0; + return deg * DEG2RAD; } /** - * Radians to Degree + * Radians to Degree. + * + * This is essentially the same as {@link Math#toRadians}, but we keep it for + * now, it might be marginally faster, but certainly not slower. * * @param rad Radians value * @return Degree value */ public static double rad2deg(double rad) { - return rad * 180 / Math.PI; - } - - /** - * Compute the approximate on-earth-surface distance of two points. - * - * @param lat1 Latitude of first point in degree - * @param lon1 Longitude of first point in degree - * @param lat2 Latitude of second point in degree - * @param lon2 Longitude of second point in degree - * @return Distance in km (approximately) - */ - public static double latlngDistance(double lat1, double lon1, double lat2, double lon2) { - final double EARTH_RADIUS = 6371; // km. - // Work in radians - lat1 = MathUtil.deg2rad(lat1); - lat2 = MathUtil.deg2rad(lat2); - lon1 = MathUtil.deg2rad(lon1); - lon2 = MathUtil.deg2rad(lon2); - // Delta - final double dlat = lat1 - lat2; - final double dlon = lon1 - lon2; - - // Spherical Law of Cosines - // NOTE: there seems to be a signedness issue in this code! 
- // double dist = Math.sin(lat1) * Math.sin(lat2) + Math.cos(lat1) * - // Math.cos(lat2) * Math.cos(dlon); - // return EARTH_RADIUS * Math.atan(dist); - - // Alternative: Havestine formula, higher precision at < 1 meters: - final double a = Math.sin(dlat / 2) * Math.sin(dlat / 2) + Math.sin(dlon / 2) * Math.sin(dlon / 2) * Math.cos(lat1) * Math.cos(lat2); - final double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); - return EARTH_RADIUS * c; + return rad * RAD2DEG; } /** @@ -505,7 +510,7 @@ public final class MathUtil { // v1.transposeTimes(v2) / (v1.euclideanLength() * v2.euclideanLength()); // We can just compute all three in parallel. double s = 0, e1 = 0, e2 = 0; - for(int k = 0; k < v1.length; k++) { + for (int k = 0; k < v1.length; k++) { final double r1 = v1[k]; final double r2 = v2[k]; s += r1 * r2; @@ -541,7 +546,7 @@ public final class MathUtil { // v1'.transposeTimes(v2') / (v1'.euclideanLength()*v2'.euclideanLength()); // We can just compute all three in parallel. double s = 0, e1 = 0, e2 = 0; - for(int k = 0; k < v1.length; k++) { + for (int k = 0; k < v1.length; k++) { final double r1 = v1[k] - o[k]; final double r2 = v2[k] - o[k]; s += r1 * r2; @@ -635,53 +640,49 @@ public final class MathUtil { * @return Double value */ public static double floatToDoubleUpper(float f) { - if(Float.isNaN(f)) { + if (Float.isNaN(f)) { return Double.NaN; } - if(Float.isInfinite(f)) { - if(f > 0) { + if (Float.isInfinite(f)) { + if (f > 0) { return Double.POSITIVE_INFINITY; - } - else { - return Double.longBitsToDouble(0xc7efffffffffffffl); + } else { + return Double.longBitsToDouble(0xc7efffffffffffffL); } } long bits = Double.doubleToRawLongBits((double) f); - if((bits & 0x8000000000000000l) == 0) { // Positive - if(bits == 0l) { - return Double.longBitsToDouble(0x3690000000000000l); + if ((bits & 0x8000000000000000L) == 0) { // Positive + if (bits == 0L) { + return Double.longBitsToDouble(0x3690000000000000L); } - if(f == Float.MIN_VALUE) { + if (f == 
Float.MIN_VALUE) { // bits += 0x7_ffff_ffff_ffffl; - return Double.longBitsToDouble(0x36a7ffffffffffffl); + return Double.longBitsToDouble(0x36a7ffffffffffffL); } - if(Float.MIN_NORMAL > f && f >= Double.MIN_NORMAL) { + if (Float.MIN_NORMAL > f && f >= Double.MIN_NORMAL) { // The most tricky case: // a denormalized float, but a normalized double final long bits2 = Double.doubleToRawLongBits((double) Math.nextUp(f)); - bits = (bits >>> 1) + (bits2 >>> 1) - 1l; - } - else { - bits += 0xfffffffl; // 28 extra bits + bits = (bits >>> 1) + (bits2 >>> 1) - 1L; + } else { + bits += 0xfffffffL; // 28 extra bits } return Double.longBitsToDouble(bits); - } - else { - if(bits == 0x8000000000000000l) { + } else { + if (bits == 0x8000000000000000L) { return -0.0d; } - if(f == -Float.MIN_VALUE) { + if (f == -Float.MIN_VALUE) { // bits -= 0xf_ffff_ffff_ffffl; - return Double.longBitsToDouble(0xb690000000000001l); + return Double.longBitsToDouble(0xb690000000000001L); } - if(-Float.MIN_NORMAL < f && f <= -Double.MIN_NORMAL) { + if (-Float.MIN_NORMAL < f && f <= -Double.MIN_NORMAL) { // The most tricky case: // a denormalized float, but a normalized double final long bits2 = Double.doubleToRawLongBits((double) Math.nextUp(f)); - bits = (bits >>> 1) + (bits2 >>> 1) + 1l; - } - else { - bits -= 0xfffffffl; // 28 extra bits + bits = (bits >>> 1) + (bits2 >>> 1) + 1L; + } else { + bits -= 0xfffffffL; // 28 extra bits } return Double.longBitsToDouble(bits); } @@ -697,55 +698,51 @@ public final class MathUtil { * @return Double value */ public static double floatToDoubleLower(float f) { - if(Float.isNaN(f)) { + if (Float.isNaN(f)) { return Double.NaN; } - if(Float.isInfinite(f)) { - if(f < 0) { + if (Float.isInfinite(f)) { + if (f < 0) { return Double.NEGATIVE_INFINITY; - } - else { - return Double.longBitsToDouble(0x47efffffffffffffl); + } else { + return Double.longBitsToDouble(0x47efffffffffffffL); } } long bits = Double.doubleToRawLongBits((double) f); - if((bits & 
0x8000000000000000l) == 0) { // Positive - if(bits == 0l) { + if ((bits & 0x8000000000000000L) == 0) { // Positive + if (bits == 0L) { return +0.0d; } - if(f == Float.MIN_VALUE) { + if (f == Float.MIN_VALUE) { // bits -= 0xf_ffff_ffff_ffffl; - return Double.longBitsToDouble(0x3690000000000001l); + return Double.longBitsToDouble(0x3690000000000001L); } - if(Float.MIN_NORMAL > f /* && f >= Double.MIN_NORMAL */) { + if (Float.MIN_NORMAL > f /* && f >= Double.MIN_NORMAL */) { // The most tricky case: // a denormalized float, but a normalized double final long bits2 = Double.doubleToRawLongBits((double) -Math.nextUp(-f)); - bits = (bits >>> 1) + (bits2 >>> 1) + 1l; // + (0xfff_ffffl << 18); - } - else { - bits -= 0xfffffffl; // 28 extra bits + bits = (bits >>> 1) + (bits2 >>> 1) + 1L; // + (0xfff_ffffL << 18); + } else { + bits -= 0xfffffffL; // 28 extra bits } return Double.longBitsToDouble(bits); - } - else { - if(bits == 0x8000000000000000l) { - return Double.longBitsToDouble(0xb690000000000000l); + } else { + if (bits == 0x8000000000000000L) { + return Double.longBitsToDouble(0xb690000000000000L); } - if(f == -Float.MIN_VALUE) { + if (f == -Float.MIN_VALUE) { // bits += 0x7_ffff_ffff_ffffl; - return Double.longBitsToDouble(0xb6a7ffffffffffffl); + return Double.longBitsToDouble(0xb6a7ffffffffffffL); } - if(-Float.MIN_NORMAL < f /* && f <= -Double.MIN_NORMAL */) { + if (-Float.MIN_NORMAL < f /* && f <= -Double.MIN_NORMAL */) { // The most tricky case: // a denormalized float, but a normalized double final long bits2 = Double.doubleToRawLongBits((double) -Math.nextUp(-f)); - bits = (bits >>> 1) + (bits2 >>> 1) - 1l; - } - else { - bits += 0xfffffffl; // 28 extra bits + bits = (bits >>> 1) + (bits2 >>> 1) - 1L; + } else { + bits += 0xfffffffL; // 28 extra bits } return Double.longBitsToDouble(bits); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java b/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java index 23e53bb5..4ea76002 100644 --- a/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java +++ b/src/de/lmu/ifi/dbs/elki/math/MeanVarianceMinMax.java @@ -101,6 +101,15 @@ public class MeanVarianceMinMax extends MeanVariance { public double getMax() { return this.max; } + + /** + * Get the current minimum and maximum. + * + * @return current minimum and maximum + */ + public DoubleMinMax getDoubleMinMax(){ + return new DoubleMinMax(this.min,this.max); + } /** * Return the difference between minimum and maximum. diff --git a/src/de/lmu/ifi/dbs/elki/math/Primes.java b/src/de/lmu/ifi/dbs/elki/math/Primes.java new file mode 100644 index 00000000..e9ac0463 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/Primes.java @@ -0,0 +1,148 @@ +package de.lmu.ifi.dbs.elki.math; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Class for prime number handling. 
+ * + * TODO: actually offer functions for testing primes and generating primes - as + * of now, we just offer a list of the first 1000 primes for use by other + * classes. + * + * @author Erich Schubert + */ +public final class Primes { + /** + * The first 1000 prime numbers + */ + public static final int[] FIRST_PRIMES = new int[] { // + 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, // + 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, // + 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, // + 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, // + 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, // + 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, // + 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, // + 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, // + 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, // + 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, // + 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, // + 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, // + 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, // + 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, // + 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, // + 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, // + 947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, // + 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, // + 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, // + 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, // + 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291, // + 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, // + 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, // + 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511, // + 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, // + 1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657, // + 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, // + 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, // + 1823, 1831, 1847, 1861, 1867, 
1871, 1873, 1877, 1879, 1889, // + 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987, // + 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053, // + 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, // + 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, // + 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287, // + 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, // + 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, // + 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531, // + 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, // + 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687, // + 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741, // + 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819, // + 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903, // + 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, // + 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, // + 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181, // + 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, // + 3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331, // + 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, // + 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511, // + 3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571, // + 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, // + 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, // + 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821, // + 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, // + 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, // + 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057, // + 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, // + 4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231, // + 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297, // + 4327, 4337, 
4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, // + 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493, // + 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583, // + 4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657, // + 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751, // + 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831, // + 4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937, // + 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003, // + 5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, // + 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179, // + 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279, // + 5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387, // + 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443, // + 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521, // + 5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639, // + 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693, // + 5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, // + 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857, // + 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939, // + 5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053, // + 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133, // + 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221, // + 6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301, // + 6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367, // + 6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, // + 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571, // + 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673, // + 6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761, // + 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833, // + 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917, // + 6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997, 
// + 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103, // + 7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, // + 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297, // + 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411, // + 7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499, // + 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561, // + 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643, // + 7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723, // + 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829, // + 7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919 // + }; + + /** + * Pseudo constructor. + */ + private Primes() { + // Do not instantiate. + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/SinCosTable.java b/src/de/lmu/ifi/dbs/elki/math/SinCosTable.java new file mode 100644 index 00000000..16b89772 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/SinCosTable.java @@ -0,0 +1,308 @@ +package de.lmu.ifi.dbs.elki.math; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Class to precompute / cache Sinus and Cosinus values. 
+ * + * Note that the functions use integer offsets, not radians. + * + * TODO: add an interpolation function. + * + * TODO: add caching + * + * @author Erich Schubert + */ +public abstract class SinCosTable { + /** + * Number of steps. + */ + protected final int steps; + + /** + * Constructor. + * + * @param steps Number of steps (ideally, {@code steps % 4 = 0}!) + */ + private SinCosTable(final int steps) { + this.steps = steps; + } + + /** + * Get Cosine by step value. + * + * @param step Step value + * @return Cosinus + */ + public abstract double cos(int step); + + /** + * Get Sinus by step value. + * + * @param step Step value + * @return Sinus + */ + public abstract double sin(int step); + + /** + * Table that can't exploit much symmetry, because the steps are not divisible + * by 2. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private static class FullTable extends SinCosTable { + /** + * Data store + */ + private final double[] costable; + + /** + * Data store + */ + private final double[] sintable; + + /** + * Constructor for tables with + * + * @param steps + */ + public FullTable(int steps) { + super(steps); + final double radstep = Math.toRadians(360. / steps); + this.costable = new double[steps]; + this.sintable = new double[steps]; + double ang = 0.; + for (int i = 0; i < steps; i++, ang += radstep) { + this.costable[i] = Math.cos(ang); + this.sintable[i] = Math.sin(ang); + } + } + + /** + * Get Cosine by step value. + * + * @param step Step value + * @return Cosinus + */ + @Override + public double cos(int step) { + step = Math.abs(step) % steps; + return costable[step]; + } + + /** + * Get Sinus by step value. + * + * @param step Step value + * @return Sinus + */ + @Override + public double sin(int step) { + step = step % steps; + if (step < 0) { + step += steps; + } + return sintable[step]; + } + } + + /** + * Table that exploits just one symmetry, as the number of steps is divisible + * by two. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private static class HalfTable extends SinCosTable { + /** + * Number of steps div 2 + */ + private final int halfsteps; + + /** + * Data store + */ + private final double[] costable; + + /** + * Data store + */ + private final double[] sintable; + + /** + * Constructor for tables with + * + * @param steps + */ + public HalfTable(int steps) { + super(steps); + this.halfsteps = steps >> 1; + final double radstep = Math.toRadians(360. / steps); + this.costable = new double[halfsteps + 1]; + this.sintable = new double[halfsteps + 1]; + double ang = 0.; + for (int i = 0; i < halfsteps + 1; i++, ang += radstep) { + this.costable[i] = Math.cos(ang); + this.sintable[i] = Math.sin(ang); + } + } + + /** + * Get Cosine by step value. + * + * @param step Step value + * @return Cosinus + */ + @Override + public double cos(int step) { + // Tabularizing cosine is a bit more straightforward than sine + // As we can just drop the sign here: + step = Math.abs(step) % steps; + if (step < costable.length) { + return costable[step]; + } + // Symmetry at PI: + return costable[steps - step]; + } + + /** + * Get Sinus by step value. + * + * @param step Step value + * @return Sinus + */ + @Override + public double sin(int step) { + step = step % steps; + if (step < 0) { + step += steps; + } + if (step < sintable.length) { + return sintable[step]; + } + // Anti symmetry at PI: + return -sintable[steps - step]; + } + } + + /** + * Table that exploits both symmetries, as the number of steps is divisible by + * four. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private static class QuarterTable extends SinCosTable { + /** + * Number of steps div 4 + */ + private final int quarsteps; + + /** + * Number of steps div 2 + */ + private final int halfsteps; + + /** + * Data store + */ + private final double[] costable; + + /** + * Constructor for tables with + * + * @param steps + */ + public QuarterTable(int steps) { + super(steps); + this.halfsteps = steps >> 1; + this.quarsteps = steps >> 2; + final double radstep = Math.toRadians(360. / steps); + this.costable = new double[quarsteps + 1]; + double ang = 0.; + for (int i = 0; i < quarsteps + 1; i++, ang += radstep) { + this.costable[i] = Math.cos(ang); + } + } + + /** + * Get Cosine by step value. + * + * @param step Step value + * @return Cosinus + */ + @Override + public double cos(int step) { + // Tabularizing cosine is a bit more straightforward than sine + // As we can just drop the sign here: + step = Math.abs(step) % steps; + if (step < costable.length) { + return costable[step]; + } + // Symmetry at PI: + if (step > halfsteps) { + step = steps - step; + if (step < costable.length) { + return costable[step]; + } + } + // Inverse symmetry at PI/2: + step = halfsteps - step; + return -costable[step]; + } + + /** + * Get Sinus by step value. + * + * @param step Step value + * @return Sinus + */ + @Override + public double sin(int step) { + return -cos(step + quarsteps); + } + } + + /** + * Make a table for the given number of steps. + * + * For step numbers divisible by 4, an optimized implementation will be used. 
+ * + * @param steps Number of steps + * @return Table + */ + public static SinCosTable make(int steps) { + if ((steps & 0x3) == 0) { + return new QuarterTable(steps); + } + if ((steps & 0x1) == 0) { + return new HalfTable(steps); + } + return new FullTable(steps); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java new file mode 100644 index 00000000..ad59ebfd --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/CovarianceDimensionSimilarity.java @@ -0,0 +1,82 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; + +/** + * Class to compute the dimension similarity based on covariances. 
+ * + * @author Erich Schubert + */ +public class CovarianceDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> { + /** + * Static instance + */ + public static final CovarianceDimensionSimilarity STATIC = new CovarianceDimensionSimilarity(); + + /** + * Constructor. Use static instance. + */ + protected CovarianceDimensionSimilarity() { + super(); + } + + @Override + public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) { + final int dim = matrix.size(); + // FIXME: Use only necessary dimensions! + CovarianceMatrix covmat = CovarianceMatrix.make(relation, subset); + double[][] mat = covmat.destroyToSampleMatrix().getArrayRef(); + // Transform diagonal to 1 / stddev + for (int i = 0; i < mat.length; i++) { + mat[i][i] = 1. / Math.sqrt(mat[i][i]); + } + // Fill output matrix: + for (int x = 0; x < dim; x++) { + final int i = matrix.dim(x); + for (int y = x + 1; y < dim; y++) { + final int j = matrix.dim(y); + matrix.set(x, y, mat[i][j] * mat[i][i] * mat[j][j]); + } + } + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected CovarianceDimensionSimilarity makeInstance() { + return STATIC; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java new file mode 100644 index 00000000..6b01b3f3 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarity.java @@ -0,0 +1,46 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; + +/** + * Interface for computing pairwise dimension similarities, used for arranging + * dimensions in parallel coordinate plots. 
+ * + * @author Erich Schubert + * + * @param <V> Object type + */ +public interface DimensionSimilarity<V> extends Parameterizable { + /** + * Compute the dimension similarity matrix + * + * @param relation Relation + * @param subset DBID subset (for sampling / selection) + * @param matrix Matrix to fill + */ + public void computeDimensionSimilarites(Relation<? extends V> relation, DBIDs subset, DimensionSimilarityMatrix matrix); +} diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java new file mode 100644 index 00000000..9f7a9707 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/DimensionSimilarityMatrix.java @@ -0,0 +1,247 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity; + +import de.lmu.ifi.dbs.elki.math.geometry.PrimsMinimumSpanningTree; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * Class representing a similarity matrix between dimensions. 
+ * + * @author Erich Schubert + */ +public abstract class DimensionSimilarityMatrix { + /** + * Adapter class for running Prim's minimum spanning tree algorithm. + */ + public static final PrimAdapter PRIM_ADAPTER = new PrimAdapter(); + + /** + * Flat, symmetric storage. We use a lower triangle matrix. + * + * Basic memory layout (X = undef, S = symmetric) + * + * <pre> + * X S S S S S + * 0 X S S S S + * 1 2 X S S S + * 3 4 5 X S S + * 6 7 8 9 X S + * 10 11 12 13 14 X + * </pre> + * + * + */ + private final double[] sim; + + /** + * Constructor. + * + * @param dims Number of dimensions to allocate. + */ + protected DimensionSimilarityMatrix(int dims) { + super(); + this.sim = new double[index(0, dims)]; + } + + /** + * Number of dimensions. + * + * @return Size of dimensions array. + */ + public abstract int size(); + + /** + * Get the dimension at position idx. + * + * @param idx Position + * @return Dimension + */ + public abstract int dim(int idx); + + /** + * Set the value of the given matrix position. + * + * Note that {@code x == y} is invalid! + * + * @param x X index coordinate + * @param y Y index coordinate + * @param val Value + */ + public void set(int x, int y, double val) { + sim[index(x, y)] = val; + } + + /** + * Get the value of the given matrix position. + * + * Note that {@code x == y} is invalid! + * + * @param x X index coordinate + * @param y Y index coordinate + * @return Value + */ + public double get(int x, int y) { + return sim[index(x, y)]; + } + + /** + * Indexing function for triangular matrix. 
+ * + * @param x X coordinate + * @param y Y coordinate + * @return Array index + */ + private int index(int x, int y) { + assert (x != y); + if (x > y) { + return index(y, x); + } + return ((y * (y - 1)) >> 1) + x; + } + + @Override + public String toString() { + StringBuffer buf = new StringBuffer(); + final int d = size(); + for (int x = 1; x < d; x++) { + for (int y = 0; y < x; y++) { + if (y > 0) { + buf.append(' '); + } + buf.append(get(x, y)); + } + buf.append('\n'); + } + return buf.toString(); + } + + /** + * Complete matrix of pairwise dimension similarities. + * + * @author Erich Schubert + */ + public static class FullDimensionSimilarityMatrix extends DimensionSimilarityMatrix { + /** + * Number of dimensions. + */ + final int dims; + + /** + * Constructor. + * + * @param dims Number of dimensions + */ + public FullDimensionSimilarityMatrix(int dims) { + super(dims); + this.dims = dims; + } + + @Override + public int size() { + return dims; + } + + @Override + public int dim(int idx) { + return idx; + } + } + + /** + * Partial matrix of pairwise dimension similarities. + * + * @author Erich Schubert + */ + public static class PartialDimensionSimilarityMatrix extends DimensionSimilarityMatrix { + /** + * Enumeration of dimensions to use (so we could use a subset only!) + */ + final int[] dims; + + /** + * Constructor. + * + * @param dims Array of dimensions to process. + */ + public PartialDimensionSimilarityMatrix(int[] dims) { + super(dims.length); + this.dims = dims; + } + + @Override + public int size() { + return dims.length; + } + + @Override + public int dim(int idx) { + return dims[idx]; + } + } + + /** + * Adapter class for running prim's algorithm. + * + * @author Erich Schubert + */ + public static class PrimAdapter implements PrimsMinimumSpanningTree.Adapter<DimensionSimilarityMatrix> { + /** + * Constructor. Use static instance! 
+ */ + protected PrimAdapter() { + super(); + } + + @Override + public double distance(DimensionSimilarityMatrix data, int i, int j) { + return -Math.abs(data.get(i, j)); + } + + @Override + public int size(DimensionSimilarityMatrix data) { + return data.size(); + } + + } + + /** + * Make a full dimension similarity matrix. + * + * @param dims Number of dimensions. + * @return Matrix + */ + public static DimensionSimilarityMatrix make(int dims) { + return new FullDimensionSimilarityMatrix(dims); + } + + /** + * Make a partial dimension similarity matrix. + * + * @param dims Array of relevant dimensions + * @return Matrix + */ + public static DimensionSimilarityMatrix make(int[] dims) { + return new PartialDimensionSimilarityMatrix(dims); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java new file mode 100644 index 00000000..b221866c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HSMDimensionSimilarity.java @@ -0,0 +1,251 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.SinCosTable;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Compute the similarity of dimensions by using a Hough transformation.
+ *
+ * For every pair of dimensions, each object is drawn as a line segment into a
+ * binary picture: the segment starts in the first column at the object's
+ * normalized value in the first dimension, and ends in the last column at its
+ * normalized value in the second dimension. The picture is then Hough
+ * transformed, and the similarity is one minus the fraction of accumulator
+ * cells that reach the mean accumulator value.
+ *
+ * FIXME: This needs serious TESTING before release. Large parts have been
+ * rewritten, but could not be tested at the time of rewriting.
+ *
+ * Reference: <br>
+ * <p>
+ * A. Tatu, G. Albuquerque, M. Eisemann, P. Bak, H. Theisel, M. A. Magnor, and
+ * D. A. Keim.<br />
+ * Automated Analytical Methods to Support Visual Exploration of High-
+ * Dimensional Data. <br/>
+ * IEEE Trans. Visualization and Computer Graphics, 2011.
+ * </p>
+ *
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+@Reference(authors = "A. Tatu, G. Albuquerque, M. Eisemann, P. Bak, H. Theisel, M. A. Magnor, and D. A. Keim.", title = "Automated Analytical Methods to Support Visual Exploration of High-Dimensional Data", booktitle = "IEEE Trans. Visualization and Computer Graphics, 2011", url = "http://dx.doi.org/10.1109/TVCG.2010.242")
+public class HSMDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+  /**
+   * Static instance.
+   */
+  public static final HSMDimensionSimilarity STATIC = new HSMDimensionSimilarity();
+
+  /**
+   * Angular resolution. Best if divisible by 4: smaller tables.
+   *
+   * The original publication used 50.
+   */
+  private static final int STEPS = 64;
+
+  /**
+   * Precomputed sine and cosine values, one entry per angular step.
+   */
+  private static final SinCosTable table = SinCosTable.make(STEPS);
+
+  /**
+   * Constructor. Use static instance instead!
+   */
+  protected HSMDimensionSimilarity() {
+    super();
+  }
+
+  /**
+   * Fill the upper triangle of the similarity matrix with the Hough-based
+   * similarity of every pair of dimensions.
+   *
+   * @param relation Data relation
+   * @param subset Objects to draw
+   * @param matrix Matrix to fill; also defines which dimensions are compared
+   */
+  @Override
+  public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+    final int dim = matrix.size();
+    final int resolution = 500;
+    // One picture per pair of matrix positions (x < y). The pictures are
+    // addressed by matrix position, NOT by the dimension number returned by
+    // matrix.dim(), so that partial matrices work, too.
+    byte[][][][] pics = new byte[dim][dim][][]; // [resolution][resolution];
+
+    // Initialize / allocate "pictures":
+    for (int x = 0; x < dim - 1; x++) {
+      for (int y = x + 1; y < dim; y++) {
+        pics[x][y] = new byte[resolution][resolution];
+      }
+    }
+    // FIXME: Get/keep these statistics in the relation, or compute for the
+    // sample only.
+    double[] off = new double[dim], scale = new double[dim];
+    {
+      Pair<? extends NumberVector<?>, ? extends NumberVector<?>> mm = DatabaseUtil.computeMinMax(relation);
+      NumberVector<?> min = mm.first;
+      NumberVector<?> max = mm.second;
+      for (int d = 0; d < dim; d++) {
+        off[d] = min.doubleValue(matrix.dim(d));
+        final double m = max.doubleValue(matrix.dim(d));
+        // Constant dimensions keep a scale of 1 to avoid a division by zero.
+        scale[d] = (m > off[d]) ? 1. / (m - off[d]) : 1;
+      }
+    }
+    // Iterate dataset
+    final int[] pos = new int[dim];
+    for (DBIDIter id = subset.iter(); id.valid(); id.advance()) {
+      NumberVector<?> pvec = relation.get(id);
+      // Scale every dimension once per object, instead of once per pair.
+      for (int d = 0; d < dim; d++) {
+        final double norm = (pvec.doubleValue(matrix.dim(d)) - off[d]) * scale[d];
+        // May be equal to resolution (or out of range for objects outside
+        // the min/max); drawLine clamps to the picture bounds.
+        pos[d] = (int) (resolution * norm);
+      }
+      for (int x = 0; x < dim - 1; x++) {
+        for (int y = x + 1; y < dim; y++) {
+          drawLine(0, pos[x], resolution - 1, pos[y], pics[x][y]);
+        }
+      }
+    }
+
+    final double stepsq = (double) STEPS * (double) STEPS;
+    for (int x = 0; x < dim; x++) {
+      for (int y = x + 1; y < dim; y++) {
+        int[][] hough = houghTransformation(pics[x][y]);
+        pics[x][y] = null; // Release picture
+        // The original publication said "median", but judging from the text,
+        // meant "mean". Otherwise, always half of the cells are above the
+        // threshold, which doesn't match the explanation there.
+        double mean = sumMatrix(hough) / stepsq;
+        int abovemean = countAboveThreshold(hough, mean);
+
+        matrix.set(x, y, 1. - (abovemean / stepsq));
+      }
+    }
+  }
+
+  /**
+   * Compute the sum of a matrix.
+   *
+   * Rows are iterated with their own length, so the matrix does not need to
+   * be square.
+   *
+   * @param mat Matrix
+   * @return Sum of all elements
+   */
+  private static long sumMatrix(int[][] mat) {
+    long ret = 0;
+    for (int i = 0; i < mat.length; i++) {
+      final int[] row = mat[i];
+      for (int j = 0; j < row.length; j++) {
+        ret += row[j];
+      }
+    }
+    return ret;
+  }
+
+  /**
+   * Count the number of cells that reach the threshold.
+   *
+   * Note that cells exactly equal to the threshold are counted, too.
+   *
+   * @param mat Matrix
+   * @param threshold Threshold
+   * @return Number of elements greater than or equal to the threshold.
+   */
+  private static int countAboveThreshold(int[][] mat, double threshold) {
+    int ret = 0;
+    for (int i = 0; i < mat.length; i++) {
+      int[] row = mat[i];
+      for (int j = 0; j < row.length; j++) {
+        if (row[j] >= threshold) {
+          ret++;
+        }
+      }
+    }
+    return ret;
+  }
+
+  /**
+   * Perform a hough transformation on the binary image in "mat".
+   *
+   * The result is indexed as {@code [distance][angle]}, both quantized into
+   * {@link #STEPS} bins.
+   *
+   * @param mat Binary image
+   * @return Hough transformation of image.
+   */
+  private int[][] houghTransformation(byte[][] mat) {
+    final int xres = mat.length, yres = mat[0].length;
+    // Scale distances by the picture diagonal; computed in double to avoid
+    // integer overflow for large pictures.
+    final double tscale = STEPS / Math.sqrt((double) xres * xres + (double) yres * yres);
+    final int[][] ret = new int[STEPS][STEPS];
+
+    for (int x = 0; x < mat.length; x++) {
+      for (int y = 0; y < mat[0].length; y++) {
+        if (mat[x][y] > 0) {
+          for (int i = 0; i < STEPS; i++) {
+            final int d = (int) (tscale * (x * table.cos(i) + y * table.sin(i)));
+            // Only strictly positive distance bins are accumulated.
+            if (d > 0 && d < STEPS) {
+              ret[d][i] += mat[x][y];
+            }
+          }
+        }
+      }
+    }
+
+    return ret;
+  }
+
+  /**
+   * Draw a line onto the array, using the classic Bresenham algorithm.
+   *
+   * Coordinates outside of the picture are clamped to the nearest valid
+   * coordinate before drawing. Every visited cell is set to 1.
+   *
+   * @param x0 Start X
+   * @param y0 Start Y
+   * @param x1 End X
+   * @param y1 End Y
+   * @param pic Picture array
+   */
+  private static void drawLine(int x0, int y0, int x1, int y1, byte[][] pic) {
+    final int xres = pic.length, yres = pic[0].length;
+    // Ensure bounds
+    y0 = (y0 < 0) ? 0 : (y0 >= yres) ? (yres - 1) : y0;
+    y1 = (y1 < 0) ? 0 : (y1 >= yres) ? (yres - 1) : y1;
+    x0 = (x0 < 0) ? 0 : (x0 >= xres) ? (xres - 1) : x0;
+    x1 = (x1 < 0) ? 0 : (x1 >= xres) ? (xres - 1) : x1;
+    // Default slope
+    final int dx = +Math.abs(x1 - x0), sx = x0 < x1 ? 1 : -1;
+    final int dy = -Math.abs(y1 - y0), sy = y0 < y1 ? 1 : -1;
+    // Error counter
+    int err = dx + dy;
+
+    for (;;) {
+      pic[x0][y0] = 1;
+      if (x0 == x1 && y0 == y1) {
+        break;
+      }
+
+      final int e2 = err << 1;
+      if (e2 > dy) {
+        err += dy;
+        x0 += sx;
+      }
+      if (e2 < dx) {
+        err += dx;
+        y0 += sy;
+      }
+    }
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    @Override
+    protected HSMDimensionSimilarity makeInstance() {
+      return STATIC;
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java new file mode 100644 index 00000000..468db679 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/HiCSDimensionSimilarity.java @@ -0,0 +1,269 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.statistics.tests.GoodnessOfFitTest;
+import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * Use the statistical tests as used by HiCS to arrange dimensions.
+ *
+ * <p>
+ * Based on:<br />
+ * Fabian Keller, Emmanuel Müller, and Klemens Böhm.<br />
+ * HiCS: High Contrast Subspaces for Density-Based Outlier Ranking. <br />
+ * In ICDE, pages 1037–1048, 2012.
+ * </p>
+ *
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+public class HiCSDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+ /**
+ * Number of Monte-Carlo iterations averaged for each pair of dimensions.
+ */
+ private int m = 50;
+
+ /**
+ * Alpha threshold. Its square root is used as the fraction of objects that
+ * form the conditional sample window.
+ */
+ private double alpha = 0.1;
+
+ /**
+ * Statistical test used to compare a conditional sample with the full set of
+ * values.
+ */
+ private GoodnessOfFitTest statTest;
+
+ /**
+ * Factory for the random generator; a generator is obtained from it once per
+ * similarity computation.
+ */
+ private RandomFactory rnd;
+
+ /**
+  * Create a HiCS-based dimension similarity with the given settings.
+  *
+  * @param statTest Test function
+  * @param m Number of monte-carlo iterations
+  * @param alpha Alpha threshold
+  * @param rnd Random source
+  */
+ public HiCSDimensionSimilarity(GoodnessOfFitTest statTest, int m, double alpha, RandomFactory rnd) {
+   // The superclass constructor is invoked implicitly.
+   this.rnd = rnd;
+   this.alpha = alpha;
+   this.m = m;
+   this.statTest = statTest;
+ }
+
+ @Override
+ public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+   // A single generator is shared by all pairs of dimensions.
+   final Random generator = rnd.getRandom();
+   final int numdims = matrix.size();
+
+   // FIXME: only compute indexes necessary.
+   // One sorted id list per matrix position.
+   final ArrayList<ArrayDBIDs> sorted = buildOneDimIndexes(relation, subset, matrix);
+
+   // Visit every pair of matrix positions with first < second, in row order.
+   for (int first = 0; first < numdims; first++) {
+     final int firstDim = matrix.dim(first);
+     final ArrayDBIDs firstIndex = sorted.get(first);
+     for (int second = first + 1; second < numdims; second++) {
+       final int secondDim = matrix.dim(second);
+       final double contrast = calculateContrast(relation, subset, firstIndex, sorted.get(second), firstDim, secondDim, generator);
+       matrix.set(first, second, contrast);
+     }
+   }
+ }
+
+ /**
+  * Build a sorted "index" for every dimension of the matrix: a copy of the
+  * given ids, sorted by the value of the objects in that dimension. The
+  * result list is ordered by matrix position.
+  *
+  * @param relation Relation to index
+  * @param ids IDs to use
+  * @param matrix Matrix (for dimension subset)
+  * @return List of sorted objects
+  */
+ private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation, DBIDs ids, DimensionSimilarityMatrix matrix) {
+   final int count = matrix.size();
+   final ArrayList<ArrayDBIDs> result = new ArrayList<ArrayDBIDs>(count);
+
+   // One comparator, re-targeted to each dimension in turn.
+   final SortDBIDsBySingleDimension sorter = new VectorUtil.SortDBIDsBySingleDimension(relation);
+   int pos = 0;
+   while (pos < count) {
+     sorter.setDimension(matrix.dim(pos));
+     final ArrayModifiableDBIDs copy = DBIDUtil.newArray(ids);
+     copy.sort(sorter);
+     result.add(copy);
+     pos++;
+   }
+   return result;
+ }
+
+ /**
+  * Calculates the actual contrast of a given pair of dimensions.
+  *
+  * In each Monte-Carlo iteration, one of the two dimensions is randomly
+  * chosen as the condition: a random window of consecutive objects in its
+  * sorted index forms the conditional sample. The values of this sample in
+  * the other dimension are then compared to the values of all objects using
+  * the statistical test. Iterations where the test yields NaN are repeated,
+  * but only up to a fixed number of attempts to guarantee termination.
+  *
+  * @param relation Data relation
+  * @param subset Subset to process
+  * @param subspaceIndex1 Index of first subspace
+  * @param subspaceIndex2 Index of second subspace
+  * @param dim1 First dimension
+  * @param dim2 Second dimension
+  * @param random Random generator
+  * @return Mean deviation over the valid iterations, or 0 if no iteration
+  *         produced a valid value
+  */
+ private double calculateContrast(Relation<? extends NumberVector<?>> relation, DBIDs subset, ArrayDBIDs subspaceIndex1, ArrayDBIDs subspaceIndex2, int dim1, int dim2, Random random) {
+   final int size = subset.size();
+   final double alpha1 = Math.pow(alpha, .5);
+   // The indexes only contain the subset, so the window must be relative to
+   // the subset size (not the relation size), and can never exceed it.
+   final int windowsize = Math.min(size, (int) (size * alpha1));
+   // Number of possible window start positions; always at least 1, and it
+   // includes the window that ends at the last object.
+   final int numoffsets = size - windowsize + 1;
+   // Upper bound on the attempts, so that a test that keeps returning NaN
+   // cannot cause an endless loop.
+   final long maxattempts = 10L * m;
+
+   // TODO: speed up by keeping marginal distributions prepared.
+   // Instead of doing the random switch, do half-half.
+   double deviationSum = 0.0;
+   int valid = 0;
+   for (long attempt = 0; valid < m && attempt < maxattempts; attempt++) {
+     // Randomly switch dimensions
+     final int cdim1;
+     ArrayDBIDs cindex1, cindex2;
+     if (random.nextDouble() > .5) {
+       cdim1 = dim1;
+       cindex1 = subspaceIndex1;
+       cindex2 = subspaceIndex2;
+     } else {
+       cdim1 = dim2;
+       cindex1 = subspaceIndex2;
+       cindex2 = subspaceIndex1;
+     }
+     // Build the sample
+     DBIDArrayIter iter = cindex2.iter();
+     HashSetModifiableDBIDs conditionalSample = DBIDUtil.newHashSet();
+     iter.seek(random.nextInt(numoffsets));
+     for (int k = 0; k < windowsize && iter.valid(); k++, iter.advance()) {
+       conditionalSample.add(iter);
+     }
+     // Project the data
+     double[] fullValues = new double[size];
+     double[] sampleValues = new double[conditionalSample.size()];
+     {
+       int l = 0, s = 0;
+       // Note: we use the sorted index sets.
+       for (DBIDIter id = cindex1.iter(); id.valid(); id.advance(), l++) {
+         final double val = relation.get(id).doubleValue(cdim1);
+         fullValues[l] = val;
+         if (conditionalSample.contains(id)) {
+           sampleValues[s] = val;
+           s++;
+         }
+       }
+       assert (s == conditionalSample.size());
+     }
+     double contrast = statTest.deviation(fullValues, sampleValues);
+     if (Double.isNaN(contrast)) {
+       // Does not count as an iteration; try again.
+       continue;
+     }
+     deviationSum += contrast;
+     valid++;
+   }
+   return (valid > 0) ? deviationSum / valid : 0.;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Statistical test passed on to the created instance.
+ */
+ private GoodnessOfFitTest statTest;
+
+ /**
+ * Value of {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#M_ID}; 50 unless configured otherwise.
+ */
+ private int m = 50;
+
+ /**
+ * Value of {@link de.lmu.ifi.dbs.elki.algorithm.outlier.meta.HiCS.Parameterizer#ALPHA_ID}; 0.1 unless configured otherwise.
+ */
+ private double alpha = 0.1;
+
+ /**
+ * Factory for the random generator, passed on to the created instance.
+ */
+ private RandomFactory rnd;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ // The options are requested in the order: m, alpha, test, random seed.
+ final IntParameter iterationsParam = new IntParameter(HiCS.Parameterizer.M_ID, 50);
+ iterationsParam.addConstraint(new GreaterConstraint(1));
+ if (config.grab(iterationsParam)) {
+ m = iterationsParam.intValue();
+ }
+
+ final DoubleParameter alphaParam = new DoubleParameter(HiCS.Parameterizer.ALPHA_ID, 0.1);
+ alphaParam.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaParam)) {
+ alpha = alphaParam.doubleValue();
+ }
+
+ // The test is given as a class; KolmogorovSmirnovTest is the default.
+ final ObjectParameter<GoodnessOfFitTest> testParam = new ObjectParameter<GoodnessOfFitTest>(HiCS.Parameterizer.TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
+ if (config.grab(testParam)) {
+ statTest = testParam.instantiateClass(config);
+ }
+
+ final RandomParameter seedParam = new RandomParameter(HiCS.Parameterizer.SEED_ID);
+ if (config.grab(seedParam)) {
+ rnd = seedParam.getValue();
+ }
+ }
+
+ @Override
+ protected HiCSDimensionSimilarity makeInstance() {
+ return new HiCSDimensionSimilarity(statTest, m, alpha, rnd);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java new file mode 100644 index 00000000..aecdf857 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/MCEDimensionSimilarity.java @@ -0,0 +1,288 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Compute dimension similarity by using a nested means discretization.
+ *
+ * Reference:
+ * <p>
+ * Diansheng Guo<br />
+ * Coordinating computational and visual approaches for interactive feature
+ * selection and multivariate clustering<br />
+ * Information Visualization, 2(4), 2003.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "Diansheng Guo", title = "Coordinating computational and visual approaches for interactive feature selection and multivariate clustering", booktitle = "Information Visualization, 2(4)", url = "http://dx.doi.org/10.1057/palgrave.ivs.9500053")
+ public class MCEDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+ /**
+ * Static instance.
+ */
+ public static final MCEDimensionSimilarity STATIC = new MCEDimensionSimilarity();
+
+ /**
+ * Desired size: 35 observations.
+ *
+ * While this could trivially be made parameterizable, it is a reasonable rule
+ * of thumb and not expected to have a major effect.
+ */
+ public static final int TARGET = 35;
+
+ /**
+ * Constructor. Use static instance instead!
+ */
+ protected MCEDimensionSimilarity() {
+ super();
+ }
+
+ /**
+ * Fill the upper triangle of the similarity matrix.
+ *
+ * Every dimension is split into the same number of partitions (a power of
+ * two, derived from the subset size and {@link #TARGET}). For each pair of
+ * dimensions, the partitions are intersected and one minus the resulting
+ * entropy score is stored.
+ *
+ * @param relation Data relation
+ * @param subset Objects to use
+ * @param matrix Matrix to fill; also defines the dimensions to compare
+ */
+ @Override
+ public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ final int dim = matrix.size();
+
+ // Find a number of bins as recommended by Cheng et al.
+ double p = Math.log(subset.size() / (double) TARGET) / MathUtil.LOG2;
+ // As we are in 2d, take the root (*.5) But let's use at least 1, too.
+ // Check: for 10000 this should give 4, for 150 it gives 1.
+ int power = Math.max(1, (int) Math.floor(p * .5));
+ int gridsize = 1 << power;
+ double loggrid = Math.log((double) gridsize);
+
+ ArrayList<ArrayList<DBIDs>> parts = buildPartitions(relation, subset, power, matrix);
+
+ // Partition sizes
+ int[][] psizes = new int[dim][gridsize];
+ for (int d = 0; d < dim; d++) {
+ final ArrayList<DBIDs> partsd = parts.get(d);
+ final int[] sizesp = psizes[d];
+ for (int i = 0; i < gridsize; i++) {
+ sizesp[i] = partsd.get(i).size();
+ }
+ }
+
+ // Scratch matrix, reused for every pair; each call below overwrites all
+ // of its cells.
+ int[][] res = new int[gridsize][gridsize];
+ for (int x = 0; x < dim; x++) {
+ ArrayList<DBIDs> partsi = parts.get(x);
+ for (int y = x + 1; y < dim; y++) {
+ ArrayList<DBIDs> partsj = parts.get(y);
+ // Fill the intersection matrix
+ intersectionMatrix(res, partsi, partsj, gridsize);
+ matrix.set(x, y, 1. - getMCEntropy(res, psizes[x], psizes[y], subset.size(), gridsize, loggrid));
+ }
+ }
+ }
+
+ /**
+ * Intersect the two 1d grid decompositions, to obtain a 2d matrix.
+ *
+ * Cell {@code res[x][y]} receives the number of objects contained in both
+ * partition x of the first and partition y of the second component.
+ *
+ * @param res Output matrix to fill
+ * @param partsx Partitions in first component
+ * @param partsy Partitions in second component.
+ * @param gridsize Size of partition decomposition
+ */
+ private void intersectionMatrix(int[][] res, ArrayList<? extends DBIDs> partsx, ArrayList<? extends DBIDs> partsy, int gridsize) {
+ for (int x = 0; x < gridsize; x++) {
+ final DBIDs px = partsx.get(x);
+ final int[] rowx = res[x];
+ for (int y = 0; y < gridsize; y++) {
+ rowx[y] = DBIDUtil.intersectionSize(px, partsy.get(y));
+ }
+ }
+ }
+
+ /**
+ * Compute the MCE entropy value.
+ *
+ * For every partition of X, the entropy of its row of the matrix is
+ * computed, and likewise for every partition of Y and its column. Each of
+ * the two sets of entropies is summed up weighted by the partition sizes; the
+ * larger sum, divided by {@code size * loggrid}, is returned.
+ *
+ * @param mat Partition size matrix
+ * @param psizesx Partition sizes on X
+ * @param psizesy Partition sizes on Y
+ * @param size Data set size
+ * @param gridsize Size of grids
+ * @param loggrid Logarithm of grid sizes, for normalization
+ * @return MCE score.
+ */
+ private double getMCEntropy(int[][] mat, int[] psizesx, int[] psizesy, int size, int gridsize, double loggrid) {
+ // Margin entropies:
+ double[] mx = new double[gridsize];
+ double[] my = new double[gridsize];
+
+ for (int i = 0; i < gridsize; i++) {
+ // Note: indexes are a bit tricky here, because we compute both margin
+ // entropies at the same time!
+ final double sumx = (double) psizesx[i];
+ final double sumy = (double) psizesy[i];
+ for (int j = 0; j < gridsize; j++) {
+ // Row i for X, column i for Y. For an empty partition this is 0/0 = NaN,
+ // which fails the "> 0" tests below and thus contributes nothing.
+ double px = mat[i][j] / sumx;
+ double py = mat[j][i] / sumy;
+
+ if (px > 0.) {
+ mx[i] -= px * Math.log(px);
+ }
+ if (py > 0.) {
+ my[i] -= py * Math.log(py);
+ }
+ }
+ }
+
+ // Weighted sums of margin entropies.
+ double sumx = 0., sumy = 0.;
+ for (int i = 0; i < gridsize; i++) {
+ sumx += mx[i] * psizesx[i];
+ sumy += my[i] * psizesy[i];
+ }
+
+ double max = ((sumx > sumy) ? sumx : sumy);
+ return max / (size * loggrid);
+ }
+
+ /**
+ * Build the partitions for every dimension of the matrix.
+ *
+ * For each dimension, the IDs are sorted by that dimension and then
+ * recursively split at the mean value, {@code depth} times, which yields
+ * {@code 2^depth} (possibly empty) partitions per dimension.
+ *
+ * @param relation Relation to index
+ * @param ids IDs to use
+ * @param depth Number of recursive splits
+ * @param matrix Matrix for dimension information
+ * @return For each matrix position, the list of its partitions
+ */
+ private ArrayList<ArrayList<DBIDs>> buildPartitions(Relation<? extends NumberVector<?>> relation, DBIDs ids, int depth, DimensionSimilarityMatrix matrix) {
+ final int dim = matrix.size();
+ ArrayList<ArrayList<DBIDs>> subspaceIndex = new ArrayList<ArrayList<DBIDs>>(dim);
+ SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
+ // Value buffer and mean accumulator, reused for all dimensions.
+ double[] tmp = new double[ids.size()];
+ Mean mean = new Mean();
+
+ for (int i = 0; i < dim; i++) {
+ final int d = matrix.dim(i);
+ // Index for a single dimension:
+ ArrayList<DBIDs> idx = new ArrayList<DBIDs>(1 << depth);
+ // First, we need a copy of the DBIDs and sort it.
+ ArrayModifiableDBIDs sids = DBIDUtil.newArray(ids);
+ comp.setDimension(d);
+ sids.sort(comp);
+ // Now we build the temp array, in the order of the sorted IDs.
+ DBIDArrayIter it = sids.iter();
+ for (int j = 0; j < tmp.length; j++, it.advance()) {
+ assert (it.valid());
+ tmp[j] = relation.get(it).doubleValue(d);
+ }
+ divide(it, tmp, idx, 0, tmp.length, depth, mean);
+ assert (idx.size() == (1 << depth));
+ subspaceIndex.add(idx);
+ }
+
+ return subspaceIndex;
+ }
+
+ /**
+ * Recursive call to further subdivide the array.
+ *
+ * The interval {@code [start, end)} is split at the position of its mean
+ * value. If the mean itself occurs in the data, the split position is moved
+ * towards the middle of the interval, but only within the run of values equal
+ * to the mean. This way, all values in front of the split position are less
+ * than or equal to the mean, and all values from the split position on are
+ * greater than or equal to it.
+ *
+ * @param it Iterator (will be reset!)
+ * @param data 1D data, sorted
+ * @param idx Output index
+ * @param start Interval start
+ * @param end Interval end
+ * @param depth Depth
+ * @param mean Mean working variable (will be reset!)
+ */
+ private void divide(DBIDArrayIter it, double[] data, ArrayList<DBIDs> idx, int start, int end, int depth, Mean mean) {
+ final int count = end - start;
+ if (depth == 0) {
+ // Leaf: emit the IDs of this interval as one partition.
+ if (count > 0) {
+ ModifiableDBIDs out = DBIDUtil.newHashSet(count);
+ it.seek(start);
+ for (int i = count; i > 0; i--, it.advance()) {
+ out.add(it);
+ }
+ idx.add(out);
+ } else {
+ idx.add(DBIDUtil.EMPTYDBIDS);
+ }
+ return;
+ } else {
+ if (count > 0) {
+ mean.reset();
+ for (int i = start; i < end; i++) {
+ mean.put(data[i]);
+ }
+ final double m = mean.getMean();
+ int pos = Arrays.binarySearch(data, start, end, m);
+ if (pos >= 0) {
+ // Ties: try to choose the most central element.
+ final int opt = (start + end) >>> 1;
+ // Moving right may step onto the first larger value, since the split
+ // position is the exclusive end of the lower part.
+ while (pos < opt && Double.compare(data[pos], m) == 0) {
+ pos++;
+ }
+ // Moving left must stop at the first element of the run, as the split
+ // position is the inclusive start of the upper part.
+ while (pos > opt && Double.compare(data[pos - 1], m) == 0) {
+ pos--;
+ }
+ } else {
+ // Not found: convert to the insertion point.
+ pos = (-pos - 1);
+ }
+ divide(it, data, idx, start, pos, depth - 1, mean);
+ divide(it, data, idx, pos, end, depth - 1, mean);
+ } else {
+ // Corner case, that should barely happen. But for ties, we currently
+ // Do not yet assure that it doesn't happen!
+ divide(it, data, idx, start, end, depth - 1, mean);
+ divide(it, data, idx, start, end, depth - 1, mean);
+ }
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected MCEDimensionSimilarity makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java new file mode 100644 index 00000000..551b4759 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SURFINGDimensionSimilarity.java @@ -0,0 +1,133 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.BitSet;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Compute the similarity of dimensions using the SURFING score. The parameter k
+ * for the k nearest neighbors is currently hard-coded to 10% of the set size.
+ *
+ * Note that the complexity is roughly O(n n k) * O(d^2), so this is a rather
+ * slow method without index support, and with k at 10% of n, is actually cubic.
+ * So try to use an appropriate index!
+ *
+ * Reference:
+ * <p>
+ * Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and
+ * Peer Kröger<br />
+ * Subspace Selection for Clustering High-Dimensional Data<br />
+ * In IEEE International Conference on Data Mining, 2004.
+ * </p>
+ *
+ * TODO: make the subspace distance function and k parameterizable.
+ *
+ * @author Robert Rödler
+ * @author Erich Schubert
+ *
+ * @apiviz.uses SubspaceEuclideanDistanceFunction
+ */
+@Reference(authors = "Christian Baumgartner, Claudia Plant, Karin Kailing, Hans-Peter Kriegel, and Peer Kröger", title = "Subspace Selection for Clustering High-Dimensional Data", booktitle = "IEEE International Conference on Data Mining, 2004", url = "http://dx.doi.org/10.1109/ICDM.2004.10112")
+ public class SURFINGDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+ /**
+ * Shared instance; the class has no instance fields.
+ */
+ public static final SURFINGDimensionSimilarity STATIC = new SURFINGDimensionSimilarity();
+
+ /**
+ * Constructor. Prefer {@link #STATIC} over creating new instances.
+ */
+ protected SURFINGDimensionSimilarity() {
+ super();
+ }
+
+ @Override
+ public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ final int numDims = matrix.size();
+ final Database database = relation.getDatabase();
+ // Accumulator and buffer are reused for every pair of dimensions.
+ final Mean meanKdist = new Mean();
+ // Neighborhood size: a tenth of the subset, but never less than one.
+ final int k = Math.max(1, subset.size() / 10);
+
+ final double[] kdists = new double[subset.size()];
+
+ // TODO: optimize by using 1d indexes?
+ for (int x = 0; x < numDims; x++) {
+ final int dimX = matrix.dim(x);
+ for (int y = x + 1; y < numDims; y++) {
+ // Restrict the distance function to the two dimensions of this pair.
+ final BitSet selected = new BitSet(numDims);
+ selected.set(dimX);
+ selected.set(matrix.dim(y));
+ DistanceQuery<? extends NumberVector<?>, DoubleDistance> distQuery = database.getDistanceQuery(relation, new SubspaceEuclideanDistanceFunction(selected));
+ KNNQuery<? extends NumberVector<?>, DoubleDistance> knnQuery = database.getKNNQuery(distQuery, k);
+
+ // First pass: collect the k-distance of every object and their mean.
+ meanKdist.reset();
+ int pos = 0;
+ for (DBIDIter iter = subset.iter(); iter.valid(); iter.advance()) {
+ final double kdist = knnQuery.getKNNForDBID(iter, k).getKNNDistance().doubleValue();
+ meanKdist.put(kdist);
+ kdists[pos] = kdist;
+ pos++;
+ }
+ final double mean = meanKdist.getMean();
+
+ // Second pass: total absolute deviation from the mean, and the number
+ // of objects whose k-distance lies below the mean.
+ double absDeviation = 0.;
+ int numBelow = 0;
+ for (final double kdist : kdists) {
+ absDeviation += Math.abs(mean - kdist);
+ if (kdist < mean) {
+ numBelow++;
+ }
+ }
+
+ if (numBelow > 0) {
+ matrix.set(x, y, absDeviation / (2. * mean * numBelow));
+ } else {
+ // No k-distance below the mean: store 0.
+ matrix.set(x, y, 0.);
+ }
+ }
+ }
+ }
+
+ /**
+ * Parameterization class; always yields the shared instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SURFINGDimensionSimilarity makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java new file mode 100644 index 00000000..3c81da63 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeDimensionSimilarity.java @@ -0,0 +1,142 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Arrange dimensions based on the entropy of the slope spectrum.
+ *
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+public class SlopeDimensionSimilarity implements DimensionSimilarity<NumberVector<?>> {
+ /**
+ * Static instance.
+ */
+ public static final SlopeDimensionSimilarity STATIC = new SlopeDimensionSimilarity();
+
+ /**
+ * Full precision.
+ */
+ protected final static int PRECISION = 40;
+
+ /**
+ * Precision for entropy normalization.
+ */
+ protected final static double LOG_PRECISION = Math.log(PRECISION);
+
+ /**
+ * Scaling factor.
+ */
+ protected final static double RESCALE = PRECISION * .5;
+
+ /**
+ * Constructor. Use static instance instead!
+ */
+ protected SlopeDimensionSimilarity() {
+ super();
+ }
+
+ @Override
+ public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ final int dim = matrix.size();
+ final int size = subset.size();
+
+ // FIXME: Get/keep these statistics in the relation, or compute for the
+ // sample only.
+ double[] off = new double[dim], scale = new double[dim];
+ {
+ Pair<? extends NumberVector<?>, ? extends NumberVector<?>> mm = DatabaseUtil.computeMinMax(relation);
+ NumberVector<?> min = mm.first;
+ NumberVector<?> max = mm.second;
+ for (int d = 0; d < dim; d++) {
+ off[d] = min.doubleValue(matrix.dim(d));
+ final double m = max.doubleValue(matrix.dim(d));
+ scale[d] = (m > off[d]) ? 1. / (m - off[d]) : 1;
+ }
+ }
+
+ // Collect angular histograms.
+ // Note, we only fill half of the matrix
+ int[][][] angles = new int[dim][dim][PRECISION];
+
+ // Scratch buffer
+ double[] vec = new double[dim];
+ for (DBIDIter id = subset.iter(); id.valid(); id.advance()) {
+ final NumberVector<?> obj = relation.get(id);
+ // Map values to 0..1
+ for (int d = 0; d < dim; d++) {
+ vec[d] = (obj.doubleValue(matrix.dim(d)) - off[d]) * scale[d];
+ }
+ for (int i = 0; i < dim - 1; i++) {
+ for (int j = i + 1; j < dim; j++) {
+ // This will be on a scale of 0 .. 2:
+ final double delta = vec[j] - vec[i] + 1;
+ int div = (int) Math.round(delta * RESCALE);
+ // TODO: do we really need this check?
+ div = (div < 0) ? 0 : (div >= PRECISION) ? PRECISION - 1 : div;
+ angles[i][j][div] += 1;
+ }
+ }
+ }
+
+ // Compute entropy in each combination:
+ for (int x = 0; x < dim; x++) {
+ for (int y = x + 1; y < dim; y++) {
+ double entropy = 0.;
+ int[] as = angles[x][y];
+ for (int l = 0; l < PRECISION; l++) {
+ if (as[l] > 0) {
+ final double p = as[l] / (double) size;
+ entropy += p * Math.log(p);
+ }
+ }
+ entropy /= LOG_PRECISION;
+
+ matrix.set(x, y, 1 + entropy);
+ }
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SlopeDimensionSimilarity makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java new file mode 100644 index 00000000..aad58448 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/SlopeInversionDimensionSimilarity.java @@ -0,0 +1,158 @@ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Arrange dimensions based on the entropy of the slope spectrum. In contrast to
+ * {@link SlopeDimensionSimilarity}, we also take the option of inverting an
+ * axis into account.
+ *
+ * TODO: shouldn't this be normalized by the single-dimension entropies or so?
+ *
+ * @author Erich Schubert
+ * @author Robert Rödler
+ */
+ public class SlopeInversionDimensionSimilarity extends SlopeDimensionSimilarity {
+ /**
+ * Shared instance.
+ */
+ public static final SlopeInversionDimensionSimilarity STATIC = new SlopeInversionDimensionSimilarity();
+
+ /**
+ * Constructor. Prefer {@link #STATIC} over creating new instances.
+ */
+ protected SlopeInversionDimensionSimilarity() {
+ super();
+ }
+
+ @Override
+ public void computeDimensionSimilarites(Relation<? extends NumberVector<?>> relation, DBIDs subset, DimensionSimilarityMatrix matrix) {
+ final int numDims = matrix.size();
+ final int numObjects = subset.size();
+
+ // Two histograms per pair of dimensions: one for the axes as given, one
+ // with one axis inverted.
+ // Note, we only fill half of the matrix
+ final int[][][] direct = new int[numDims][numDims][PRECISION];
+ final int[][][] inverted = new int[numDims][numDims][PRECISION];
+
+ // FIXME: Get/keep these statistics in the relation, or compute for the
+ // sample only.
+ final double[] minimum = new double[numDims];
+ final double[] factor = new double[numDims];
+ {
+ Pair<? extends NumberVector<?>, ? extends NumberVector<?>> extent = DatabaseUtil.computeMinMax(relation);
+ final NumberVector<?> lower = extent.first;
+ final NumberVector<?> upper = extent.second;
+ for (int d = 0; d < numDims; d++) {
+ final int attribute = matrix.dim(d);
+ minimum[d] = lower.doubleValue(attribute);
+ final double maximum = upper.doubleValue(attribute);
+ // Without any extent, fall back to a factor of 1.
+ factor[d] = (maximum > minimum[d]) ? 1. / (maximum - minimum[d]) : 1;
+ }
+ }
+
+ // Buffer for the rescaled values of the current object.
+ final double[] scaled = new double[numDims];
+ for (DBIDIter iter = subset.iter(); iter.valid(); iter.advance()) {
+ final NumberVector<?> vector = relation.get(iter);
+ // Shift by the minimum and rescale by the extent of each dimension.
+ for (int d = 0; d < numDims; d++) {
+ scaled[d] = (vector.doubleValue(matrix.dim(d)) - minimum[d]) * factor[d];
+ }
+ for (int i = 0; i < numDims - 1; i++) {
+ for (int j = i + 1; j < numDims; j++) {
+ // For values within 0..1, both terms are on a scale of 0 .. 2.
+ direct[i][j][binOf(scaled[j] - scaled[i] + 1)]++;
+ inverted[i][j][binOf(scaled[j] + scaled[i])]++;
+ }
+ }
+ }
+
+ // Score each pair by the histogram with the larger sum, i.e. the lower
+ // entropy.
+ for (int x = 0; x < numDims; x++) {
+ for (int y = x + 1; y < numDims; y++) {
+ final double sumDirect = sumPLogP(direct[x][y], numObjects);
+ final double sumInverted = sumPLogP(inverted[x][y], numObjects);
+ if (sumDirect >= sumInverted) {
+ matrix.set(x, y, 1 + sumDirect / LOG_PRECISION);
+ } else {
+ // Negative sign to indicate the axes might be inversely related
+ matrix.set(x, y, -(1 + sumInverted / LOG_PRECISION));
+ }
+ }
+ }
+ }
+
+ /**
+ * Map a value to a histogram bin, clamped to the valid bin range.
+ *
+ * @param delta Value to discretize
+ * @return Bin number in {@code 0 .. PRECISION - 1}
+ */
+ private static int binOf(double delta) {
+ final int raw = (int) Math.round(delta * RESCALE);
+ return Math.max(0, Math.min(PRECISION - 1, raw));
+ }
+
+ /**
+ * Sum up {@code p * log(p)} over all non-empty bins of a histogram, where p
+ * is the bin count divided by the given total.
+ *
+ * @param histogram Bin counts
+ * @param total Number of objects
+ * @return Sum of {@code p * log(p)}, which is never positive
+ */
+ private static double sumPLogP(int[] histogram, int total) {
+ double sum = 0.;
+ for (int l = 0; l < PRECISION; l++) {
+ if (histogram[l] > 0) {
+ final double p = histogram[l] / (double) total;
+ sum += p * Math.log(p);
+ }
+ }
+ return sum;
+ }
+
+ /**
+ * Parameterization class; always yields the shared instance.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected SlopeInversionDimensionSimilarity makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java new file mode 100644 index 00000000..b6c27c57 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/dimensionsimilarity/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Functions to compute the similarity of dimensions (or the interestingness of the combination).</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.math.dimensionsimilarity;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java index 35858c67..1ab324c3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/AlphaShape.java @@ -5,7 +5,7 @@ import java.util.BitSet; import java.util.List; import de.lmu.ifi.dbs.elki.data.spatial.Polygon; -import de.lmu.ifi.dbs.elki.math.geometry.SweepHullDelaunay2D.Triangle; + import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; /* @@ -53,7 +53,7 @@ public class AlphaShape { /** * Delaunay triangulation */ - private ArrayList<Triangle> delaunay = null; + private ArrayList<SweepHullDelaunay2D.Triangle> delaunay = null; public AlphaShape(List<Vector> points, double alpha) { this.alpha2 = alpha * alpha; @@ -71,9 +71,9 @@ public class AlphaShape { List<Vector> cur = new ArrayList<Vector>(); for(int i = 0 /* = used.nextClearBit(0) */; i < delaunay.size() && i >= 0; i = used.nextClearBit(i + 1)) { - if(used.get(i) == false) { + if(!used.get(i)) { used.set(i); - Triangle tri = delaunay.get(i); + SweepHullDelaunay2D.Triangle tri = delaunay.get(i); if(tri.r2 <= alpha2) { // Check neighbors processNeighbor(cur, used, i, tri.ab, tri.b); @@ -96,7 +96,7 @@ public class AlphaShape { return; } used.set(ab); - final Triangle next = delaunay.get(ab); + final SweepHullDelaunay2D.Triangle next = delaunay.get(ab); if(next.r2 < alpha2) { // Continue where we left off... 
if(next.ab == i) { diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java b/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java new file mode 100644 index 00000000..d6675256 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/PrimsMinimumSpanningTree.java @@ -0,0 +1,177 @@ +package de.lmu.ifi.dbs.elki.math.geometry; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Arrays; +import java.util.BitSet; + +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Prim's algorithm for finding the minimum spanning tree. + * + * Implementation for <em>dense</em> graphs, represented as distance matrix. + * + * Reference: + * <p> + * R. C. Prim<br /> + * Shortest connection networks and some generalizations<br /> + * In: Bell System Technical Journal, 36 (1957), pp. 1389–140 + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "R. C. 
Prim", title = "Shortest connection networks and some generalizations", booktitle = "Bell System Technical Journal, 36 (1957)") +public class PrimsMinimumSpanningTree { + /** + * Adapter class for double[][] matrixes. + */ + public static final Array2DAdapter ARRAY2D_ADAPTER = new Array2DAdapter(); + + /** + * Process a k x k distance matrix. + * + * @param mat Distance matrix + * @return list of node number pairs representing the edges + */ + public static int[] processDense(double[][] mat) { + return processDense(mat, ARRAY2D_ADAPTER); + } + + /** + * Process a k x k distance matrix. + * + * @param mat Distance matrix + * @return list of node number pairs representing the edges + */ + public static int[] processDense(Matrix mat) { + return processDense(mat.getArrayRef(), ARRAY2D_ADAPTER); + } + + /** + * Run Prim's algorithm on a dense graph. + * + * @param data Data set + * @param adapter Adapter instance + * @return list of node number pairs representing the edges + */ + public static <T> int[] processDense(T data, Adapter<T> adapter) { + // Number of nodes + final int n = adapter.size(data); + // Output array storage + int[] mst = new int[(n - 1) << 1]; + // Best distance for each node + double[] best = new double[n]; + Arrays.fill(best, Double.POSITIVE_INFINITY); + // Best previous node + int[] src = new int[n]; + // Nodes already handled + BitSet in = new BitSet(n); + + // We always start at "random" node 0 + // Note: we use this below in the "j" loop! 
+ int current = 0; + in.set(current); + best[current] = 0; + + // Search + for (int i = n - 2; i >= 0; i--) { + // Update best and src from current: + int newbesti = -1; + double newbestd = Double.POSITIVE_INFINITY; + // Note: we assume we started with 0, and can thus skip it + for (int j = in.nextClearBit(1); j < n && j > 0; j = in.nextClearBit(j + 1)) { + final double dist = adapter.distance(data, current, j); + if (dist < best[j]) { + best[j] = dist; + src[j] = current; + } + if (best[j] < newbestd) { + newbestd = best[j]; + newbesti = j; + } + } + assert (newbesti >= 0); + // Flag + in.set(newbesti); + // Store edge + mst[i << 1] = newbesti; + mst[(i << 1) + 1] = src[newbesti]; + // Continue + current = newbesti; + } + return mst; + } + + /** + * Adapter interface to allow use with different data representations. + * + * @author Erich Schubert + * + * @param <T> Data reference + */ + public interface Adapter<T> { + /** + * Get the distance of two objects + * + * @param data Data set + * @param i First index + * @param j Second index + * @return Distance of objects number i and number j. + */ + public double distance(T data, int i, int j); + + /** + * Get number of objects in dataset + * + * @return Size + */ + public int size(T data); + } + + /** + * Adapter for a simple 2d double matrix. + * + * @author Erich Schubert + */ + public static class Array2DAdapter implements Adapter<double[][]> { + /** + * Constructor. Use static instance! + */ + private Array2DAdapter() { + // Use static instance! 
+ } + + @Override + public double distance(double[][] data, int i, int j) { + return data[i][j]; + } + + @Override + public int size(double[][] data) { + return data.length; + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java index 4f78cb76..afb8db26 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/SweepHullDelaunay2D.java @@ -54,7 +54,7 @@ public class SweepHullDelaunay2D { /** * Class logger */ - private static final Logging logger = Logging.getLogger(SweepHullDelaunay2D.class); + private static final Logging LOG = Logging.getLogger(SweepHullDelaunay2D.class); /** * The current set of points. @@ -152,7 +152,7 @@ public class SweepHullDelaunay2D { besttri.copyFrom(testtri); besti = i; } - else if(besttri.r2 * 4 < sort[i].first) { + else if(besttri.r2 * 4. < sort[i].first) { // Stop early, points are too far away from seed. 
break; } @@ -178,7 +178,7 @@ public class SweepHullDelaunay2D { hull.add(new IntIntPair(besttri.b, 0)); hull.add(new IntIntPair(besttri.c, 0)); - if(logger.isDebuggingFinest()) { + if(LOG.isDebuggingFinest()) { debugHull(); } @@ -286,8 +286,8 @@ public class SweepHullDelaunay2D { lasttri = tristart + newtris.size() - 1; } final int hullsize = hull.size(); - if(logger.isDebuggingFinest()) { - logger.debugFinest("Size: " + hullsize + " start: " + hstart + " end: " + hend); + if(LOG.isDebuggingFinest()) { + LOG.debugFinest("Size: " + hullsize + " start: " + hstart + " end: " + hend); } if(hend < hullsize) { ListIterator<IntIntPair> iter = hull.listIterator(); @@ -338,7 +338,7 @@ public class SweepHullDelaunay2D { iter.remove(); } } - if(logger.isDebuggingFinest()) { + if(LOG.isDebuggingFinest()) { debugHull(); } if(!hullonly) { @@ -410,11 +410,11 @@ public class SweepHullDelaunay2D { * Debug helper */ void debugHull() { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); for(IntIntPair p : hull) { - buf.append(p).append(" "); + buf.append(p).append(' '); } - logger.debugFinest(buf); + LOG.debugFinest(buf); } /** diff --git a/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java b/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java index 5d39de16..3a53f2a2 100644 --- a/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java +++ b/src/de/lmu/ifi/dbs/elki/math/geometry/XYCurve.java @@ -90,7 +90,7 @@ public class XYCurve implements Result, TextWriteable { */ public XYCurve(String labelx, String labely, int size) { super(); - this.data = new TDoubleArrayList(size * 2); + this.data = new TDoubleArrayList(size << 1); this.labelx = labelx; this.labely = labely; } @@ -295,21 +295,21 @@ public class XYCurve implements Result, TextWriteable { out.commentPrint(labely); out.flush(); for(int pos = 0; pos < data.size(); pos+=2) { - out.inlinePrint(data.get(pos)); - out.inlinePrint(data.get(pos + 1)); + out.inlinePrint(Double.toString(data.get(pos))); + 
out.inlinePrint(Double.toString(data.get(pos + 1))); out.flush(); } } @Override public String toString() { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); buf.append("XYCurve["); - buf.append(labelx).append(",").append(labely).append(":"); + buf.append(labelx).append(',').append(labely).append(':'); for(int pos = 0; pos < data.size(); pos += 2) { - buf.append(" ").append(data.get(pos)).append(",").append(data.get(pos + 1)); + buf.append(' ').append(data.get(pos)).append(',').append(data.get(pos + 1)); } - buf.append("]"); + buf.append(']'); return buf.toString(); } diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java deleted file mode 100644 index 87b93910..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/histograms/AggregatingHistogram.java +++ /dev/null @@ -1,254 +0,0 @@ -package de.lmu.ifi.dbs.elki.math.histograms; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -import de.lmu.ifi.dbs.elki.math.MeanVariance; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair; -import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; -import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; - -/** - * Class for the typical case of an aggregating (e.g. counting, averaging) - * Histogram. - * - * @author Erich Schubert - * - * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.histograms.AggregatingHistogram.Adapter - * - * @param <T> Type of data in histogram - * @param <D> Type of input data - */ -public class AggregatingHistogram<T, D> extends ReplacingHistogram<T> { - /** - * The class we are using for putting data. - */ - private Adapter<T, D> putter; - - /** - * Adapter class for an AggregatingHistogram - * - * @author Erich Schubert - * - * @param <T> Histogram bin type - * @param <D> Incoming data type - */ - public static abstract class Adapter<T, D> extends ReplacingHistogram.Adapter<T> { - /** - * Update an existing histogram value with new data. - * - * @param existing Existing histogram data - * @param data New value - * @return Aggregated value - */ - public abstract T aggregate(T existing, D data); - } - - /** - * Constructor with Adapter. - * - * @param bins Number of bins - * @param min Minimum value - * @param max Maximum value - * @param adapter Adapter - */ - public AggregatingHistogram(int bins, double min, double max, Adapter<T, D> adapter) { - super(bins, min, max, adapter); - this.putter = adapter; - } - - /** - * Add a value to the histogram using the aggregation adapter. - * - * @param coord Coordinate - * @param value New value - */ - public void aggregate(double coord, D value) { - super.replace(coord, putter.aggregate(super.get(coord), value)); - } - - /** - * Convenience constructor for {@link MeanVariance}-based Histograms. 
Uses a - * constructor to initialize bins with new {@link MeanVariance} objects - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for {@link MeanVariance}. - */ - public static AggregatingHistogram<MeanVariance, Double> MeanVarianceHistogram(int bins, double min, double max) { - return new AggregatingHistogram<MeanVariance, Double>(bins, min, max, new Adapter<MeanVariance, Double>() { - @Override - public MeanVariance make() { - return new MeanVariance(); - } - - @Override - public MeanVariance aggregate(MeanVariance existing, Double data) { - existing.put(data); - return existing; - } - }); - } - - /** - * Convenience constructor for Integer-based Histograms. Uses a constructor to - * initialize bins with Integer(0). Aggregation is done by adding the values - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for Integers. - */ - public static AggregatingHistogram<Integer, Integer> IntSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<Integer, Integer>(bins, min, max, new Adapter<Integer, Integer>() { - @Override - public Integer make() { - return new Integer(0); - } - - @Override - public Integer aggregate(Integer existing, Integer data) { - return existing + data; - } - }); - } - - /** - * Convenience constructor for Long-based Histograms. Uses a constructor to - * initialize bins with Long(0L). Aggregation is done by adding the values - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for Integers. 
- */ - public static AggregatingHistogram<Long, Long> LongSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<Long, Long>(bins, min, max, new Adapter<Long, Long>() { - @Override - public Long make() { - return new Long(0L); - } - - @Override - public Long aggregate(Long existing, Long data) { - return existing + data; - } - }); - } - - /** - * Convenience constructor for Double-based Histograms. Uses a constructor to - * initialize bins with Double(0.0). Aggregation is done by adding the values - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for Double. - */ - public static AggregatingHistogram<Double, Double> DoubleSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<Double, Double>(bins, min, max, new Adapter<Double, Double>() { - @Override - public Double make() { - return new Double(0.0); - } - - @Override - public Double aggregate(Double existing, Double data) { - return existing + data; - } - }); - } - - /** - * Histograms that work like two {@link #IntSumHistogram}, component wise. - * - * @param bins Number of bins. - * @param min Minimum value - * @param max Maximum value - * @return Histogram object - */ - public static AggregatingHistogram<IntIntPair, IntIntPair> IntSumIntSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<IntIntPair, IntIntPair>(bins, min, max, new Adapter<IntIntPair, IntIntPair>() { - @Override - public IntIntPair make() { - return new IntIntPair(0, 0); - } - - @Override - public IntIntPair aggregate(IntIntPair existing, IntIntPair data) { - existing.first = existing.first + data.first; - existing.second = existing.second + data.second; - return existing; - } - }); - } - - /** - * Histograms that work like two {@link #LongSumHistogram}, component wise. - * - * @param bins Number of bins. 
- * @param min Minimum value - * @param max Maximum value - * @return Histogram object - */ - public static AggregatingHistogram<Pair<Long, Long>, Pair<Long, Long>> LongSumLongSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<Pair<Long, Long>, Pair<Long, Long>>(bins, min, max, new Adapter<Pair<Long, Long>, Pair<Long, Long>>() { - @Override - public Pair<Long, Long> make() { - return new Pair<Long, Long>(0L, 0L); - } - - @Override - public Pair<Long, Long> aggregate(Pair<Long, Long> existing, Pair<Long, Long> data) { - existing.setFirst(existing.getFirst() + data.getFirst()); - existing.setSecond(existing.getSecond() + data.getSecond()); - return existing; - } - }); - } - - /** - * Histograms that work like two {@link #DoubleSumHistogram}, component wise. - * - * @param bins Number of bins. - * @param min Minimum value - * @param max Maximum value - * @return Histogram object - */ - public static AggregatingHistogram<DoubleDoublePair, DoubleDoublePair> DoubleSumDoubleSumHistogram(int bins, double min, double max) { - return new AggregatingHistogram<DoubleDoublePair, DoubleDoublePair>(bins, min, max, new Adapter<DoubleDoublePair, DoubleDoublePair>() { - @Override - public DoubleDoublePair make() { - return new DoubleDoublePair(0., 0.); - } - - @Override - public DoubleDoublePair aggregate(DoubleDoublePair existing, DoubleDoublePair data) { - existing.first = existing.first + data.first; - existing.second = existing.second + data.second; - return existing; - } - }); - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java deleted file mode 100644 index 07d20dd2..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/histograms/FlexiHistogram.java +++ /dev/null @@ -1,487 +0,0 @@ -package de.lmu.ifi.dbs.elki.math.histograms; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -import java.util.ArrayList; -import java.util.Iterator; - -import de.lmu.ifi.dbs.elki.math.MeanVariance; -import de.lmu.ifi.dbs.elki.math.scales.LinearScale; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; -import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; -import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; - -/** - * Histogram with flexible size, guaranteed to be in [bin, 2*bin[ - * - * @author Erich Schubert - * - * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram.Adapter - * - * @param <T> Type of data in histogram - * @param <D> Type of input data - */ -public class FlexiHistogram<T, D> extends AggregatingHistogram<T, D> { - /** - * Adapter class, extended "maker". - */ - private Adapter<T, D> downsampler; - - /** - * Cache for elements when not yet initialized. - */ - private ArrayList<DoubleObjPair<D>> tempcache = null; - - /** - * Destination (minimum) size of the structure. At most 2*destsize bins are - * allowed. - */ - private int destsize; - - /** - * Adapter interface to specify bin creation, data caching and combination. - * - * @author Erich Schubert - * - * @param <T> Type of data in histogram - * @param <D> Type of input data - */ - public static abstract class Adapter<T, D> extends AggregatingHistogram.Adapter<T, D> { - /** - * Rule to combine two bins into one. - * - * first and second MAY be modified and returned. - * - * @param first First bin value - * @param second Second bin value - * @return combined bin value - */ - public abstract T downsample(T first, T second); - - /** - * Clone a data passed to the algorithm for computing the initial size. - * - * @param data Data to be cloned - * @return cloned data - */ - public abstract D cloneForCache(D data); - } - - /** - * Create a new histogram for an unknown data range. - * - * The generated histogram is guaranteed to have within {@code bins} and - * {@code 2*bins} bins in length. 
- * - * @param bins Target number of bins - * @param adapter Adapter for data types and combination rules. - */ - public FlexiHistogram(int bins, Adapter<T, D> adapter) { - super(bins, 0.0, 1.0, adapter); - this.destsize = bins; - this.downsampler = adapter; - tempcache = new ArrayList<DoubleObjPair<D>>(this.destsize * 2); - } - - private synchronized void materialize() { - // already materialized? - if(tempcache == null) { - return; - } - // we can't really initialize, but since we have to, we'll just stick - // to 0.0 and 1.0 as used in the constructor. - if(tempcache.size() <= 0) { - tempcache = null; - return; - } - double min = Double.MAX_VALUE; - double max = Double.MIN_VALUE; - for(DoubleObjPair<D> pair : tempcache) { - min = Math.min(min, pair.first); - max = Math.max(max, pair.first); - } - // use the LinearScale magic to round to "likely suiteable" step sizes. - LinearScale scale = new LinearScale(min, max); - min = scale.getMin(); - max = scale.getMax(); - this.base = min; - this.max = max; - this.binsize = (max - min) / this.destsize; - // initialize array - this.data = new ArrayList<T>(this.destsize * 2); - for(int i = 0; i < this.destsize; i++) { - this.data.add(downsampler.make()); - } - // re-insert data we have - for(DoubleObjPair<D> pair : tempcache) { - super.aggregate(pair.first, pair.second); - } - // delete cache, signal that we're initialized - tempcache = null; - } - - @Override - public synchronized void replace(double coord, T d) { - materialize(); - // super class put will already handle histogram resizing - super.replace(coord, d); - // but not resampling - testResample(); - } - - private void testResample() { - while(super.size >= 2 * this.destsize) { - // Resampling. 
- ArrayList<T> newdata = new ArrayList<T>(this.destsize * 2); - for(int i = 0; i < super.size; i += 2) { - if(i + 1 < super.size) { - newdata.add(downsampler.downsample(super.data.get(i), super.data.get(i + 1))); - } - else { - newdata.add(downsampler.downsample(super.data.get(i), super.make())); - } - } - // recalculate histogram base. - double base = super.base - super.offset * super.binsize; - // update data - super.data = newdata; - // update sizes - super.base = base; - super.offset = 0; - super.size = newdata.size(); - super.binsize = super.binsize * 2; - super.max = super.base + super.binsize * super.size; - } - } - - @Override - public T get(double coord) { - materialize(); - return super.get(coord); - } - - @Override - public double getBinsize() { - materialize(); - return super.getBinsize(); - } - - @Override - public double getCoverMaximum() { - materialize(); - return super.getCoverMaximum(); - } - - @Override - public double getCoverMinimum() { - materialize(); - return super.getCoverMinimum(); - } - - @Override - public ArrayList<T> getData() { - materialize(); - return super.getData(); - } - - @Override - public int getNumBins() { - materialize(); - return super.getNumBins(); - } - - @Override - public double getBinMean(int bin) { - materialize(); - return super.getBinMean(bin); - } - - @Override - public double getBinMin(int bin) { - materialize(); - return super.getBinMin(bin); - } - - @Override - public double getBinMax(int bin) { - materialize(); - return super.getBinMax(bin); - } - - @Override - public Iterator<DoubleObjPair<T>> iterator() { - materialize(); - return super.iterator(); - } - - @Override - public Iterator<DoubleObjPair<T>> reverseIterator() { - materialize(); - return super.reverseIterator(); - } - - @Override - public void aggregate(double coord, D value) { - if(tempcache != null) { - if(tempcache.size() < this.destsize * 2) { - tempcache.add(new DoubleObjPair<D>(coord, downsampler.cloneForCache(value))); - return; - } - else { - 
materialize(); - // .. and continue below! - } - } - // super class put will already handle histogram resizing - super.aggregate(coord, value); - // but not resampling - testResample(); - } - - /** - * Convenience constructor for Integer-based Histograms. Uses a constructor to - * initialize bins with Integer(0) - * - * @param bins Number of bins - * @return New histogram for Integer. - */ - public static FlexiHistogram<Integer, Integer> IntSumHistogram(int bins) { - return new FlexiHistogram<Integer, Integer>(bins, new Adapter<Integer, Integer>() { - @Override - public Integer make() { - return new Integer(0); - } - - @Override - public Integer cloneForCache(Integer data) { - // no need to clone, Integer are singletons - return data; - } - - @Override - public Integer downsample(Integer first, Integer second) { - return first + second; - } - - @Override - public Integer aggregate(Integer existing, Integer data) { - return existing + data; - } - }); - } - - /** - * Convenience constructor for Long-based Histograms. Uses a constructor to - * initialize bins with Long(0) - * - * @param bins Number of bins - * @return New histogram for Long. - */ - public static FlexiHistogram<Long, Long> LongSumHistogram(int bins) { - return new FlexiHistogram<Long, Long>(bins, new Adapter<Long, Long>() { - @Override - public Long make() { - return new Long(0); - } - - @Override - public Long cloneForCache(Long data) { - // no need to clone, Long are singletons - return data; - } - - @Override - public Long downsample(Long first, Long second) { - return first + second; - } - - @Override - public Long aggregate(Long existing, Long data) { - return existing + data; - } - }); - } - - /** - * Convenience constructor for Double-based Histograms. Uses a constructor to - * initialize bins with Double(0), and downsampling is done by summation. - * - * @param bins Number of bins - * @return New histogram for Doubles. 
- */ - public static FlexiHistogram<Double, Double> DoubleSumHistogram(int bins) { - return new FlexiHistogram<Double, Double>(bins, new Adapter<Double, Double>() { - @Override - public Double make() { - return new Double(0.0); - } - - @Override - public Double cloneForCache(Double data) { - // no need to clone, Doubles are singletons - return data; - } - - @Override - public Double downsample(Double first, Double second) { - return first + second; - } - - @Override - public Double aggregate(Double existing, Double data) { - return existing + data; - } - }); - } - - /** - * Convenience constructor for {@link MeanVariance}-based Histograms. Uses a - * constructor to initialize bins with new {@link MeanVariance} objects - * - * @param bins Number of bins - * @return New histogram for {@link MeanVariance}. - */ - public static FlexiHistogram<MeanVariance, Double> MeanVarianceHistogram(int bins) { - return new FlexiHistogram<MeanVariance, Double>(bins, new Adapter<MeanVariance, Double>() { - @Override - public MeanVariance make() { - return new MeanVariance(); - } - - @Override - public Double cloneForCache(Double data) { - return data; - } - - @Override - public MeanVariance downsample(MeanVariance first, MeanVariance second) { - first.put(second); - return first; - } - - @Override - public MeanVariance aggregate(MeanVariance existing, Double data) { - existing.put(data); - return existing; - } - }); - } - - /** - * Histograms that work like two {@link #IntSumHistogram}, component wise. - * - * @param bins Number of bins. 
- * @return New Histogram object - */ - public static FlexiHistogram<IntIntPair, IntIntPair> IntSumIntSumHistogram(int bins) { - return new FlexiHistogram<IntIntPair, IntIntPair>(bins, new Adapter<IntIntPair, IntIntPair>() { - @Override - public IntIntPair make() { - return new IntIntPair(0, 0); - } - - @Override - public IntIntPair cloneForCache(IntIntPair data) { - return new IntIntPair(data.first, data.second); - } - - @Override - public IntIntPair downsample(IntIntPair first, IntIntPair second) { - return new IntIntPair(first.first + second.first, first.second + second.second); - } - - @Override - public IntIntPair aggregate(IntIntPair existing, IntIntPair data) { - existing.first = existing.first + data.first; - existing.second = existing.second + data.second; - return existing; - } - }); - } - - /** - * Histograms that work like two {@link #LongSumHistogram}, component wise. - * - * @param bins Number of bins. - * @return New Histogram object - */ - public static FlexiHistogram<Pair<Long, Long>, Pair<Long, Long>> LongSumLongSumHistogram(int bins) { - return new FlexiHistogram<Pair<Long, Long>, Pair<Long, Long>>(bins, new Adapter<Pair<Long, Long>, Pair<Long, Long>>() { - @Override - public Pair<Long, Long> make() { - return new Pair<Long, Long>(0L, 0L); - } - - @Override - public Pair<Long, Long> cloneForCache(Pair<Long, Long> data) { - return new Pair<Long, Long>(data.getFirst(), data.getSecond()); - } - - @Override - public Pair<Long, Long> downsample(Pair<Long, Long> first, Pair<Long, Long> second) { - return new Pair<Long, Long>(first.getFirst() + second.getFirst(), first.getSecond() + second.getSecond()); - } - - @Override - public Pair<Long, Long> aggregate(Pair<Long, Long> existing, Pair<Long, Long> data) { - existing.setFirst(existing.getFirst() + data.getFirst()); - existing.setSecond(existing.getSecond() + data.getSecond()); - return existing; - } - }); - } - - /** - * Histograms that work like two {@link #DoubleSumHistogram}, component wise. 
- * - * @param bins Number of bins. - * @return New Histogram object - */ - public static FlexiHistogram<DoubleDoublePair, DoubleDoublePair> DoubleSumDoubleSumHistogram(int bins) { - return new FlexiHistogram<DoubleDoublePair, DoubleDoublePair>(bins, new Adapter<DoubleDoublePair, DoubleDoublePair>() { - @Override - public DoubleDoublePair make() { - return new DoubleDoublePair(0., 0.); - } - - @Override - public DoubleDoublePair cloneForCache(DoubleDoublePair data) { - return new DoubleDoublePair(data.first, data.second); - } - - @Override - public DoubleDoublePair downsample(DoubleDoublePair first, DoubleDoublePair second) { - return new DoubleDoublePair(first.first + second.first, first.second + second.second); - } - - @Override - public DoubleDoublePair aggregate(DoubleDoublePair existing, DoubleDoublePair data) { - existing.first = existing.first + data.first; - existing.second = existing.second + data.second; - return existing; - } - }); - } -} diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java b/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java deleted file mode 100644 index 515fa016..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/histograms/ReplacingHistogram.java +++ /dev/null @@ -1,452 +0,0 @@ -package de.lmu.ifi.dbs.elki.math.histograms; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import java.util.ArrayList; -import java.util.Iterator; - -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; -import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; - -/** - * Class to manage a simple Histogram. - * - * Note: the iterator returns pairs containing the coordinate and the bin value! - * - * @author Erich Schubert - * - * @apiviz.composedOf de.lmu.ifi.dbs.elki.math.histograms.ReplacingHistogram.Adapter - * - * @param <T> Histogram data type. - */ -public class ReplacingHistogram<T> implements Iterable<DoubleObjPair<T>> { - /** - * Interface to plug in a data type T. - * - * @author Erich Schubert - * - * @param <T> Data type - */ - public static abstract class Adapter<T> { - /** - * Construct a new T when needed. - * - * @return new T - */ - public abstract T make(); - } - - /** - * Array shift to account for negative indices. - */ - protected int offset = 0; - - /** - * Size of array storage. - */ - protected int size; - - /** - * Array 'base', i.e. the point of 0.0. Usually the minimum. - */ - protected double base; - - /** - * To avoid introducing an extra bucket for the maximum value. - */ - protected double max; - - /** - * Width of a bin. - */ - protected double binsize; - - /** - * Data storage - */ - protected ArrayList<T> data; - - /** - * Constructor for new elements - */ - private Adapter<T> maker; - - /** - * Histogram constructor - * - * @param bins Number of bins to use. - * @param min Minimum Value - * @param max Maximum Value - * @param maker Constructor for new elements. 
- */ - public ReplacingHistogram(int bins, double min, double max, Adapter<T> maker) { - this.base = min; - this.max = max; - this.binsize = (max - min) / bins; - this.size = bins; - this.data = new ArrayList<T>(bins); - this.maker = maker; - for(int i = 0; i < bins; i++) { - this.data.add(maker.make()); - } - } - - /** - * Histogram constructor without 'Constructor' to generate new elements. Empty - * bins will be initialized with 'null'. - * - * @param bins Number of bins - * @param min Minimum value - * @param max Maximum value. - */ - public ReplacingHistogram(int bins, double min, double max) { - this(bins, min, max, null); - } - - /** - * Get the data at a given Coordinate. - * - * @param coord Coordinate. - * @return data element there (which may be a new empty bin or null) - */ - public T get(double coord) { - int bin = getBinNr(coord); - // compare with allocated area - if(bin < 0) { - T n = maker.make(); - return n; - } - if(bin >= size) { - T n = maker.make(); - return n; - } - return data.get(bin); - } - - /** - * Put data at a given coordinate. Note: this replaces the contents, it - * doesn't "add" or "count". - * - * @param coord Coordinate - * @param d New Data - */ - public void replace(double coord, T d) { - int bin = getBinNr(coord); - putBin(bin, d); - } - - /** - * Compute the bin number. Has a special case for rounding max down to the - * last bin. - * - * @param coord Coordinate - * @return bin number - */ - protected int getBinNr(double coord) { - if(Double.isInfinite(coord) || Double.isNaN(coord)) { - throw new UnsupportedOperationException("Encountered non-finite value in Histogram: " + coord); - } - if(coord == max) { - // System.err.println("Triggered special case: "+ (Math.floor((coord - - // base) / binsize) + offset) + " vs. " + (size - 1)); - return size - 1; - } - return (int) Math.floor((coord - base) / binsize) + offset; - } - - /** - * Internal put function to handle the special cases of histogram resizing. 
- * - * @param bin bin number - * @param d data to put - */ - private void putBin(int bin, T d) { - if(bin < 0) { - // make sure to have enough space - data.ensureCapacity(size - bin); - // insert new data in front. - data.add(0, d); - // fill the gap. Note that bin < 0. - for(int i = bin + 1; i < 0; i++) { - data.add(1, maker.make()); - } - // We still have bin < 0, thus (size - bin) > size! - assert (data.size() == size - bin); - offset = offset - bin; - size = size - bin; - // drop max value when resizing - max = Double.MAX_VALUE; - } - else if(bin >= size) { - this.data.ensureCapacity(bin + 1); - while(data.size() < bin) { - data.add(maker.make()); - } - // add the new data. - data.add(d); - assert (data.size() == bin + 1); - size = bin + 1; - // drop max value when resizing - max = Double.MAX_VALUE; - } - else { - this.data.set(bin, d); - } - } - - /** - * Get the number of bins actually in use. - * - * @return number of bins - */ - public int getNumBins() { - return size; - } - - /** - * Get the size (width) of a bin. - * - * @return bin size - */ - public double getBinsize() { - return binsize; - } - - /** - * Mean of bin - * - * @param bin Bin number - * @return Mean - */ - public double getBinMean(int bin) { - return base + (bin + 0.5 - offset) * binsize; - } - - /** - * Minimum of bin - * - * @param bin Bin number - * @return Lower bound - */ - public double getBinMin(int bin) { - return base + (bin - offset) * binsize; - } - - /** - * Maximum of bin - * - * @param bin Bin number - * @return Upper bound - */ - public double getBinMax(int bin) { - return base + (bin + 1 - offset) * binsize; - } - - /** - * Get minimum (covered by bins, not data!) - * - * @return minimum - */ - public double getCoverMinimum() { - return base - offset * binsize; - } - - /** - * Get maximum (covered by bins, not data!) - * - * @return maximum - */ - public double getCoverMaximum() { - return base + (size - offset) * binsize; - } - - /** - * Get the raw data. 
Note that this does NOT include the coordinates. - * - * @return raw data array. - */ - public ArrayList<T> getData() { - return data; - } - - /** - * Make a new bin. - * - * @return new bin. - */ - protected T make() { - return maker.make(); - } - - /** - * Iterator class to iterate over all bins. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - protected class Iter implements Iterator<DoubleObjPair<T>> { - /** - * Current bin number - */ - int bin = 0; - - @Override - public boolean hasNext() { - return bin < size; - } - - @Override - public DoubleObjPair<T> next() { - DoubleObjPair<T> pair = new DoubleObjPair<T>(base + (bin + 0.5 - offset) * binsize, data.get(bin)); - bin++; - return pair; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Histogram iterators cannot be modified."); - } - } - - /** - * Iterator class to iterate over all bins. - * - * @author Erich Schubert - * - * @apiviz.exclude - */ - protected class RIter implements Iterator<DoubleObjPair<T>> { - /** - * Current bin number - */ - int bin = size - 1; - - @Override - public boolean hasNext() { - return bin >= 0; - } - - @Override - public DoubleObjPair<T> next() { - DoubleObjPair< T> pair = new DoubleObjPair<T>(base + (bin + 0.5 - offset) * binsize, data.get(bin)); - bin--; - return pair; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Histogram iterators cannot be modified."); - } - } - - /** - * Get an iterator over all histogram bins. - */ - @Override - public Iterator<DoubleObjPair<T>> iterator() { - return new Iter(); - } - - /** - * Get an iterator over all histogram bins. - */ - // TODO: is there some interface to implement. - public Iterator<DoubleObjPair<T>> reverseIterator() { - return new RIter(); - } - - /** - * Convenience constructor for Integer-based Histograms. 
Uses a constructor to - * initialize bins with Integer(0) - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for Integers. - */ - public static ReplacingHistogram<Integer> IntHistogram(int bins, double min, double max) { - return new ReplacingHistogram<Integer>(bins, min, max, new Adapter<Integer>() { - @Override - public Integer make() { - return new Integer(0); - } - }); - } - - /** - * Convenience constructor for Double-based Histograms. Uses a constructor to - * initialize bins with Double(0) - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for Doubles. - */ - public static ReplacingHistogram<Double> DoubleHistogram(int bins, double min, double max) { - return new ReplacingHistogram<Double>(bins, min, max, new Adapter<Double>() { - @Override - public Double make() { - return new Double(0.0); - } - }); - } - - /** - * Convenience constructor for Histograms with pairs of Integers Uses a - * constructor to initialize bins with Pair(Integer(0),Integer(0)) - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for Integer pairs. - */ - public static ReplacingHistogram<IntIntPair> IntIntHistogram(int bins, double min, double max) { - return new ReplacingHistogram<IntIntPair>(bins, min, max, new Adapter<IntIntPair>() { - @Override - public IntIntPair make() { - return new IntIntPair(0, 0); - } - }); - } - - /** - * Convenience constructor for Histograms with pairs of Doubles Uses a - * constructor to initialize bins with Pair(Double(0),Double(0)) - * - * @param bins Number of bins - * @param min Minimum coordinate - * @param max Maximum coordinate - * @return New histogram for Double pairs. 
- */ - public static ReplacingHistogram<DoubleDoublePair> DoubleDoubleHistogram(int bins, double min, double max) { - return new ReplacingHistogram<DoubleDoublePair>(bins, min, max, new Adapter<DoubleDoublePair>() { - @Override - public DoubleDoublePair make() { - return new DoubleDoublePair(0.0, 0.0); - } - }); - } -} diff --git a/src/de/lmu/ifi/dbs/elki/math/histograms/package-info.java b/src/de/lmu/ifi/dbs/elki/math/histograms/package-info.java deleted file mode 100644 index b4b23466..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/histograms/package-info.java +++ /dev/null @@ -1,26 +0,0 @@ -/** - * <p>Classes for computing histograms.</p> - */ -/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ -package de.lmu.ifi.dbs.elki.math.histograms;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java index 5d185ec5..5a4f40fb 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/AffineTransformation.java @@ -1,5 +1,7 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; +import java.util.Arrays; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures @@ -23,7 +25,6 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; along with this program. If not, see <http://www.gnu.org/licenses/>. */ - /** * Affine transformations implemented using homogeneous coordinates. * @@ -296,15 +297,25 @@ public class AffineTransformation { */ public Vector homogeneVector(Vector v) { assert (v.getDimensionality() == dim); - double[] dv = new double[dim + 1]; - for(int i = 0; i < dim; i++) { - dv[i] = v.get(i); - } + double[] dv = Arrays.copyOf(v.getArrayRef(), dim + 1); dv[dim] = 1.0; return new Vector(dv); } /** + * Transform an absolute vector into homogeneous coordinates. + * + * @param v initial vector + * @return vector of dim+1, with new column having the value 1.0 + */ + public double[] homogeneVector(double[] v) { + assert (v.length == dim); + double[] dv = Arrays.copyOf(v, dim + 1); + dv[dim] = 1.0; + return dv; + } + + /** * Transform a relative vector into homogeneous coordinates. * * @param v initial vector @@ -313,15 +324,26 @@ public class AffineTransformation { public Vector homogeneRelativeVector(Vector v) { assert (v.getDimensionality() == dim); // TODO: this only works properly when trans[dim][dim] == 1.0, right? - double[] dv = new double[dim + 1]; - for(int i = 0; i < dim; i++) { - dv[i] = v.get(i); - } + double[] dv = Arrays.copyOf(v.getArrayRef(), dim + 1); dv[dim] = 0.0; return new Vector(dv); } /** + * Transform a relative vector into homogeneous coordinates. 
+ * + * @param v initial vector + * @return vector of dim+1, with new column having the value 0.0 + */ + public double[] homogeneRelativeVector(double[] v) { + assert (v.length == dim); + // TODO: this only works properly when trans[dim][dim] == 1.0, right? + double[] dv = Arrays.copyOf(v, dim + 1); + dv[dim] = 0.0; + return dv; + } + + /** * Project an homogeneous vector back into the original space. * * @param v Matrix of 1 x dim+1 containing the homogeneous vector @@ -345,11 +367,28 @@ public class AffineTransformation { * @param v Matrix of 1 x dim+1 containing the homogeneous vector * @return vector of dimension dim */ + public double[] unhomogeneVector(double[] v) { + assert (v.length == dim + 1); + // TODO: this only works properly when trans[dim][dim] == 1.0, right? + double[] dv = new double[dim]; + double scale = v[dim]; + assert (Math.abs(scale) > 0.0); + for(int i = 0; i < dim; i++) { + dv[i] = v[i] / scale; + } + return dv; + } + + /** + * Project an homogeneous vector back into the original space. + * + * @param v Matrix of 1 x dim+1 containing the homogeneous vector + * @return vector of dimension dim + */ public Vector unhomogeneRelativeVector(Vector v) { assert (v.getDimensionality() == dim + 1); double[] dv = new double[dim]; - double scale = v.get(dim); - assert (Math.abs(scale) == 0.0); + assert (Math.abs(v.get(dim)) < Double.MIN_NORMAL); for(int i = 0; i < dim; i++) { dv[i] = v.get(i); } @@ -357,6 +396,20 @@ public class AffineTransformation { } /** + * Project an homogeneous vector back into the original space. 
+ * + * @param v Matrix of 1 x dim+1 containing the homogeneous vector + * @return vector of dimension dim + */ + public double[] unhomogeneRelativeVector(double[] v) { + assert (v.length == dim + 1); + double[] dv = new double[dim]; + System.arraycopy(v, 0, dv, 0, dim); + assert (Math.abs(v[dim]) < Double.MIN_NORMAL); + return dv; + } + + /** * Apply the transformation onto a vector * * @param v vector of dimensionality dim @@ -367,6 +420,16 @@ public class AffineTransformation { } /** + * Apply the transformation onto a vector + * + * @param v vector of dimensionality dim + * @return transformed vector of dimensionality dim + */ + public double[] apply(double[] v) { + return unhomogeneVector(VMath.times(trans.elements, homogeneVector(v))); + } + + /** * Apply the inverse transformation onto a vector * * @param v vector of dimensionality dim @@ -380,6 +443,19 @@ public class AffineTransformation { } /** + * Apply the inverse transformation onto a vector + * + * @param v vector of dimensionality dim + * @return transformed vector of dimensionality dim + */ + public double[] applyInverse(double[] v) { + if(inv == null) { + updateInverse(); + } + return unhomogeneVector(VMath.times(inv.elements, homogeneVector(v))); + } + + /** * Apply the transformation onto a vector * * @param v vector of dimensionality dim @@ -390,6 +466,16 @@ public class AffineTransformation { } /** + * Apply the transformation onto a vector + * + * @param v vector of dimensionality dim + * @return transformed vector of dimensionality dim + */ + public double[] applyRelative(double[] v) { + return unhomogeneRelativeVector(VMath.times(trans.elements, homogeneRelativeVector(v))); + } + + /** * Apply the inverse transformation onto a vector * * @param v vector of dimensionality dim @@ -401,4 +487,16 @@ public class AffineTransformation { } return unhomogeneRelativeVector(inv.times(homogeneRelativeVector(v))); } + /** + * Apply the inverse transformation onto a vector + * + * @param v vector of 
dimensionality dim + * @return transformed vector of dimensionality dim + */ + public double[] applyRelativeInverse(double[] v) { + if(inv == null) { + updateInverse(); + } + return unhomogeneRelativeVector(VMath.times(inv.elements, homogeneRelativeVector(v))); + } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java index a9939a56..c46ae89a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Centroid.java @@ -27,7 +27,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; /** * Class to compute the centroid of some data. @@ -42,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; */ public class Centroid extends Vector { /** - * The current weight + * The current weight. */ protected double wsum; @@ -57,14 +57,14 @@ public class Centroid extends Vector { } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. * * @param val Value */ public void put(double[] val) { assert (val.length == elements.length); wsum += 1.0; - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { final double delta = val[i] - elements[i]; elements[i] += delta / wsum; } @@ -76,10 +76,13 @@ public class Centroid extends Vector { * @param val data * @param weight weight */ - public void put(double val[], double weight) { + public void put(double[] val, double weight) { assert (val.length == elements.length); + if (weight == 0) { + return; // Skip zero weights. + } final double nwsum = weight + wsum; - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { final double delta = val[i] - elements[i]; final double rval = delta * weight / nwsum; elements[i] += rval; @@ -88,7 +91,7 @@ public class Centroid extends Vector { } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. 
* * @param val Value */ @@ -107,15 +110,15 @@ public class Centroid extends Vector { } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. * * @param val Value */ - public void put(NumberVector<?, ?> val) { + public void put(NumberVector<?> val) { assert (val.getDimensionality() == elements.length); wsum += 1.0; - for(int i = 0; i < elements.length; i++) { - final double delta = val.doubleValue(i + 1) - elements[i]; + for (int i = 0; i < elements.length; i++) { + final double delta = val.doubleValue(i) - elements[i]; elements[i] += delta / wsum; } } @@ -126,11 +129,14 @@ public class Centroid extends Vector { * @param val data * @param weight weight */ - public void put(NumberVector<?, ?> val, double weight) { + public void put(NumberVector<?> val, double weight) { assert (val.getDimensionality() == elements.length); + if (weight == 0) { + return; // Skip zero weights. + } final double nwsum = weight + wsum; - for(int i = 0; i < elements.length; i++) { - final double delta = val.doubleValue(i + 1) - elements[i]; + for (int i = 0; i < elements.length; i++) { + final double delta = val.doubleValue(i) - elements[i]; final double rval = delta * weight / nwsum; elements[i] += rval; } @@ -138,23 +144,26 @@ public class Centroid extends Vector { } /** - * Get the data as vector + * Get the data as vector. * + * @param relation Data relation + * @param <F> vector type * @return the data */ - public <F extends NumberVector<? extends F, ?>> F toVector(Relation<? extends F> relation) { - return DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(elements); + public <F extends NumberVector<?>> F toVector(Relation<? extends F> relation) { + return RelationUtil.getNumberVectorFactory(relation).newNumberVector(elements); } /** * Static Constructor from an existing matrix columns. * * @param mat Matrix to use the columns from. 
+ * @return Centroid vector */ public static Centroid make(Matrix mat) { Centroid c = new Centroid(mat.getRowDimensionality()); int n = mat.getColumnDimensionality(); - for(int i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { // TODO: avoid constructing the vector objects? c.put(mat.getCol(i)); } @@ -167,9 +176,9 @@ public class Centroid extends Vector { * @param relation Relation to use * @return Centroid of relation */ - public static Centroid make(Relation<? extends NumberVector<?, ?>> relation) { - Centroid c = new Centroid(DatabaseUtil.dimensionality(relation)); - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + public static Centroid make(Relation<? extends NumberVector<?>> relation) { + Centroid c = new Centroid(RelationUtil.dimensionality(relation)); + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { c.put(relation.get(iditer)); } return c; @@ -180,12 +189,13 @@ public class Centroid extends Vector { * * @param relation Relation to use * @param ids IDs to use + * @return Centroid */ - public static Centroid make(Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) { - Centroid c = new Centroid(DatabaseUtil.dimensionality(relation)); - for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + public static Centroid make(Relation<? extends NumberVector<?>> relation, DBIDs ids) { + Centroid c = new Centroid(RelationUtil.dimensionality(relation)); + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { c.put(relation.get(iter)); } return c; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java index 5209468f..8805b18c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CholeskyDecomposition.java @@ -87,10 +87,10 @@ public class CholeskyDecomposition implements java.io.Serializable { } Lrowj[k] = s = (A[j][k] - s) / L[k][k]; d = d + s * s; - isspd = isspd & (A[k][j] == A[j][k]); + isspd &= (A[k][j] == A[j][k]); } d = A[j][j] - d; - isspd = isspd & (d > 0.0); + isspd &= (d > 0.0); L[j][j] = Math.sqrt(Math.max(d, 0.0)); for(int k = j + 1; k < n; k++) { L[j][k] = 0.0; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java index a416b917..87b7bf1d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/CovarianceMatrix.java @@ -27,7 +27,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; /** * Class for computing covariance matrixes using stable mean and variance @@ -39,8 +39,9 @@ import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; * easier to use APIs. * * For use in algorithms, it is more appropriate to use - * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder StandardCovarianceMatrixBuilder} - * since this class can be overriden with a stabilized covariance matrix builder! 
+ * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder + * StandardCovarianceMatrixBuilder} since this class can be overridden with a + * stabilized covariance matrix builder! * * @author Erich Schubert * @@ -50,22 +51,27 @@ import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; */ public class CovarianceMatrix { /** - * The means + * Error message reported when too little data (weight <= 1) in matrix. + */ + public static final String ERR_TOO_LITTLE_WEIGHT = "Too few elements (too little total weight) used to obtain a valid covariance matrix."; + + /** + * The means. */ double[] mean; /** - * The covariance matrix + * The covariance matrix. */ double[][] elements; /** - * Temporary storage, to avoid reallocations + * Temporary storage, to avoid reallocations. */ double[] nmea; /** - * The current weight + * The current weight. */ protected double wsum; @@ -83,7 +89,7 @@ public class CovarianceMatrix { } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. * * @param val Value */ @@ -120,7 +126,7 @@ public class CovarianceMatrix { * @param val data * @param weight weight */ - public void put(double val[], double weight) { + public void put(double[] val, double weight) { assert (val.length == mean.length); final double nwsum = wsum + weight; // Compute new means @@ -149,7 +155,7 @@ public class CovarianceMatrix { } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. * * @param val Value */ @@ -168,16 +174,16 @@ public class CovarianceMatrix { } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. 
* * @param val Value */ - public void put(NumberVector<?, ?> val) { + public void put(NumberVector<?> val) { assert (val.getDimensionality() == mean.length); final double nwsum = wsum + 1.0; // Compute new means for(int i = 0; i < mean.length; i++) { - final double delta = val.doubleValue(i + 1) - mean[i]; + final double delta = val.doubleValue(i) - mean[i]; nmea[i] = mean[i] + delta / nwsum; } // Update covariance matrix @@ -185,7 +191,7 @@ public class CovarianceMatrix { for(int j = i; j < mean.length; j++) { // We DO want to use the new mean once and the old mean once! // It does not matter which one is which. - double delta = (val.doubleValue(i + 1) - nmea[i]) * (val.doubleValue(j + 1) - mean[j]); + double delta = (val.doubleValue(i) - nmea[i]) * (val.doubleValue(j) - mean[j]); elements[i][j] = elements[i][j] + delta; // Optimize via symmetry if(i != j) { @@ -204,12 +210,12 @@ public class CovarianceMatrix { * @param val data * @param weight weight */ - public void put(NumberVector<?, ?> val, double weight) { + public void put(NumberVector<?> val, double weight) { assert (val.getDimensionality() == mean.length); final double nwsum = wsum + weight; // Compute new means for(int i = 0; i < mean.length; i++) { - final double delta = val.doubleValue(i + 1) - mean[i]; + final double delta = val.doubleValue(i) - mean[i]; final double rval = delta * weight / nwsum; nmea[i] = mean[i] + rval; } @@ -218,7 +224,7 @@ public class CovarianceMatrix { for(int j = i; j < mean.length; j++) { // We DO want to use the new mean once and the old mean once! // It does not matter which one is which. - double delta = (val.doubleValue(i + 1) - nmea[i]) * (val.doubleValue(j + 1) - mean[j]) * weight; + double delta = (val.doubleValue(i) - nmea[i]) * (val.doubleValue(j) - mean[j]) * weight; elements[i][j] = elements[i][j] + delta; // Optimize via symmetry if(i != j) { @@ -243,10 +249,12 @@ public class CovarianceMatrix { /** * Get the mean as vector. 
* + * @param relation Data relation + * @param <F> vector type * @return Mean vector */ - public <F extends NumberVector<? extends F, ?>> F getMeanVector(Relation<? extends F> relation) { - return DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(mean); + public <F extends NumberVector<?>> F getMeanVector(Relation<? extends F> relation) { + return RelationUtil.getNumberVectorFactory(relation).newNumberVector(mean); } /** @@ -261,7 +269,7 @@ public class CovarianceMatrix { */ public Matrix makeSampleMatrix() { if(wsum <= 1.0) { - throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT); } Matrix mat = new Matrix(elements); return mat.times(1.0 / (wsum - 1)); @@ -279,7 +287,7 @@ public class CovarianceMatrix { */ public Matrix makeNaiveMatrix() { if(wsum <= 0.0) { - throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT); } Matrix mat = new Matrix(elements); return mat.times(1.0 / wsum); @@ -297,7 +305,7 @@ public class CovarianceMatrix { */ public Matrix destroyToSampleMatrix() { if(wsum <= 1.0) { - throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT); } Matrix mat = new Matrix(elements).timesEquals(1.0 / (wsum - 1)); this.elements = null; @@ -316,7 +324,7 @@ public class CovarianceMatrix { */ public Matrix destroyToNaiveMatrix() { if(wsum <= 0.0) { - throw new IllegalStateException("Too few elements used to obtain a valid covariance matrix."); + throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT); } Matrix mat = new Matrix(elements).timesEquals(1.0 / wsum); this.elements = null; @@ -327,6 +335,7 @@ public class CovarianceMatrix { * Static Constructor. 
* * @param mat Matrix to use the columns of + * @return Covariance matrix */ public static CovarianceMatrix make(Matrix mat) { CovarianceMatrix c = new CovarianceMatrix(mat.getRowDimensionality()); @@ -342,9 +351,10 @@ public class CovarianceMatrix { * Static Constructor from a full relation. * * @param relation Relation to use. + * @return Covariance matrix */ - public static CovarianceMatrix make(Relation<? extends NumberVector<?, ?>> relation) { - CovarianceMatrix c = new CovarianceMatrix(DatabaseUtil.dimensionality(relation)); + public static CovarianceMatrix make(Relation<? extends NumberVector<?>> relation) { + CovarianceMatrix c = new CovarianceMatrix(RelationUtil.dimensionality(relation)); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { c.put(relation.get(iditer)); } @@ -356,10 +366,11 @@ public class CovarianceMatrix { * * @param relation Relation to use. * @param ids IDs to add + * @return Covariance matrix */ - public static CovarianceMatrix make(Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) { - CovarianceMatrix c = new CovarianceMatrix(DatabaseUtil.dimensionality(relation)); - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + public static CovarianceMatrix make(Relation<? extends NumberVector<?>> relation, DBIDs ids) { + CovarianceMatrix c = new CovarianceMatrix(RelationUtil.dimensionality(relation)); + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { c.put(relation.get(iter)); } return c; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java index 48ce9c7b..54434cdd 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/EigenvalueDecomposition.java @@ -96,80 +96,71 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Symmetric Householder reduction to tridiagonal form. 
private void tred2() { - // This is derived from the Algol procedures tred2 by // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding // Fortran subroutine in EISPACK. - - for(int j = 0; j < n; j++) { - d[j] = V[n - 1][j]; - } + System.arraycopy(V[n - 1], 0, d, 0, n); // Householder reduction to tridiagonal form. - for(int i = n - 1; i > 0; i--) { - + for (int i = n - 1; i > 0; i--) { // Scale to avoid under/overflow. double scale = 0.0; double h = 0.0; - for(int k = 0; k < i; k++) { + for (int k = 0; k < i; k++) { scale = scale + Math.abs(d[k]); } - if(scale == 0.0) { + if (scale < Double.MIN_NORMAL) { e[i] = d[i - 1]; - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { d[j] = V[i - 1][j]; V[i][j] = 0.0; V[j][i] = 0.0; } - } - else { - + } else { // Generate Householder vector. - - for(int k = 0; k < i; k++) { + for (int k = 0; k < i; k++) { d[k] /= scale; h += d[k] * d[k]; } double f = d[i - 1]; double g = Math.sqrt(h); - if(f > 0) { + if (f > 0) { g = -g; } e[i] = scale * g; h = h - f * g; d[i - 1] = f - g; - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { e[j] = 0.0; } // Apply similarity transformation to remaining columns. 
- - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { f = d[j]; V[j][i] = f; g = e[j] + V[j][j] * f; - for(int k = j + 1; k <= i - 1; k++) { + for (int k = j + 1; k <= i - 1; k++) { g += V[k][j] * d[k]; e[k] += V[k][j] * f; } e[j] = g; } f = 0.0; - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { e[j] /= h; f += e[j] * d[j]; } double hh = f / (h + h); - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { e[j] -= hh * d[j]; } - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { f = d[j]; g = e[j]; - for(int k = j; k <= i - 1; k++) { + for (int k = j; k <= i - 1; k++) { V[k][j] -= (f * e[k] + g * d[k]); } d[j] = V[i - 1][j]; @@ -181,29 +172,29 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Accumulate transformations. - for(int i = 0; i < n - 1; i++) { + for (int i = 0; i < n - 1; i++) { V[n - 1][i] = V[i][i]; V[i][i] = 1.0; double h = d[i + 1]; - if(h != 0.0) { - for(int k = 0; k <= i; k++) { + if (h > 0.0 || h < 0.0) { + for (int k = 0; k <= i; k++) { d[k] = V[k][i + 1] / h; } - for(int j = 0; j <= i; j++) { + for (int j = 0; j <= i; j++) { double g = 0.0; - for(int k = 0; k <= i; k++) { + for (int k = 0; k <= i; k++) { g += V[k][i + 1] * V[k][j]; } - for(int k = 0; k <= i; k++) { + for (int k = 0; k <= i; k++) { V[k][j] -= g * d[k]; } } } - for(int k = 0; k <= i; k++) { + for (int k = 0; k <= i; k++) { V[k][i + 1] = 0.0; } } - for(int j = 0; j < n; j++) { + for (int j = 0; j < n; j++) { d[j] = V[n - 1][j]; V[n - 1][j] = 0.0; } @@ -214,28 +205,23 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Symmetric tridiagonal QL algorithm. private void tql2() { - // This is derived from the Algol procedures tql2, by // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding // Fortran subroutine in EISPACK. 
- for(int i = 1; i < n; i++) { - e[i - 1] = e[i]; - } + System.arraycopy(e, 1, e, 0, n - 1); e[n - 1] = 0.0; double f = 0.0; double tst1 = 0.0; double eps = Math.pow(2.0, -52.0); - for(int l = 0; l < n; l++) { - + for (int l = 0; l < n; l++) { // Find small subdiagonal element - tst1 = Math.max(tst1, Math.abs(d[l]) + Math.abs(e[l])); int m = l; - while(m < n) { - if(Math.abs(e[m]) <= eps * tst1) { + while (m < n) { + if (Math.abs(e[m]) <= eps * tst1) { break; } m++; @@ -244,30 +230,28 @@ public class EigenvalueDecomposition implements java.io.Serializable { // If m == l, d[l] is an eigenvalue, // otherwise, iterate. - if(m > l) { + if (m > l) { int iter = 0; do { iter = iter + 1; // (Could check iteration count here.) // Compute implicit shift - double g = d[l]; double p = (d[l + 1] - g) / (2.0 * e[l]); double r = MathUtil.fastHypot(p, 1.0); - if(p < 0) { + if (p < 0) { r = -r; } d[l] = e[l] / (p + r); d[l + 1] = e[l] * (p + r); double dl1 = d[l + 1]; double h = g - d[l]; - for(int i = l + 2; i < n; i++) { + for (int i = l + 2; i < n; i++) { d[i] -= h; } f = f + h; // Implicit QL transformation. - p = d[m]; double c = 1.0; double c2 = c; @@ -275,7 +259,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { double el1 = e[l + 1]; double s = 0.0; double s2 = 0.0; - for(int i = m - 1; i >= l; i--) { + for (int i = m - 1; i >= l; i--) { c3 = c2; c2 = c; s2 = s; @@ -289,8 +273,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { d[i + 1] = h + s * (c * g + s * d[i]); // Accumulate transformation. - - for(int k = 0; k < n; k++) { + for (int k = 0; k < n; k++) { h = V[k][i + 1]; V[k][i + 1] = s * V[k][i] + c * h; V[k][i] = c * V[k][i] - s * h; @@ -301,30 +284,27 @@ public class EigenvalueDecomposition implements java.io.Serializable { d[l] = c * p; // Check for convergence. 
- } - while(Math.abs(e[l]) > eps * tst1); + while (Math.abs(e[l]) > eps * tst1); } d[l] = d[l] + f; e[l] = 0.0; - } // Sort eigenvalues and corresponding vectors. - - for(int i = 0; i < n - 1; i++) { + for (int i = 0; i < n - 1; i++) { int k = i; double p = d[i]; - for(int j = i + 1; j < n; j++) { - if(d[j] < p) { + for (int j = i + 1; j < n; j++) { + if (d[j] < p) { k = j; p = d[j]; } } - if(k != i) { + if (k != i) { d[k] = d[i]; d[i] = p; - for(int j = 0; j < n; j++) { + for (int j = 0; j < n; j++) { p = V[j][i]; V[j][i] = V[j][k]; V[j][k] = p; @@ -346,25 +326,25 @@ public class EigenvalueDecomposition implements java.io.Serializable { int low = 0; int high = n - 1; - for(int m = low + 1; m <= high - 1; m++) { + for (int m = low + 1; m <= high - 1; m++) { // Scale column. double scale = 0.0; - for(int i = m; i <= high; i++) { + for (int i = m; i <= high; i++) { scale = scale + Math.abs(H[i][m - 1]); } - if(scale != 0.0) { + if (scale > 0.0 || scale < 0.0) { // Compute Householder transformation. 
double h = 0.0; - for(int i = high; i >= m; i--) { + for (int i = high; i >= m; i--) { ort[i] = H[i][m - 1] / scale; h += ort[i] * ort[i]; } double g = Math.sqrt(h); - if(ort[m] > 0) { + if (ort[m] > 0) { g = -g; } h = h - ort[m] * g; @@ -373,24 +353,24 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Apply Householder similarity transformation // H = (I-u*u'/h)*H*(I-u*u')/h) - for(int j = m; j < n; j++) { + for (int j = m; j < n; j++) { double f = 0.0; - for(int i = high; i >= m; i--) { + for (int i = high; i >= m; i--) { f += ort[i] * H[i][j]; } f = f / h; - for(int i = m; i <= high; i++) { + for (int i = m; i <= high; i++) { H[i][j] -= f * ort[i]; } } - for(int i = 0; i <= high; i++) { + for (int i = 0; i <= high; i++) { double f = 0.0; - for(int j = high; j >= m; j--) { + for (int j = high; j >= m; j--) { f += ort[j] * H[i][j]; } f = f / h; - for(int j = m; j <= high; j++) { + for (int j = m; j <= high; j++) { H[i][j] -= f * ort[j]; } } @@ -401,25 +381,25 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Accumulate transformations (Algol's ortran). - for(int i = 0; i < n; i++) { - for(int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { V[i][j] = (i == j ? 
1.0 : 0.0); } } - for(int m = high - 1; m >= low + 1; m--) { - if(H[m][m - 1] != 0.0) { - for(int i = m + 1; i <= high; i++) { + for (int m = high - 1; m >= low + 1; m--) { + if (H[m][m - 1] != 0.0) { + for (int i = m + 1; i <= high; i++) { ort[i] = H[i][m - 1]; } - for(int j = m; j <= high; j++) { + for (int j = m; j <= high; j++) { double g = 0.0; - for(int i = m; i <= high; i++) { + for (int i = m; i <= high; i++) { g += ort[i] * V[i][j]; } // Double division avoids possible underflow g = (g / ort[m]) / H[m][m - 1]; - for(int i = m; i <= high; i++) { + for (int i = m; i <= high; i++) { V[i][j] += g * ort[i]; } } @@ -433,13 +413,12 @@ public class EigenvalueDecomposition implements java.io.Serializable { private void cdiv(double xr, double xi, double yr, double yi) { double r, d; - if(Math.abs(yr) > Math.abs(yi)) { + if (Math.abs(yr) > Math.abs(yi)) { r = yi / yr; d = yr + r * yi; cdivr = (xr + r * xi) / d; cdivi = (xi - r * xr) / d; - } - else { + } else { r = yr / yi; d = yi + r * yr; cdivr = (r * xr + xi) / d; @@ -470,12 +449,12 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Store roots isolated by balanc and compute matrix norm double norm = 0.0; - for(int i = 0; i < nn; i++) { - if(i < low | i > high) { + for (int i = 0; i < nn; i++) { + if (i < low || i > high) { d[i] = H[i][i]; e[i] = 0.0; } - for(int j = Math.max(i - 1, 0); j < nn; j++) { + for (int j = Math.max(i - 1, 0); j < nn; j++) { norm = norm + Math.abs(H[i][j]); } } @@ -483,17 +462,17 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Outer loop over eigenvalue index int iter = 0; - while(n >= low) { + while (n >= low) { // Look for single small sub-diagonal element int l = n; - while(l > low) { + while (l > low) { s = Math.abs(H[l - 1][l - 1]) + Math.abs(H[l][l]); - if(s == 0.0) { + if (s == 0.0) { s = norm; } - if(Math.abs(H[l][l - 1]) < eps * s) { + if (Math.abs(H[l][l - 1]) < eps * s) { break; } l--; @@ -502,7 +481,7 @@ public class 
EigenvalueDecomposition implements java.io.Serializable { // Check for convergence // One root found - if(l == n) { + if (l == n) { H[n][n] = H[n][n] + exshift; d[n] = H[n][n]; e[n] = 0.0; @@ -511,8 +490,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Two roots found - } - else if(l == n - 1) { + } else if (l == n - 1) { w = H[n][n - 1] * H[n - 1][n]; p = (H[n - 1][n - 1] - H[n][n]) / 2.0; q = p * p + w; @@ -523,16 +501,15 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Real pair - if(q >= 0) { - if(p >= 0) { + if (q >= 0) { + if (p >= 0) { z = p + z; - } - else { + } else { z = p - z; } d[n - 1] = x + z; d[n] = d[n - 1]; - if(z != 0.0) { + if (z != 0.0) { d[n] = x - w / z; } e[n - 1] = 0.0; @@ -547,7 +524,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Row modification - for(int j = n - 1; j < nn; j++) { + for (int j = n - 1; j < nn; j++) { z = H[n - 1][j]; H[n - 1][j] = q * z + p * H[n][j]; H[n][j] = q * H[n][j] - p * z; @@ -555,7 +532,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Column modification - for(int i = 0; i <= n; i++) { + for (int i = 0; i <= n; i++) { z = H[i][n - 1]; H[i][n - 1] = q * z + p * H[i][n]; H[i][n] = q * H[i][n] - p * z; @@ -563,7 +540,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Accumulate transformations - for(int i = low; i <= high; i++) { + for (int i = low; i <= high; i++) { z = V[i][n - 1]; V[i][n - 1] = q * z + p * V[i][n]; V[i][n] = q * V[i][n] - p * z; @@ -571,8 +548,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Complex pair - } - else { + } else { d[n - 1] = x + p; d[n] = x + p; e[n - 1] = z; @@ -583,24 +559,23 @@ public class EigenvalueDecomposition implements java.io.Serializable { // No convergence yet - } - else { + } else { // Form shift x = H[n][n]; y = 0.0; w = 0.0; - if(l < n) { + if (l < n) { y = H[n - 1][n - 1]; w = H[n][n - 1] * 
H[n - 1][n]; } // Wilkinson's original ad hoc shift - if(iter == 10) { + if (iter == 10) { exshift += x; - for(int i = low; i <= n; i++) { + for (int i = low; i <= n; i++) { H[i][i] -= x; } s = Math.abs(H[n][n - 1]) + Math.abs(H[n - 1][n - 2]); @@ -610,16 +585,16 @@ public class EigenvalueDecomposition implements java.io.Serializable { // MATLAB's new ad hoc shift - if(iter == 30) { + if (iter == 30) { s = (y - x) / 2.0; s = s * s + w; - if(s > 0) { + if (s > 0) { s = Math.sqrt(s); - if(y < x) { + if (y < x) { s = -s; } s = x - w / ((y - x) / 2.0 + s); - for(int i = low; i <= n; i++) { + for (int i = low; i <= n; i++) { H[i][i] -= s; } exshift += s; @@ -632,7 +607,7 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Look for two consecutive small sub-diagonal elements int m = n - 2; - while(m >= l) { + while (m >= l) { z = H[m][m]; r = x - z; s = y - z; @@ -643,49 +618,48 @@ public class EigenvalueDecomposition implements java.io.Serializable { p = p / s; q = q / s; r = r / s; - if(m == l) { + if (m == l) { break; } - if(Math.abs(H[m][m - 1]) * (Math.abs(q) + Math.abs(r)) < eps * (Math.abs(p) * (Math.abs(H[m - 1][m - 1]) + Math.abs(z) + Math.abs(H[m + 1][m + 1])))) { + if (Math.abs(H[m][m - 1]) * (Math.abs(q) + Math.abs(r)) < eps * (Math.abs(p) * (Math.abs(H[m - 1][m - 1]) + Math.abs(z) + Math.abs(H[m + 1][m + 1])))) { break; } m--; } - for(int i = m + 2; i <= n; i++) { + for (int i = m + 2; i <= n; i++) { H[i][i - 2] = 0.0; - if(i > m + 2) { + if (i > m + 2) { H[i][i - 3] = 0.0; } } // Double QR step involving rows l:n and columns m:n - for(int k = m; k <= n - 1; k++) { + for (int k = m; k <= n - 1; k++) { boolean notlast = (k != n - 1); - if(k != m) { + if (k != m) { p = H[k][k - 1]; q = H[k + 1][k - 1]; r = (notlast ? 
H[k + 2][k - 1] : 0.0); x = Math.abs(p) + Math.abs(q) + Math.abs(r); - if(x != 0.0) { + if (x != 0.0) { p = p / x; q = q / x; r = r / x; } } - if(x == 0.0) { + if (x == 0.0) { break; } s = Math.sqrt(p * p + q * q + r * r); - if(p < 0) { + if (p < 0) { s = -s; } - if(s != 0) { - if(k != m) { + if (s != 0) { + if (k != m) { H[k][k - 1] = -s * x; - } - else if(l != m) { + } else if (l != m) { H[k][k - 1] = -H[k][k - 1]; } p = p + s; @@ -697,9 +671,9 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Row modification - for(int j = k; j < nn; j++) { + for (int j = k; j < nn; j++) { p = H[k][j] + q * H[k + 1][j]; - if(notlast) { + if (notlast) { p = p + r * H[k + 2][j]; H[k + 2][j] = H[k + 2][j] - p * z; } @@ -709,9 +683,9 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Column modification - for(int i = 0; i <= Math.min(n, k + 3); i++) { + for (int i = 0; i <= Math.min(n, k + 3); i++) { p = x * H[i][k] + y * H[i][k + 1]; - if(notlast) { + if (notlast) { p = p + z * H[i][k + 2]; H[i][k + 2] = H[i][k + 2] - p * r; } @@ -721,9 +695,9 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Accumulate transformations - for(int i = low; i <= high; i++) { + for (int i = low; i <= high; i++) { p = x * V[i][k] + y * V[i][k + 1]; - if(notlast) { + if (notlast) { p = p + z * V[i][k + 2]; V[i][k + 2] = V[i][k + 2] - p * r; } @@ -737,127 +711,108 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Backsubstitute to find vectors of upper triangular form - if(norm == 0.0) { + if (norm == 0.0) { return; } - for(n = nn - 1; n >= 0; n--) { + for (n = nn - 1; n >= 0; n--) { p = d[n]; q = e[n]; // Real vector - if(q == 0) { + if (q == 0) { int l = n; H[n][n] = 1.0; - for(int i = n - 1; i >= 0; i--) { + for (int i = n - 1; i >= 0; i--) { w = H[i][i] - p; r = 0.0; - for(int j = l; j <= n; j++) { + for (int j = l; j <= n; j++) { r = r + H[i][j] * H[j][n]; } - if(e[i] < 0.0) { + if (e[i] < 0.0) 
{ z = w; s = r; - } - else { + } else { l = i; - if(e[i] == 0.0) { - if(w != 0.0) { + if (!(e[i] > 0.0)) { + if (w > 0.0 || w < 0.0) { H[i][n] = -r / w; - } - else { + } else { H[i][n] = -r / (eps * norm); } - // Solve real equations - - } - else { + } else { x = H[i][i + 1]; y = H[i + 1][i]; q = (d[i] - p) * (d[i] - p) + e[i] * e[i]; t = (x * s - z * r) / q; H[i][n] = t; - if(Math.abs(x) > Math.abs(z)) { + if (Math.abs(x) > Math.abs(z)) { H[i + 1][n] = (-r - w * t) / x; - } - else { + } else { H[i + 1][n] = (-s - y * t) / z; } } // Overflow control - t = Math.abs(H[i][n]); - if((eps * t) * t > 1) { - for(int j = i; j <= n; j++) { + if ((eps * t) * t > 1) { + for (int j = i; j <= n; j++) { H[j][n] = H[j][n] / t; } } } } - // Complex vector - - } - else if(q < 0) { + } else if (q < 0) { int l = n - 1; // Last vector component imaginary so matrix is triangular - - if(Math.abs(H[n][n - 1]) > Math.abs(H[n - 1][n])) { + if (Math.abs(H[n][n - 1]) > Math.abs(H[n - 1][n])) { H[n - 1][n - 1] = q / H[n][n - 1]; H[n - 1][n] = -(H[n][n] - p) / H[n][n - 1]; - } - else { + } else { cdiv(0.0, -H[n - 1][n], H[n - 1][n - 1] - p, q); H[n - 1][n - 1] = cdivr; H[n - 1][n] = cdivi; } H[n][n - 1] = 0.0; H[n][n] = 1.0; - for(int i = n - 2; i >= 0; i--) { - double ra, sa, vr, vi; - ra = 0.0; - sa = 0.0; - for(int j = l; j <= n; j++) { + for (int i = n - 2; i >= 0; i--) { + double ra = 0.0, sa = 0.0, vr, vi; + for (int j = l; j <= n; j++) { ra = ra + H[i][j] * H[j][n - 1]; sa = sa + H[i][j] * H[j][n]; } w = H[i][i] - p; - if(e[i] < 0.0) { + if (e[i] < 0.0) { z = w; r = ra; s = sa; - } - else { + } else { l = i; - if(e[i] == 0) { + if (!(e[i] > 0.0)) { cdiv(-ra, -sa, w, q); H[i][n - 1] = cdivr; H[i][n] = cdivi; - } - else { - + } else { // Solve complex equations - x = H[i][i + 1]; y = H[i + 1][i]; vr = (d[i] - p) * (d[i] - p) + e[i] * e[i] - q * q; vi = (d[i] - p) * 2.0 * q; - if(vr == 0.0 & vi == 0.0) { + if (vr == 0.0 && vi == 0.0) { vr = eps * norm * (Math.abs(w) + Math.abs(q) + 
Math.abs(x) + Math.abs(y) + Math.abs(z)); } cdiv(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi); H[i][n - 1] = cdivr; H[i][n] = cdivi; - if(Math.abs(x) > (Math.abs(z) + Math.abs(q))) { + if (Math.abs(x) > (Math.abs(z) + Math.abs(q))) { H[i + 1][n - 1] = (-ra - w * H[i][n - 1] + q * H[i][n]) / x; H[i + 1][n] = (-sa - w * H[i][n] - q * H[i][n - 1]) / x; - } - else { + } else { cdiv(-r - y * H[i][n - 1], -s - y * H[i][n], z, q); H[i + 1][n - 1] = cdivr; H[i + 1][n] = cdivi; @@ -865,10 +820,9 @@ public class EigenvalueDecomposition implements java.io.Serializable { } // Overflow control - t = Math.max(Math.abs(H[i][n - 1]), Math.abs(H[i][n])); - if((eps * t) * t > 1) { - for(int j = i; j <= n; j++) { + if ((eps * t) * t > 1) { + for (int j = i; j <= n; j++) { H[j][n - 1] = H[j][n - 1] / t; H[j][n] = H[j][n] / t; } @@ -879,21 +833,17 @@ public class EigenvalueDecomposition implements java.io.Serializable { } // Vectors of isolated roots - - for(int i = 0; i < nn; i++) { - if(i < low | i > high) { - for(int j = i; j < nn; j++) { - V[i][j] = H[i][j]; - } + for (int i = 0; i < nn; i++) { + if (i < low || i > high) { + System.arraycopy(H[i], i, V[i], i, nn - i); } } // Back transformation to get eigenvectors of original matrix - - for(int j = nn - 1; j >= low; j--) { - for(int i = low; i <= high; i++) { + for (int j = nn - 1; j >= low; j--) { + for (int i = low; i <= high; i++) { z = 0.0; - for(int k = low; k <= Math.min(j, high); k++) { + for (int k = low; k <= Math.min(j, high); k++) { z = z + V[i][k] * H[k][j]; } V[i][j] = z; @@ -919,23 +869,21 @@ public class EigenvalueDecomposition implements java.io.Serializable { e = new double[n]; issymmetric = true; - for(int j = 0; (j < n) & issymmetric; j++) { - for(int i = 0; (i < n) & issymmetric; i++) { + for (int j = 0; (j < n) && issymmetric; j++) { + for (int i = 0; (i < n) && issymmetric; i++) { issymmetric = (A[i][j] == A[j][i]); - if(Double.isNaN(A[i][j])) { + if (Double.isNaN(A[i][j])) { throw new 
IllegalArgumentException("NaN in EigenvalueDecomposition!"); } - if(Double.isInfinite(A[i][j])) { + if (Double.isInfinite(A[i][j])) { throw new IllegalArgumentException("+-inf in EigenvalueDecomposition!"); } } } - if(issymmetric) { - for(int i = 0; i < n; i++) { - for(int j = 0; j < n; j++) { - V[i][j] = A[i][j]; - } + if (issymmetric) { + for (int i = 0; i < n; i++) { + System.arraycopy(A[i], 0, V[i], 0, n); } // Tridiagonalize. @@ -944,13 +892,12 @@ public class EigenvalueDecomposition implements java.io.Serializable { // Diagonalize. tql2(); - } - else { + } else { H = new double[n][n]; ort = new double[n]; - for(int j = 0; j < n; j++) { - for(int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { H[i][j] = A[i][j]; } } @@ -1002,18 +949,17 @@ public class EigenvalueDecomposition implements java.io.Serializable { public Matrix getD() { Matrix X = new Matrix(n, n); double[][] D = X.getArrayRef(); - for(int i = 0; i < n; i++) { - for(int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { D[i][j] = 0.0; } D[i][i] = d[i]; - if(e[i] > 0) { + if (e[i] > 0) { D[i][i + 1] = e[i]; - } - else if(e[i] < 0) { + } else if (e[i] < 0) { D[i][i - 1] = e[i]; } } return X; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java index 08634279..6e68352a 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LUDecomposition.java @@ -1,6 +1,5 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; - /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures @@ -94,7 +93,7 @@ public class LUDecomposition implements java.io.Serializable { this.n = n; // Use a "left-looking", dot-product, Crout/Doolittle algorithm. piv = new int[m]; - for(int i = 0; i < m; i++) { + for (int i = 0; i < m; i++) { piv[i] = i; } pivsign = 1; @@ -103,23 +102,23 @@ public class LUDecomposition implements java.io.Serializable { // Outer loop. - for(int j = 0; j < n; j++) { + for (int j = 0; j < n; j++) { // Make a copy of the j-th column to localize references. - for(int i = 0; i < m; i++) { + for (int i = 0; i < m; i++) { LUcolj[i] = LU[i][j]; } // Apply previous transformations. - for(int i = 0; i < m; i++) { + for (int i = 0; i < m; i++) { LUrowi = LU[i]; // Most of the time is spent in the following dot product. int kmax = Math.min(i, j); double s = 0.0; - for(int k = 0; k < kmax; k++) { + for (int k = 0; k < kmax; k++) { s += LUrowi[k] * LUcolj[k]; } @@ -129,13 +128,13 @@ public class LUDecomposition implements java.io.Serializable { // Find pivot and exchange if necessary. int p = j; - for(int i = j + 1; i < m; i++) { - if(Math.abs(LUcolj[i]) > Math.abs(LUcolj[p])) { + for (int i = j + 1; i < m; i++) { + if (Math.abs(LUcolj[i]) > Math.abs(LUcolj[p])) { p = i; } } - if(p != j) { - for(int k = 0; k < n; k++) { + if (p != j) { + for (int k = 0; k < n; k++) { double t = LU[p][k]; LU[p][k] = LU[j][k]; LU[j][k] = t; @@ -148,8 +147,8 @@ public class LUDecomposition implements java.io.Serializable { // Compute multipliers. 
- if(j < m & LU[j][j] != 0.0) { - for(int i = j + 1; i < m; i++) { + if (j < m && LU[j][j] != 0.0) { + for (int i = j + 1; i < m; i++) { LU[i][j] /= LU[j][j]; } } @@ -166,8 +165,8 @@ public class LUDecomposition implements java.io.Serializable { * @return true if U, and hence A, is nonsingular. */ public boolean isNonsingular() { - for(int j = 0; j < n; j++) { - if(LU[j][j] == 0) { + for (int j = 0; j < n; j++) { + if (LU[j][j] == 0) { return false; } } @@ -182,15 +181,13 @@ public class LUDecomposition implements java.io.Serializable { public Matrix getL() { Matrix X = new Matrix(m, n); double[][] L = X.getArrayRef(); - for(int i = 0; i < m; i++) { - for(int j = 0; j < n; j++) { - if(i > j) { + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { + if (i > j) { L[i][j] = LU[i][j]; - } - else if(i == j) { + } else if (i == j) { L[i][j] = 1.0; - } - else { + } else { L[i][j] = 0.0; } } @@ -206,12 +203,11 @@ public class LUDecomposition implements java.io.Serializable { public Matrix getU() { Matrix X = new Matrix(n, n); double[][] U = X.getArrayRef(); - for(int i = 0; i < n; i++) { - for(int j = 0; j < n; j++) { - if(i <= j) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + if (i <= j) { U[i][j] = LU[i][j]; - } - else { + } else { U[i][j] = 0.0; } } @@ -226,9 +222,7 @@ public class LUDecomposition implements java.io.Serializable { */ public int[] getPivot() { int[] p = new int[m]; - for(int i = 0; i < m; i++) { - p[i] = piv[i]; - } + System.arraycopy(piv, 0, p, 0, m); return p; } @@ -239,8 +233,8 @@ public class LUDecomposition implements java.io.Serializable { */ public double[] getDoublePivot() { double[] vals = new double[m]; - for(int i = 0; i < m; i++) { - vals[i] = piv[i]; + for (int i = 0; i < m; i++) { + vals[i] = (double) piv[i]; } return vals; } @@ -252,11 +246,11 @@ public class LUDecomposition implements java.io.Serializable { * @exception IllegalArgumentException Matrix must be square */ public double det() { - if(m != n) { + 
if (m != n) { throw new IllegalArgumentException("Matrix must be square."); } double d = pivsign; - for(int j = 0; j < n; j++) { + for (int j = 0; j < n; j++) { d *= LU[j][j]; } return d; @@ -271,10 +265,10 @@ public class LUDecomposition implements java.io.Serializable { * @exception RuntimeException Matrix is singular. */ public Matrix solve(Matrix B) { - if(B.getRowDimensionality() != m) { + if (B.getRowDimensionality() != m) { throw new IllegalArgumentException("Matrix row dimensions must agree."); } - if(!this.isNonsingular()) { + if (!this.isNonsingular()) { throw new RuntimeException("Matrix is singular."); } @@ -298,10 +292,10 @@ public class LUDecomposition implements java.io.Serializable { public double[][] solve(double[][] B) { int mx = B.length; int nx = B[0].length; - if(mx != m) { + if (mx != m) { throw new IllegalArgumentException("Matrix row dimensions must agree."); } - if(!this.isNonsingular()) { + if (!this.isNonsingular()) { throw new RuntimeException("Matrix is singular."); } double[][] Xmat = new Matrix(B).getMatrix(piv, 0, nx - 1).getArrayRef(); @@ -317,23 +311,23 @@ public class LUDecomposition implements java.io.Serializable { */ private void solveInplace(double[][] B, int nx) { // Solve L*Y = B(piv,:) - for(int k = 0; k < n; k++) { - for(int i = k + 1; i < n; i++) { - for(int j = 0; j < nx; j++) { + for (int k = 0; k < n; k++) { + for (int i = k + 1; i < n; i++) { + for (int j = 0; j < nx; j++) { B[i][j] -= B[k][j] * LU[i][k]; } } } // Solve U*X = Y; - for(int k = n - 1; k >= 0; k--) { - for(int j = 0; j < nx; j++) { + for (int k = n - 1; k >= 0; k--) { + for (int j = 0; j < nx; j++) { B[k][j] /= LU[k][k]; } - for(int i = 0; i < k; i++) { - for(int j = 0; j < nx; j++) { + for (int i = 0; i < k; i++) { + for (int j = 0; j < nx; j++) { B[i][j] -= B[k][j] * LU[i][k]; } } } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java index 80cbe1e6..53954c08 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/LinearEquationSystem.java @@ -23,12 +23,11 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import gnu.trove.list.array.TIntArrayList; + import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.NumberFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; import java.util.Locale; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -44,7 +43,7 @@ public class LinearEquationSystem { /** * Logger. */ - private static final Logging logger = Logging.getLogger(LinearEquationSystem.class); + private static final Logging LOG = Logging.getLogger(LinearEquationSystem.class); /** * Indicates trivial pivot search strategy. 
@@ -114,24 +113,24 @@ public class LinearEquationSystem { * @param b the right hand side of the linear equation system */ public LinearEquationSystem(double[][] a, double[] b) { - if(a == null) { + if (a == null) { throw new IllegalArgumentException("Coefficient array is null!"); } - if(b == null) { + if (b == null) { throw new IllegalArgumentException("Right hand side is null!"); } - if(a.length != b.length) { + if (a.length != b.length) { throw new IllegalArgumentException("Coefficient matrix and right hand side " + "differ in row dimensionality!"); } coeff = a; rhs = b; row = new int[coeff.length]; - for(int i = 0; i < coeff.length; i++) { + for (int i = 0; i < coeff.length; i++) { row[i] = i; } col = new int[coeff[0].length]; - for(int j = 0; j < coeff[0].length; j++) { + for (int j = 0; j < coeff[0].length; j++) { col[j] = j; } rank = 0; @@ -152,19 +151,19 @@ public class LinearEquationSystem { * column[i] */ public LinearEquationSystem(double[][] a, double[] b, int[] rowPermutations, int[] columnPermutations) { - if(a == null) { + if (a == null) { throw new IllegalArgumentException("Coefficient array is null!"); } - if(b == null) { + if (b == null) { throw new IllegalArgumentException("Right hand side is null!"); } - if(a.length != b.length) { + if (a.length != b.length) { throw new IllegalArgumentException("Coefficient matrix and right hand side " + "differ in row dimensionality!"); } - if(rowPermutations.length != a.length) { + if (rowPermutations.length != a.length) { throw new IllegalArgumentException("Coefficient matrix and row permutation array " + "differ in row dimensionality!"); } - if(columnPermutations.length != a[0].length) { + if (columnPermutations.length != a[0].length) { throw new IllegalArgumentException("Coefficient matrix and column permutation array " + "differ in column dimensionality!"); } @@ -235,7 +234,7 @@ public class LinearEquationSystem { * ( 0 ... a_ii ... a_in )<br> * ( 0 ... a_(i+1)i ... a_(i+1)n ) <br> * ( 0 ... a_ni ... 
a_nn ) <br> - * </code> Then we search for x,y in {i,...n}, so that |a_xy| > |a_ij| + * </code> Then we search for x,y in {i,...n}, so that |a_xy| > |a_ij| */ public void solveByTotalPivotSearch() { solve(TOTAL_PIVOT_SEARCH); @@ -284,28 +283,27 @@ public class LinearEquationSystem { * @return a string representation of this equation system */ public String equationsToString(String prefix, NumberFormat nf) { - if((coeff == null) || (rhs == null) || (row == null) || (col == null)) { + if ((coeff == null) || (rhs == null) || (row == null) || (col == null)) { throw new NullPointerException(); } int[] coeffDigits = maxIntegerDigits(coeff); int rhsDigits = maxIntegerDigits(rhs); - StringBuffer buffer = new StringBuffer(); - buffer.append(prefix).append("\n").append(prefix); - for(int i = 0; i < coeff.length; i++) { - for(int j = 0; j < coeff[row[0]].length; j++) { + StringBuilder buffer = new StringBuilder(); + buffer.append(prefix).append('\n').append(prefix); + for (int i = 0; i < coeff.length; i++) { + for (int j = 0; j < coeff[row[0]].length; j++) { format(nf, buffer, coeff[row[i]][col[j]], coeffDigits[col[j]]); - buffer.append(" * x_" + col[j]); + buffer.append(" * x_").append(col[j]); } buffer.append(" ="); format(nf, buffer, rhs[row[i]], rhsDigits); - if(i < coeff.length - 1) { - buffer.append("\n").append(prefix); - } - else { - buffer.append("\n").append(prefix); + if (i < coeff.length - 1) { + buffer.append('\n').append(prefix); + } else { + buffer.append('\n').append(prefix); } } return buffer.toString(); @@ -339,7 +337,7 @@ public class LinearEquationSystem { * @return a string representation of the solution of this equation system */ public String solutionToString(int fractionDigits) { - if(!isSolvable()) { + if (!isSolvable()) { throw new IllegalStateException("System is not solvable!"); } @@ -350,29 +348,28 @@ public class LinearEquationSystem { nf.setNegativePrefix(""); nf.setPositivePrefix(""); - int row = coeff[0].length / 2; + int row = coeff[0].length 
>> 1; int params = u.length; int paramsDigits = integerDigits(params); int x0Digits = maxIntegerDigits(x_0); int[] uDigits = maxIntegerDigits(u); - StringBuffer buffer = new StringBuffer(); - for(int i = 0; i < x_0.length; i++) { + StringBuilder buffer = new StringBuilder(); + for (int i = 0; i < x_0.length; i++) { double value = x_0[i]; format(nf, buffer, value, x0Digits); - for(int j = 0; j < u[0].length; j++) { - if(i == row) { - buffer.append(" + a_" + j + " * "); - } - else { + for (int j = 0; j < u[0].length; j++) { + if (i == row) { + buffer.append(" + a_").append(j).append(" * "); + } else { buffer.append(" "); - for(int d = 0; d < paramsDigits; d++) { - buffer.append(" "); + for (int d = 0; d < paramsDigits; d++) { + buffer.append(' '); } } format(nf, buffer, u[i][j], uDigits[j]); } - buffer.append("\n"); + buffer.append('\n'); } return buffer.toString(); } @@ -395,7 +392,7 @@ public class LinearEquationSystem { // main loop, transformation to reduced row echelon form boolean exitLoop = false; - while(!exitLoop) { + while (!exitLoop) { k++; // pivot search for entry in remaining matrix @@ -407,7 +404,7 @@ public class LinearEquationSystem { IntIntPair pivotPos = new IntIntPair(0, 0); IntIntPair currPos = new IntIntPair(k, k); - switch(method){ + switch(method) { case TRIVAL_PIVOT_SEARCH: pivotPos = nonZeroPivotSearch(k); break; @@ -419,11 +416,11 @@ public class LinearEquationSystem { pivotCol = pivotPos.second; pivot = coeff[this.row[pivotRow]][col[pivotCol]]; - if(logger.isDebugging()) { - StringBuffer msg = new StringBuffer(); + if (LOG.isDebugging()) { + StringBuilder msg = new StringBuilder(); msg.append("equations ").append(equationsToString(4)); - msg.append(" *** pivot at (").append(pivotRow).append(",").append(pivotCol).append(") = ").append(pivot).append("\n"); - logger.debugFine(msg.toString()); + msg.append(" *** pivot at (").append(pivotRow).append(',').append(pivotCol).append(") = ").append(pivot).append('\n'); + 
LOG.debugFine(msg.toString()); } // permute rows and columns to get this entry onto @@ -433,13 +430,13 @@ public class LinearEquationSystem { // test conditions for exiting loop // after this iteration // reasons are: Math.abs(pivot) == 0 - if((Math.abs(pivot) <= Matrix.DELTA)) { + if ((Math.abs(pivot) <= Matrix.DELTA)) { exitLoop = true; } // pivoting only if Math.abs(pivot) > 0 // and k <= m - 1 - if((Math.abs(pivot) > Matrix.DELTA)) { + if ((Math.abs(pivot) > Matrix.DELTA)) { rank++; pivotOperation(k); } @@ -448,7 +445,7 @@ public class LinearEquationSystem { // after this iteration // reasons are: k == rows-1 : no more rows // k == cols-1 : no more columns - if(k == rows - 1 || k == cols - 1) { + if (k == rows - 1 || k == cols - 1) { exitLoop = true; } }// end while @@ -467,15 +464,15 @@ public class LinearEquationSystem { double max = 0; int i, j, pivotRow = k, pivotCol = k; double absValue; - for(i = k; i < coeff.length; i++) { - for(j = k; j < coeff[0].length; j++) { + for (i = k; i < coeff.length; i++) { + for (j = k; j < coeff[0].length; j++) { // compute absolute value of // current entry in absValue absValue = Math.abs(coeff[row[i]][col[j]]); // compare absValue with value max // found so far - if(max < absValue) { + if (max < absValue) { // remember new value and position max = absValue; pivotRow = i; @@ -496,14 +493,14 @@ public class LinearEquationSystem { int i, j; double absValue; - for(i = k; i < coeff.length; i++) { - for(j = k; j < coeff[0].length; j++) { + for (i = k; i < coeff.length; i++) { + for (j = k; j < coeff[0].length; j++) { // compute absolute value of // current entry in absValue absValue = Math.abs(coeff[row[i]][col[j]]); // check if absValue is non-zero - if(absValue > 0) { // found a pivot element + if (absValue > 0) { // found a pivot element return new IntIntPair(i, j); }// end if }// end for j @@ -541,20 +538,20 @@ public class LinearEquationSystem { // pivot row: set pivot to 1 coeff[row[k]][col[k]] = 1; - for(int i = k + 1; i 
< coeff[k].length; i++) { + for (int i = k + 1; i < coeff[k].length; i++) { coeff[row[k]][col[i]] /= pivot; } rhs[row[k]] /= pivot; - if(logger.isDebugging()) { - StringBuffer msg = new StringBuffer(); + if (LOG.isDebugging()) { + StringBuilder msg = new StringBuilder(); msg.append("set pivot element to 1 ").append(equationsToString(4)); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } // for (int i = k + 1; i < coeff.length; i++) { - for(int i = 0; i < coeff.length; i++) { - if(i == k) { + for (int i = 0; i < coeff.length; i++) { + if (i == k) { continue; } @@ -565,7 +562,7 @@ public class LinearEquationSystem { coeff[row[i]][col[k]] = 0; // modify entries a[i,j], i > k fixed, j = k+1...n-1 - for(int j = k + 1; j < coeff[0].length; j++) { + for (int j = k + 1; j < coeff[0].length; j++) { coeff[row[i]][col[j]] = coeff[row[i]][col[j]] - coeff[row[k]][col[j]] * q; }// end for j @@ -573,10 +570,10 @@ public class LinearEquationSystem { rhs[row[i]] = rhs[row[i]] - rhs[row[k]] * q; }// end for k - if(logger.isDebugging()) { - StringBuffer msg = new StringBuffer(); + if (LOG.isDebugging()) { + StringBuilder msg = new StringBuilder(); msg.append("after pivot operation ").append(equationsToString(4)); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } } @@ -587,65 +584,62 @@ public class LinearEquationSystem { */ private void solve(int method) throws NullPointerException { // solution exists - if(solved) { + if (solved) { return; } // bring in reduced row echelon form - if(!reducedRowEchelonForm) { + if (!reducedRowEchelonForm) { reducedRowEchelonForm(method); } - if(!isSolvable(method)) { - if(logger.isDebugging()) { - logger.debugFine("Equation system is not solvable!"); + if (!isSolvable(method)) { + if (LOG.isDebugging()) { + LOG.debugFine("Equation system is not solvable!"); } return; } // compute one special solution int cols = coeff[0].length; - List<Integer> boundIndices = new ArrayList<Integer>(); + TIntArrayList 
boundIndices = new TIntArrayList(); x_0 = new double[cols]; - for(int i = 0; i < coeff.length; i++) { - for(int j = i; j < coeff[row[i]].length; j++) { - if(coeff[row[i]][col[j]] == 1) { + for (int i = 0; i < coeff.length; i++) { + for (int j = i; j < coeff[row[i]].length; j++) { + if (coeff[row[i]][col[j]] == 1) { x_0[col[i]] = rhs[row[i]]; boundIndices.add(col[i]); break; } } } - List<Integer> freeIndices = new ArrayList<Integer>(); - for(int i = 0; i < coeff[0].length; i++) { - if(boundIndices.contains(i)) { + TIntArrayList freeIndices = new TIntArrayList(); + for (int i = 0; i < coeff[0].length; i++) { + if (boundIndices.contains(i)) { continue; } freeIndices.add(i); } - StringBuffer msg = new StringBuffer(); - if(logger.isDebugging()) { - msg.append("\nSpecial solution x_0 = [").append(FormatUtil.format(x_0, ",", 4)).append("]"); + StringBuilder msg = new StringBuilder(); + if (LOG.isDebugging()) { + msg.append("\nSpecial solution x_0 = [").append(FormatUtil.format(x_0, ",", 4)).append(']'); msg.append("\nbound Indices ").append(boundIndices); msg.append("\nfree Indices ").append(freeIndices); } // compute solution space of homogeneous linear equation system - Integer[] freeParameters = freeIndices.toArray(new Integer[freeIndices.size()]); - Integer[] boundParameters = boundIndices.toArray(new Integer[boundIndices.size()]); - Arrays.sort(boundParameters); + boundIndices.sort(); int freeIndex = 0; int boundIndex = 0; u = new double[cols][freeIndices.size()]; - for(int j = 0; j < u[0].length; j++) { - for(int i = 0; i < u.length; i++) { - if(freeIndex < freeParameters.length && i == freeParameters[freeIndex]) { + for (int j = 0; j < u[0].length; j++) { + for (int i = 0; i < u.length; i++) { + if (freeIndex < freeIndices.size() && i == freeIndices.get(freeIndex)) { u[i][j] = 1; - } - else if(boundIndex < boundParameters.length && i == boundParameters[boundIndex]) { - u[i][j] = -coeff[row[boundIndex]][freeParameters[freeIndex]]; + } else if (boundIndex < 
boundIndices.size() && i == boundIndices.get(boundIndex)) { + u[i][j] = -coeff[row[boundIndex]][freeIndices.get(freeIndex)]; boundIndex++; } } @@ -654,12 +648,12 @@ public class LinearEquationSystem { } - if(logger.isDebugging()) { + if (LOG.isDebugging()) { msg.append("\nU"); - for(double[] anU : u) { - msg.append("\n").append(FormatUtil.format(anU, ",", 4)); + for (double[] anU : u) { + msg.append('\n').append(FormatUtil.format(anU, ",", 4)); } - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } solved = true; @@ -672,17 +666,17 @@ public class LinearEquationSystem { * @return true if linear system in solvable */ private boolean isSolvable(int method) throws NullPointerException { - if(solved) { + if (solved) { return solvable; } - if(!reducedRowEchelonForm) { + if (!reducedRowEchelonForm) { reducedRowEchelonForm(method); } // test if rank(coeff) == rank(coeff|rhs) - for(int i = rank; i < rhs.length; i++) { - if(Math.abs(rhs[row[i]]) > Matrix.DELTA) { + for (int i = rank; i < rhs.length; i++) { + if (Math.abs(rhs[row[i]]) > Matrix.DELTA) { solvable = false; return false; // not solvable } @@ -700,8 +694,8 @@ public class LinearEquationSystem { */ private int[] maxIntegerDigits(double[][] values) { int[] digits = new int[values[0].length]; - for(int j = 0; j < values[0].length; j++) { - for(double[] value : values) { + for (int j = 0; j < values[0].length; j++) { + for (double[] value : values) { digits[j] = Math.max(digits[j], integerDigits(value[j])); } } @@ -716,7 +710,7 @@ public class LinearEquationSystem { */ private int maxIntegerDigits(double[] values) { int digits = 0; - for(double value : values) { + for (double value : values) { digits = Math.max(digits, integerDigits(value)); } return digits; @@ -730,7 +724,7 @@ public class LinearEquationSystem { */ private int integerDigits(double d) { double value = Math.abs(d); - if(value < 10) { + if (value < 10) { return 1; } return (int) Math.log10(value) + 1; @@ -746,16 +740,15 @@ public 
class LinearEquationSystem { * @param value the value to append * @param maxIntegerDigits the maximum number of integer digits */ - private void format(NumberFormat nf, StringBuffer buffer, double value, int maxIntegerDigits) { - if(value >= 0) { + private void format(NumberFormat nf, StringBuilder buffer, double value, int maxIntegerDigits) { + if (value >= 0) { buffer.append(" + "); - } - else { + } else { buffer.append(" - "); } int digits = maxIntegerDigits - integerDigits(value); - for(int d = 0; d < digits; d++) { - buffer.append(" "); + for (int d = 0; d < digits; d++) { + buffer.append(' '); } buffer.append(nf.format(Math.abs(value))); } @@ -768,4 +761,4 @@ public class LinearEquationSystem { public int subspacedim() { return coeff[0].length - coeff.length; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java index eec21404..5a0d1e74 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Matrix.java @@ -23,8 +23,11 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import gnu.trove.list.array.TDoubleArrayList; + import java.io.BufferedReader; import java.io.StreamTokenizer; +import java.util.ArrayList; import java.util.Arrays; import java.util.logging.Logger; @@ -55,6 +58,26 @@ public class Matrix { public static final double DELTA = 1E-3; /** + * Error: matrix not square. + */ + public static final String ERR_NOTSQUARE = "All rows must have the same length."; + + /** + * Error: matrix indexes incorrect + */ + public static final String ERR_REINDEX = "Submatrix indices incorrect."; + + /** + * Error when matrix dimensions do not agree. + */ + public static final String ERR_MATRIX_DIMENSIONS = "Matrix must consist of the same no of rows!"; + + /** + * Error when matrix inner dimensions do not agree. + */ + private static final String ERR_MATRIX_INNERDIM = "Matrix inner dimensions must agree."; + + /** * Array for internal storage of elements. * * @serial internal array storage. 
@@ -89,8 +112,8 @@ public class Matrix { public Matrix(final int m, final int n, final double s) { this.columndimension = n; elements = new double[m][n]; - for(int i = 0; i < m; i++) { - for(int j = 0; j < n; j++) { + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { elements[i][j] = s; } } @@ -105,9 +128,9 @@ public class Matrix { */ public Matrix(final double[][] elements) { columndimension = elements[0].length; - for(int i = 0; i < elements.length; i++) { - if(elements[i].length != columndimension) { - throw new IllegalArgumentException("All rows must have the same length."); + for (int i = 0; i < elements.length; i++) { + if (elements[i].length != columndimension) { + throw new IllegalArgumentException(ERR_NOTSQUARE); } } this.elements = elements; @@ -122,8 +145,8 @@ public class Matrix { public Matrix(final RationalNumber[][] q) { columndimension = q[0].length; elements = new double[q.length][columndimension]; - for(int row = 0; row < q.length; row++) { - for(int col = 0; col < q[row].length; col++) { + for (int row = 0; row < q.length; row++) { + for (int col = 0; col < q[row].length; col++) { elements[row][col] = q[row][col].doubleValue(); } } @@ -139,12 +162,12 @@ public class Matrix { */ public Matrix(final double values[], final int m) { columndimension = (m != 0 ? values.length / m : 0); - if(m * columndimension != values.length) { + if (m * columndimension != values.length) { throw new IllegalArgumentException("Array length must be a multiple of m."); } elements = new double[m][columndimension]; - for(int i = 0; i < m; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < m; i++) { + for (int j = 0; j < columndimension; j++) { elements[i][j] = values[i + j * m]; } } @@ -152,7 +175,7 @@ public class Matrix { /** * Constructor, cloning an existing matrix. 
- * + * * @param mat Matrix to clone */ public Matrix(Matrix mat) { @@ -166,13 +189,13 @@ public class Matrix { * @return new matrix * @throws IllegalArgumentException All rows must have the same length */ - public final static Matrix constructWithCopy(final double[][] A) { + public static final Matrix constructWithCopy(final double[][] A) { final int m = A.length; final int n = A[0].length; final Matrix X = new Matrix(m, n); - for(int i = 0; i < m; i++) { - if(A[i].length != n) { - throw new IllegalArgumentException("All rows must have the same length."); + for (int i = 0; i < m; i++) { + if (A[i].length != n) { + throw new IllegalArgumentException(ERR_NOTSQUARE); } System.arraycopy(A[i], 0, X.elements[i], 0, n); } @@ -187,7 +210,7 @@ public class Matrix { */ public static final Matrix unitMatrix(final int dim) { final double[][] e = new double[dim][dim]; - for(int i = 0; i < dim; i++) { + for (int i = 0; i < dim; i++) { e[i][i] = 1; } return new Matrix(e); @@ -213,8 +236,8 @@ public class Matrix { */ public static final Matrix random(final int m, final int n) { final Matrix A = new Matrix(m, n); - for(int i = 0; i < m; i++) { - for(int j = 0; j < n; j++) { + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { A.elements[i][j] = Math.random(); } } @@ -230,7 +253,7 @@ public class Matrix { */ public static final Matrix identity(final int m, final int n) { final Matrix A = new Matrix(m, n); - for(int i = 0; i < Math.min(m, n); i++) { + for (int i = 0; i < Math.min(m, n); i++) { A.elements[i][i] = 1.0; } return A; @@ -245,7 +268,7 @@ public class Matrix { */ public static final Matrix diagonal(final double[] diagonal) { final Matrix result = new Matrix(diagonal.length, diagonal.length); - for(int i = 0; i < diagonal.length; i++) { + for (int i = 0; i < diagonal.length; i++) { result.elements[i][i] = diagonal[i]; } return result; @@ -260,7 +283,7 @@ public class Matrix { */ public static final Matrix diagonal(final Vector diagonal) { final Matrix result = 
new Matrix(diagonal.elements.length, diagonal.elements.length); - for(int i = 0; i < diagonal.elements.length; i++) { + for (int i = 0; i < diagonal.elements.length; i++) { result.elements[i][i] = diagonal.elements[i]; } return result; @@ -273,7 +296,7 @@ public class Matrix { */ public final Matrix copy() { final Matrix X = new Matrix(elements.length, columndimension); - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { System.arraycopy(elements[i], 0, X.elements[i], 0, columndimension); } return X; @@ -302,11 +325,9 @@ public class Matrix { * @return Two-dimensional array copy of matrix elements. */ public final double[][] getArrayCopy() { - final double[][] C = new double[elements.length][columndimension]; - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { - C[i][j] = elements[i][j]; - } + final double[][] C = new double[elements.length][]; + for (int i = 0; i < elements.length; i++) { + C[i] = elements[i].clone(); } return C; } @@ -367,7 +388,7 @@ public class Matrix { public final Matrix increment(final int i, final int j, final double s) { elements[i][j] += s; return this; -} + } /** * Make a one-dimensional row packed copy of the internal array. 
@@ -376,10 +397,8 @@ public class Matrix { */ public final double[] getRowPackedCopy() { double[] vals = new double[elements.length * columndimension]; - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { - vals[i * columndimension + j] = elements[i][j]; - } + for (int i = 0; i < elements.length; i++) { + System.arraycopy(elements[i], 0, vals, i * columndimension, columndimension); } return vals; } @@ -391,8 +410,8 @@ public class Matrix { */ public final double[] getColumnPackedCopy() { final double[] vals = new double[elements.length * columndimension]; - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < elements.length; i++) { + for (int j = 0; j < columndimension; j++) { vals[i + j * elements.length] = elements[i][j]; } } @@ -412,14 +431,11 @@ public class Matrix { public final Matrix getMatrix(final int i0, final int i1, final int j0, final int j1) { final Matrix X = new Matrix(i1 - i0 + 1, j1 - j0 + 1); try { - for(int i = i0; i <= i1; i++) { - for(int j = j0; j <= j1; j++) { - X.elements[i - i0][j - j0] = elements[i][j]; - } + for (int i = i0; i <= i1; i++) { + System.arraycopy(elements[i], j0, X.elements[i - i0], 0, j1 - j0 + 1); } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } return X; } @@ -435,14 +451,13 @@ public class Matrix { public final Matrix getMatrix(final int[] r, final int[] c) { final Matrix X = new Matrix(r.length, c.length); try { - for(int i = 0; i < r.length; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = 0; i < r.length; i++) { + for (int j = 0; j < c.length; j++) { X.elements[i][j] = elements[r[i]][c[j]]; } } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } catch 
(ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } return X; } @@ -459,14 +474,11 @@ public class Matrix { public final Matrix getMatrix(final int[] r, final int j0, final int j1) { final Matrix X = new Matrix(r.length, j1 - j0 + 1); try { - for(int i = 0; i < r.length; i++) { - for(int j = j0; j <= j1; j++) { - X.elements[i][j - j0] = elements[r[i]][j]; - } + for (int i = 0; i < r.length; i++) { + System.arraycopy(elements[r[i]], j0, X.elements[i], 0, j1 - j0 + 1); } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } return X; } @@ -483,14 +495,13 @@ public class Matrix { public final Matrix getMatrix(final int i0, final int i1, final int[] c) { final Matrix X = new Matrix(i1 - i0 + 1, c.length); try { - for(int i = i0; i <= i1; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = i0; i <= i1; i++) { + for (int j = 0; j < c.length; j++) { X.elements[i - i0][j] = elements[i][c[j]]; } } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } return X; } @@ -507,14 +518,11 @@ public class Matrix { */ public final void setMatrix(final int i0, final int i1, final int j0, final int j1, final Matrix X) { try { - for(int i = i0; i <= i1; i++) { - for(int j = j0; j <= j1; j++) { - elements[i][j] = X.elements[i - i0][j - j0]; - } + for (int i = i0; i <= i1; i++) { + System.arraycopy(X.elements[i - i0], 0, elements[i], j0, j1 - j0 + 1); } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices: " + e); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } } @@ -528,14 +536,13 @@ public class 
Matrix { */ public final void setMatrix(final int[] r, final int[] c, final Matrix X) { try { - for(int i = 0; i < r.length; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = 0; i < r.length; i++) { + for (int j = 0; j < c.length; j++) { elements[r[i]][c[j]] = X.elements[i][j]; } } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } } @@ -550,14 +557,11 @@ public class Matrix { */ public final void setMatrix(final int[] r, final int j0, final int j1, final Matrix X) { try { - for(int i = 0; i < r.length; i++) { - for(int j = j0; j <= j1; j++) { - elements[r[i]][j] = X.elements[i][j - j0]; - } + for (int i = 0; i < r.length; i++) { + System.arraycopy(X.elements[i], 0, elements[r[i]], j0, j1 - j0 + 1); } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } } @@ -572,14 +576,13 @@ public class Matrix { */ public final void setMatrix(final int i0, final int i1, final int[] c, final Matrix X) { try { - for(int i = i0; i <= i1; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = i0; i <= i1; i++) { + for (int j = 0; j < c.length; j++) { elements[i][c[j]] = X.elements[i - i0][j]; } } - } - catch(ArrayIndexOutOfBoundsException e) { - throw new ArrayIndexOutOfBoundsException("Submatrix indices"); + } catch (ArrayIndexOutOfBoundsException e) { + throw new ArrayIndexOutOfBoundsException(ERR_REINDEX); } } @@ -601,12 +604,10 @@ public class Matrix { * @param row the value of the column to be set */ public final void setRow(final int j, final Vector row) { - if(row.elements.length != columndimension) { - throw new IllegalArgumentException("Matrix must consist of the same no of columns!"); - } - for(int i = 0; i < columndimension; i++) 
{ - elements[j][i] = row.elements[i]; + if (row.elements.length != columndimension) { + throw new IllegalArgumentException(ERR_MATRIX_DIMENSIONS); } + System.arraycopy(row.elements, 0, elements[j], 0, columndimension); } /** @@ -617,7 +618,7 @@ public class Matrix { */ public final Vector getCol(final int j) { final Vector v = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { v.elements[i] = elements[i][j]; } return v; @@ -630,10 +631,10 @@ public class Matrix { * @param column the value of the column to be set */ public final void setCol(final int j, final Vector column) { - if(column.elements.length != elements.length) { - throw new IllegalArgumentException("Matrix must consist of the same no of rows!"); + if (column.elements.length != elements.length) { + throw new IllegalArgumentException(ERR_MATRIX_DIMENSIONS); } - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { elements[i][j] = column.elements[i]; } } @@ -645,8 +646,8 @@ public class Matrix { */ public final Matrix transpose() { final Matrix X = new Matrix(columndimension, elements.length); - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < elements.length; i++) { + for (int j = 0; j < columndimension; j++) { X.elements[j][i] = elements[i][j]; } } @@ -682,8 +683,8 @@ public class Matrix { */ public final Matrix plusEquals(final Matrix B) { checkMatrixDimensions(B); - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < elements.length; i++) { + for (int j = 0; j < columndimension; j++) { elements[i][j] += B.elements[i][j]; } } @@ -699,8 +700,8 @@ public class Matrix { */ public final Matrix plusTimesEquals(final Matrix B, final double s) { checkMatrixDimensions(B); - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < 
elements.length; i++) { + for (int j = 0; j < columndimension; j++) { elements[i][j] += s * B.elements[i][j]; } } @@ -736,8 +737,8 @@ public class Matrix { */ public final Matrix minusEquals(final Matrix B) { checkMatrixDimensions(B); - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < elements.length; i++) { + for (int j = 0; j < columndimension; j++) { elements[i][j] -= B.elements[i][j]; } } @@ -753,8 +754,8 @@ public class Matrix { */ public final Matrix minusTimesEquals(final Matrix B, final double s) { checkMatrixDimensions(B); - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < elements.length; i++) { + for (int j = 0; j < columndimension; j++) { elements[i][j] -= s * B.elements[i][j]; } } @@ -778,8 +779,8 @@ public class Matrix { * @return replace A by s*A */ public final Matrix timesEquals(final double s) { - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < elements.length; i++) { + for (int j = 0; j < columndimension; j++) { elements[i][j] *= s; } } @@ -795,23 +796,22 @@ public class Matrix { */ public final Matrix times(final Matrix B) { // Optimized implementation, exploiting the storage layout - if(B.elements.length != this.columndimension) { - throw new IllegalArgumentException("Matrix inner dimensions must agree: "+getRowDimensionality()+","+getColumnDimensionality()+" * "+B.getRowDimensionality()+","+B.getColumnDimensionality()); + if (B.elements.length != this.columndimension) { + throw new IllegalArgumentException(ERR_MATRIX_INNERDIM); } final Matrix X = new Matrix(this.elements.length, B.columndimension); // Optimized ala Jama. jik order. final double[] Bcolj = new double[this.columndimension]; - for(int j = 0; j < X.columndimension; j++) { + for (int j = 0; j < X.columndimension; j++) { // Make a linear copy of column j from B - // TODO: use column getter from B? 
- for(int k = 0; k < this.columndimension; k++) { + for (int k = 0; k < this.columndimension; k++) { Bcolj[k] = B.elements[k][j]; } // multiply it with each row from A - for(int i = 0; i < this.elements.length; i++) { + for (int i = 0; i < this.elements.length; i++) { final double[] Arowi = this.elements[i]; double s = 0; - for(int k = 0; k < this.columndimension; k++) { + for (int k = 0; k < this.columndimension; k++) { s += Arowi[k] * Bcolj[k]; } X.elements[i][j] = s; @@ -828,15 +828,15 @@ public class Matrix { * @throws IllegalArgumentException Matrix inner dimensions must agree. */ public final Vector times(final Vector B) { - if(B.elements.length != this.columndimension) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); + if (B.elements.length != this.columndimension) { + throw new IllegalArgumentException(ERR_MATRIX_INNERDIM); } final Vector X = new Vector(this.elements.length); // multiply it with each row from A - for(int i = 0; i < this.elements.length; i++) { + for (int i = 0; i < this.elements.length; i++) { final double[] Arowi = this.elements[i]; double s = 0; - for(int k = 0; k < this.columndimension; k++) { + for (int k = 0; k < this.columndimension; k++) { s += Arowi[k] * B.elements[k]; } X.elements[i] = s; @@ -852,14 +852,14 @@ public class Matrix { * @throws IllegalArgumentException Matrix inner dimensions must agree. 
*/ public final Vector transposeTimes(final Vector B) { - if(B.elements.length != elements.length) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); + if (B.elements.length != elements.length) { + throw new IllegalArgumentException(ERR_MATRIX_INNERDIM); } final Vector X = new Vector(this.columndimension); // multiply it with each row from A - for(int i = 0; i < this.columndimension; i++) { + for (int i = 0; i < this.columndimension; i++) { double s = 0; - for(int k = 0; k < elements.length; k++) { + for (int k = 0; k < elements.length; k++) { s += elements[k][i] * B.elements[k]; } X.elements[i] = s; @@ -875,20 +875,20 @@ public class Matrix { * @throws IllegalArgumentException Matrix inner dimensions must agree. */ public final Matrix transposeTimes(final Matrix B) { - if(B.elements.length != elements.length) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); + if (B.elements.length != elements.length) { + throw new IllegalArgumentException(ERR_MATRIX_INNERDIM); } final Matrix X = new Matrix(this.columndimension, B.columndimension); final double[] Bcolj = new double[elements.length]; - for(int j = 0; j < X.columndimension; j++) { + for (int j = 0; j < X.columndimension; j++) { // Make a linear copy of column j from B - for(int k = 0; k < elements.length; k++) { + for (int k = 0; k < elements.length; k++) { Bcolj[k] = B.elements[k][j]; } // multiply it with each row from A - for(int i = 0; i < this.columndimension; i++) { + for (int i = 0; i < this.columndimension; i++) { double s = 0; - for(int k = 0; k < elements.length; k++) { + for (int k = 0; k < elements.length; k++) { s += elements[k][i] * Bcolj[k]; } X.elements[i][j] = s; @@ -905,17 +905,17 @@ public class Matrix { * @throws IllegalArgumentException Matrix inner dimensions must agree. 
*/ public final Matrix timesTranspose(final Matrix B) { - if(B.columndimension != this.columndimension) { - throw new IllegalArgumentException("Matrix inner dimensions must agree."); + if (B.columndimension != this.columndimension) { + throw new IllegalArgumentException(ERR_MATRIX_INNERDIM); } final Matrix X = new Matrix(this.elements.length, B.elements.length); - for(int j = 0; j < X.elements.length; j++) { + for (int j = 0; j < X.elements.length; j++) { final double[] Browj = B.elements[j]; // multiply it with each row from A - for(int i = 0; i < this.elements.length; i++) { + for (int i = 0; i < this.elements.length; i++) { final double[] Arowi = this.elements[i]; double s = 0; - for(int k = 0; k < this.columndimension; k++) { + for (int k = 0; k < this.columndimension; k++) { s += Arowi[k] * Browj[k]; } X.elements[i][j] = s; @@ -933,23 +933,23 @@ public class Matrix { */ public final Matrix transposeTimesTranspose(Matrix B) { // Optimized implementation, exploiting the storage layout - if(this.elements.length != B.columndimension) { - throw new IllegalArgumentException("Matrix inner dimensions must agree: "+getRowDimensionality()+","+getColumnDimensionality()+" * "+B.getRowDimensionality()+","+B.getColumnDimensionality()); + if (this.elements.length != B.columndimension) { + throw new IllegalArgumentException("Matrix inner dimensions must agree: " + getRowDimensionality() + "," + getColumnDimensionality() + " * " + B.getRowDimensionality() + "," + B.getColumnDimensionality()); } final Matrix X = new Matrix(this.columndimension, B.elements.length); // Optimized ala Jama. jik order. 
final double[] Acolj = new double[this.elements.length]; - for(int j = 0; j < X.elements.length; j++) { + for (int j = 0; j < X.elements.length; j++) { // Make a linear copy of column j from B - for(int k = 0; k < this.elements.length; k++) { + for (int k = 0; k < this.elements.length; k++) { Acolj[k] = this.elements[k][j]; } final double[] Xrow = X.elements[j]; // multiply it with each row from A - for(int i = 0; i < B.elements.length; i++) { + for (int i = 0; i < B.elements.length; i++) { final double[] Browi = B.elements[i]; double s = 0; - for(int k = 0; k < B.columndimension; k++) { + for (int k = 0; k < B.columndimension; k++) { s += Browi[k] * Acolj[k]; } Xrow[i] = s; @@ -1011,7 +1011,7 @@ public class Matrix { */ public final double trace() { double t = 0; - for(int i = 0; i < Math.min(elements.length, columndimension); i++) { + for (int i = 0; i < Math.min(elements.length, columndimension); i++) { t += elements[i][i]; } return t; @@ -1024,9 +1024,9 @@ public class Matrix { */ public final double norm1() { double f = 0; - for(int j = 0; j < columndimension; j++) { + for (int j = 0; j < columndimension; j++) { double s = 0; - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { s += Math.abs(elements[i][j]); } f = Math.max(f, s); @@ -1050,9 +1050,9 @@ public class Matrix { */ public final double normInf() { double f = 0; - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { double s = 0; - for(int j = 0; j < columndimension; j++) { + for (int j = 0; j < columndimension; j++) { s += Math.abs(elements[i][j]); } f = Math.max(f, s); @@ -1067,8 +1067,8 @@ public class Matrix { */ public final double normF() { double f = 0; - for(int i = 0; i < elements.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < elements.length; i++) { + for (int j = 0; j < columndimension; j++) { f = MathUtil.fastHypot(f, elements[i][j]); } } @@ -1079,14 +1079,14 @@ public class Matrix 
{ * Normalizes the columns of this matrix to length of 1.0. */ public final void normalizeColumns() { - for(int col = 0; col < columndimension; col++) { + for (int col = 0; col < columndimension; col++) { double norm = 0.0; - for(int row = 0; row < elements.length; row++) { + for (int row = 0; row < elements.length; row++) { norm = norm + (elements[row][col] * elements[row][col]); } norm = Math.sqrt(norm); - if(norm != 0) { - for(int row = 0; row < elements.length; row++) { + if (norm != 0) { + for (int row = 0; row < elements.length; row++) { elements[row][col] /= norm; } } @@ -1105,36 +1105,34 @@ public class Matrix { * columns of this matrix */ public final boolean linearlyIndependent(final Matrix columnMatrix) { - if(columnMatrix.columndimension != 1) { + if (columnMatrix.columndimension != 1) { throw new IllegalArgumentException("a.getColumnDimension() != 1"); } - if(this.elements.length != columnMatrix.elements.length) { - throw new IllegalArgumentException("a.getRowDimension() != b.getRowDimension()"); + if (this.elements.length != columnMatrix.elements.length) { + throw new IllegalArgumentException(ERR_MATRIX_DIMENSIONS); } - if(this.columndimension + columnMatrix.columndimension > this.elements.length) { + if (this.columndimension + columnMatrix.columndimension > this.elements.length) { return false; } - final StringBuffer msg = LoggingConfiguration.DEBUG ? new StringBuffer() : null; + final StringBuilder msg = LoggingConfiguration.DEBUG ? 
new StringBuilder() : null; final double[][] a = new double[columndimension + 1][elements.length - 1]; final double[] b = new double[columndimension + 1]; - for(int i = 0; i < a.length; i++) { - for(int j = 0; j < a[i].length; j++) { - if(i < columndimension) { + for (int i = 0; i < a.length; i++) { + for (int j = 0; j < a[i].length; j++) { + if (i < columndimension) { a[i][j] = elements[j][i]; - } - else { + } else { a[i][j] = columnMatrix.elements[j][0]; } } } - for(int i = 0; i < b.length; i++) { - if(i < columndimension) { + for (int i = 0; i < b.length; i++) { + if (i < columndimension) { b[i] = elements[elements.length - 1][i]; - } - else { + } else { b[i] = columnMatrix.elements[i][0]; } } @@ -1145,31 +1143,31 @@ public class Matrix { final double[][] coefficients = les.getCoefficents(); final double[] rhs = les.getRHS(); - if(msg != null) { - msg.append("\na' " + FormatUtil.format(this.getArrayRef())); - msg.append("\nb' " + FormatUtil.format(columnMatrix.getColumnPackedCopy())); + if (msg != null) { + msg.append("\na' ").append(FormatUtil.format(this.getArrayRef())); + msg.append("\nb' ").append(FormatUtil.format(columnMatrix.getColumnPackedCopy())); - msg.append("\na " + FormatUtil.format(a)); - msg.append("\nb " + FormatUtil.format(b)); - msg.append("\nleq " + les.equationsToString(4)); + msg.append("\na ").append(FormatUtil.format(a)); + msg.append("\nb ").append(FormatUtil.format(b)); + msg.append("\nleq ").append(les.equationsToString(4)); } - for(int i = 0; i < coefficients.length; i++) { + for (int i = 0; i < coefficients.length; i++) { boolean allCoefficientsZero = true; - for(int j = 0; j < coefficients[i].length; j++) { + for (int j = 0; j < coefficients[i].length; j++) { final double value = coefficients[i][j]; - if(Math.abs(value) > DELTA) { + if (Math.abs(value) > DELTA) { allCoefficientsZero = false; break; } } // allCoefficients=0 && rhs=0 -> linearly dependent - if(allCoefficientsZero) { + if (allCoefficientsZero) { final double value = 
rhs[i]; - if(Math.abs(value) < DELTA) { - if(msg != null) { - msg.append("\nvalue " + value + "[" + i + "]"); - msg.append("\nlinearly independent " + false); + if (Math.abs(value) < DELTA) { + if (msg != null) { + msg.append("\nvalue ").append(value).append('[').append(i).append(']'); + msg.append("\nlinearly independent ").append(false); Logger.getLogger(this.getClass().getName()).fine(msg.toString()); } return false; @@ -1177,8 +1175,8 @@ public class Matrix { } } - if(msg != null) { - msg.append("\nlinearly independent " + true); + if (msg != null) { + msg.append("\nlinearly independent ").append(true); Logger.getLogger(this.getClass().getName()).fine(msg.toString()); } return true; @@ -1195,18 +1193,18 @@ public class Matrix { final RationalNumber[][] gauss = exactGaussElimination(); // reduced form - for(int row = gauss.length - 1; row > 0; row--) { + for (int row = gauss.length - 1; row > 0; row--) { int firstCol = -1; - for(int col = 0; col < gauss[row].length && firstCol == -1; col++) { + for (int col = 0; col < gauss[row].length && firstCol == -1; col++) { // if(gauss.get(row, col) != 0.0) // i.e. 
== 1 - if(gauss[row][col].equals(RationalNumber.ONE)) { + if (gauss[row][col].equals(RationalNumber.ONE)) { firstCol = col; } } - if(firstCol > -1) { - for(int currentRow = row - 1; currentRow >= 0; currentRow--) { + if (firstCol > -1) { + for (int currentRow = row - 1; currentRow >= 0; currentRow--) { RationalNumber multiplier = gauss[currentRow][firstCol].copy(); - for(int col = firstCol; col < gauss[currentRow].length; col++) { + for (int col = firstCol; col < gauss[currentRow].length; col++) { RationalNumber subtrahent = gauss[row][col].times(multiplier); gauss[currentRow][col] = gauss[currentRow][col].minus(subtrahent); } @@ -1225,8 +1223,8 @@ public class Matrix { */ private final RationalNumber[][] exactGaussElimination() { final RationalNumber[][] gauss = new RationalNumber[elements.length][this.columndimension]; - for(int row = 0; row < elements.length; row++) { - for(int col = 0; col < this.columndimension; col++) { + for (int row = 0; row < elements.length; row++) { + for (int col = 0; col < this.columndimension; col++) { gauss[row][col] = new RationalNumber(elements[row][col]); } } @@ -1245,10 +1243,10 @@ public class Matrix { int firstRow = -1; // 1. find first column unequal to zero - for(int col = 0; col < gauss[0].length && firstCol == -1; col++) { - for(int row = 0; row < gauss.length && firstCol == -1; row++) { + for (int col = 0; col < gauss[0].length && firstCol == -1; col++) { + for (int row = 0; row < gauss.length && firstCol == -1; row++) { // if(gauss.get(row, col) != 0.0) - if(!gauss[row][col].equals(RationalNumber.ZERO)) { + if (!gauss[row][col].equals(RationalNumber.ZERO)) { firstCol = col; firstRow = row; } @@ -1256,8 +1254,8 @@ public class Matrix { } // 2. 
set row as first row - if(firstCol != -1) { - if(firstRow != 0) { + if (firstCol != -1) { + if (firstRow != 0) { final RationalNumber[] row = new RationalNumber[gauss[firstRow].length]; System.arraycopy(gauss[firstRow], 0, row, 0, gauss[firstRow].length); System.arraycopy(gauss[0], 0, gauss[firstRow], 0, gauss[firstRow].length); @@ -1265,19 +1263,19 @@ public class Matrix { } // 3. create leading 1 - if(!gauss[0][firstCol].equals(RationalNumber.ONE)) { + if (!gauss[0][firstCol].equals(RationalNumber.ONE)) { final RationalNumber inverse = gauss[0][firstCol].multiplicativeInverse(); - for(int col = 0; col < gauss[0].length; col++) { + for (int col = 0; col < gauss[0].length; col++) { gauss[0][col] = gauss[0][col].times(inverse); } } // 4. eliminate values unequal to zero below leading 1 - for(int row = 1; row < gauss.length; row++) { + for (int row = 1; row < gauss.length; row++) { final RationalNumber multiplier = gauss[row][firstCol].copy(); // if(multiplier != 0.0) - if(!multiplier.equals(RationalNumber.ZERO)) { - for(int col = firstCol; col < gauss[row].length; col++) { + if (!multiplier.equals(RationalNumber.ZERO)) { + for (int col = firstCol; col < gauss[row].length; col++) { final RationalNumber subtrahent = gauss[0][col].times(multiplier); gauss[row][col] = gauss[row][col].minus(subtrahent); } @@ -1285,7 +1283,7 @@ public class Matrix { } // 5. 
recursion - if(gauss.length > 1) { + if (gauss.length > 1) { final RationalNumber[][] subMatrix = new RationalNumber[gauss.length - 1][gauss[1].length]; System.arraycopy(gauss, 1, subMatrix, 0, gauss.length - 1); final RationalNumber[][] eliminatedSubMatrix = exactGaussElimination(subMatrix); @@ -1301,12 +1299,12 @@ public class Matrix { * @return true, if this matrix is symmetric, false otherwise */ public final boolean isSymmetric() { - if(elements.length != columndimension) { + if (elements.length != columndimension) { return false; } - for(int i = 0; i < elements.length; i++) { - for(int j = i + 1; j < columndimension; j++) { - if(elements[i][j] != elements[j][i]) { + for (int i = 0; i < elements.length; i++) { + for (int j = i + 1; j < columndimension; j++) { + if (elements[i][j] != elements[j][i]) { return false; } } @@ -1323,16 +1321,16 @@ public class Matrix { public final Matrix completeBasis() { Matrix basis = copy(); Matrix result = null; - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { final Matrix e_i = new Matrix(elements.length, 1); e_i.elements[0][i] = 1.0; final boolean li = basis.linearlyIndependent(e_i); - if(li) { - if(result == null) { + // TODO: efficiency - appendColumns is expensive. + if (li) { + if (result == null) { result = e_i.copy(); - } - else { + } else { result = result.appendColumns(e_i); } basis = basis.appendColumns(e_i); @@ -1350,16 +1348,16 @@ public class Matrix { public final Matrix completeToOrthonormalBasis() { Matrix basis = copy(); Matrix result = null; - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { final Matrix e_i = new Matrix(elements.length, 1); e_i.elements[i][0] = 1.0; final boolean li = basis.linearlyIndependent(e_i); - if(li) { - if(result == null) { + // TODO: efficiency - appendColumns is expensive. 
+ if (li) { + if (result == null) { result = e_i.copy(); - } - else { + } else { result = result.appendColumns(e_i); } basis = basis.appendColumns(e_i); @@ -1376,17 +1374,16 @@ public class Matrix { * @return the new matrix with the appended columns */ public final Matrix appendColumns(final Matrix columns) { - if(elements.length != columns.elements.length) { - throw new IllegalArgumentException("m.getRowDimension() != column.getRowDimension()"); + if (elements.length != columns.elements.length) { + throw new IllegalArgumentException(ERR_MATRIX_DIMENSIONS); } final Matrix result = new Matrix(elements.length, columndimension + columns.columndimension); - for(int i = 0; i < result.columndimension; i++) { + for (int i = 0; i < result.columndimension; i++) { // FIXME: optimize - excess copying! - if(i < columndimension) { + if (i < columndimension) { result.setCol(i, getCol(i)); - } - else { + } else { result.setCol(i, columns.getCol(i - columndimension)); } } @@ -1402,10 +1399,10 @@ public class Matrix { Matrix v = copy(); // FIXME: optimize - excess copying! 
- for(int i = 1; i < columndimension; i++) { + for (int i = 1; i < columndimension; i++) { final Vector u_i = getCol(i); final Vector sum = new Vector(elements.length); - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { final Vector v_j = v.getCol(j); double scalar = u_i.transposeTimes(v_j) / v_j.transposeTimes(v_j); sum.plusTimesEquals(v_j, scalar); @@ -1428,7 +1425,7 @@ public class Matrix { */ public final Matrix cheatToAvoidSingularity(final double constant) { final Matrix a = this.copy(); - for(int i = 0; i < a.columndimension && i < a.elements.length; i++) { + for (int i = 0; i < a.columndimension && i < a.elements.length; i++) { // if(a.get(i, i) < constant) { a.elements[i][i] += constant; @@ -1460,48 +1457,45 @@ public class Matrix { tokenizer.wordChars(0, 255); tokenizer.whitespaceChars(0, ' '); tokenizer.eolIsSignificant(true); - java.util.Vector<Double> v = new java.util.Vector<Double>(); + TDoubleArrayList v = new TDoubleArrayList(); // Ignore initial empty lines - while(tokenizer.nextToken() == StreamTokenizer.TT_EOL) { + while (tokenizer.nextToken() == StreamTokenizer.TT_EOL) { // ignore initial empty lines } - if(tokenizer.ttype == StreamTokenizer.TT_EOF) { + if (tokenizer.ttype == StreamTokenizer.TT_EOF) { throw new java.io.IOException("Unexpected EOF on matrix read."); } do { - v.addElement(Double.valueOf(tokenizer.sval)); // Read & store 1st + v.add(Double.parseDouble(tokenizer.sval)); // Read & store 1st // row. } - while(tokenizer.nextToken() == StreamTokenizer.TT_WORD); + while (tokenizer.nextToken() == StreamTokenizer.TT_WORD); int n = v.size(); // Now we've got the number of columns! - double row[] = new double[n]; - for(int j = 0; j < n; j++) { - // extract the elements of the 1st row. - row[j] = v.elementAt(j); - } - // v.removeAllElements(); - java.util.Vector<double[]> rowV = new java.util.Vector<double[]>(); - rowV.addElement(row); // Start storing rows instead of columns. 
- while(tokenizer.nextToken() == StreamTokenizer.TT_WORD) { + double row[] = v.toArray(); + ArrayList<double[]> rowV = new ArrayList<double[]>(); + rowV.add(row); // Start storing rows instead of columns. + while (tokenizer.nextToken() == StreamTokenizer.TT_WORD) { // While non-empty lines - rowV.addElement(row = new double[n]); + rowV.add(row = new double[n]); int j = 0; do { - if(j >= n) { + if (j >= n) { throw new java.io.IOException("Row " + v.size() + " is too long."); } - row[j++] = (Double.valueOf(tokenizer.sval)); + row[j++] = Double.parseDouble(tokenizer.sval); } - while(tokenizer.nextToken() == StreamTokenizer.TT_WORD); - if(j < n) { + while (tokenizer.nextToken() == StreamTokenizer.TT_WORD); + if (j < n) { throw new java.io.IOException("Row " + v.size() + " is too short."); } } int m = rowV.size(); // Now we've got the number of rows. double[][] A = new double[m][]; - rowV.copyInto(A); // copy the rows out of the vector + for (int i = 0; i < m; i++) { + A[i] = rowV.get(i); + } return new Matrix(A); } @@ -1509,7 +1503,7 @@ public class Matrix { * Check if size(A) == size(B) */ protected void checkMatrixDimensions(Matrix B) { - if(B.getRowDimensionality() != getRowDimensionality() || B.getColumnDimensionality() != getColumnDimensionality()) { + if (B.getRowDimensionality() != getRowDimensionality() || B.getColumnDimensionality() != getColumnDimensionality()) { throw new IllegalArgumentException("Matrix dimensions must agree."); } } @@ -1526,25 +1520,25 @@ public class Matrix { @Override public boolean equals(Object obj) { - if(this == obj) { + if (this == obj) { return true; } - if(obj == null) { + if (obj == null) { return false; } - if(getClass() != obj.getClass()) { + if (getClass() != obj.getClass()) { return false; } final Matrix other = (Matrix) obj; - if(this.elements.length != other.elements.length) { + if (this.elements.length != other.elements.length) { return false; } - if(this.columndimension != other.columndimension) { + if 
(this.columndimension != other.columndimension) { return false; } - for(int i = 0; i < this.elements.length; i++) { - for(int j = 0; j < this.columndimension; j++) { - if(this.elements[i][j] != other.elements[i][j]) { + for (int i = 0; i < this.elements.length; i++) { + for (int j = 0; j < this.columndimension; j++) { + if (this.elements[i][j] != other.elements[i][j]) { return false; } } @@ -1561,25 +1555,25 @@ public class Matrix { * @return true if delta smaller than maximum */ public boolean almostEquals(Object obj, double maxdelta) { - if(this == obj) { + if (this == obj) { return true; } - if(obj == null) { + if (obj == null) { return false; } - if(getClass() != obj.getClass()) { + if (getClass() != obj.getClass()) { return false; } final Matrix other = (Matrix) obj; - if(this.elements.length != other.elements.length) { + if (this.elements.length != other.elements.length) { return false; } - if(this.columndimension != other.columndimension) { + if (this.columndimension != other.columndimension) { return false; } - for(int i = 0; i < this.elements.length; i++) { - for(int j = 0; j < this.columndimension; j++) { - if(Math.abs(this.elements[i][j] - other.elements[i][j]) > maxdelta) { + for (int i = 0; i < this.elements.length; i++) { + for (int j = 0; j < this.columndimension; j++) { + if (Math.abs(this.elements[i][j] - other.elements[i][j]) > maxdelta) { return false; } } @@ -1605,4 +1599,4 @@ public class Matrix { public String toString() { return FormatUtil.format(this); } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java index 435ac3d7..2a24a74d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectedCentroid.java @@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; /** * Centroid only using a subset of dimensions. @@ -57,11 +57,11 @@ public class ProjectedCentroid extends Centroid { public ProjectedCentroid(BitSet dims, int dim) { super(dim); this.dims = dims; - assert (dims.size() <= dim); + assert (dims.length() <= dim) : (dims.length() + " > " + dim + " ?!?"); } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. * * @param val Value */ @@ -82,8 +82,11 @@ public class ProjectedCentroid extends Centroid { * @param weight weight */ @Override - public void put(double val[], double weight) { + public void put(double[] val, double weight) { assert (val.length == elements.length); + if (weight == 0) { + return; // Skip zero weights. + } final double nwsum = weight + wsum; for(int i = dims.nextSetBit(0); i >= 0; i = dims.nextSetBit(i + 1)) { final double delta = val[i] - elements[i]; @@ -94,16 +97,16 @@ public class ProjectedCentroid extends Centroid { } /** - * Add a single value with weight 1.0 + * Add a single value with weight 1.0. 
* * @param val Value */ @Override - public void put(NumberVector<?, ?> val) { + public void put(NumberVector<?> val) { assert (val.getDimensionality() == elements.length); wsum += 1.0; for(int i = dims.nextSetBit(0); i >= 0; i = dims.nextSetBit(i + 1)) { - final double delta = val.doubleValue(i + 1) - elements[i]; + final double delta = val.doubleValue(i) - elements[i]; elements[i] += delta / wsum; } } @@ -115,11 +118,14 @@ public class ProjectedCentroid extends Centroid { * @param weight weight */ @Override - public void put(NumberVector<?, ?> val, double weight) { + public void put(NumberVector<?> val, double weight) { assert (val.getDimensionality() == elements.length); + if (weight == 0) { + return; // Skip zero weights. + } final double nwsum = weight + wsum; for(int i = dims.nextSetBit(0); i >= 0; i = dims.nextSetBit(i + 1)) { - final double delta = val.doubleValue(i + 1) - elements[i]; + final double delta = val.doubleValue(i) - elements[i]; final double rval = delta * weight / nwsum; elements[i] += rval; } @@ -131,10 +137,11 @@ public class ProjectedCentroid extends Centroid { * * @param dims Dimensions to use (indexed with 0) * @param relation Relation to process + * @return Centroid */ - public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?, ?>> relation) { - ProjectedCentroid c = new ProjectedCentroid(dims, DatabaseUtil.dimensionality(relation)); - assert (dims.size() <= DatabaseUtil.dimensionality(relation)); + public static ProjectedCentroid make(BitSet dims, Relation<? 
extends NumberVector<?>> relation) { + ProjectedCentroid c = new ProjectedCentroid(dims, RelationUtil.dimensionality(relation)); + assert (dims.size() <= RelationUtil.dimensionality(relation)); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { c.put(relation.get(iditer)); } @@ -147,11 +154,12 @@ public class ProjectedCentroid extends Centroid { * @param dims Dimensions to use (indexed with 0) * @param relation Relation to process * @param ids IDs to process + * @return Centroid */ - public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) { - ProjectedCentroid c = new ProjectedCentroid(dims, DatabaseUtil.dimensionality(relation)); - assert (dims.size() <= DatabaseUtil.dimensionality(relation)); - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + public static ProjectedCentroid make(BitSet dims, Relation<? extends NumberVector<?>> relation, DBIDs ids) { + ProjectedCentroid c = new ProjectedCentroid(dims, RelationUtil.dimensionality(relation)); + assert (dims.length() <= RelationUtil.dimensionality(relation)); + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { c.put(relation.get(iter)); } return c; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java index d8858657..9aa1fa7c 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/ProjectionResult.java @@ -38,12 +38,12 @@ public interface ProjectionResult { * * @return number of strong (correlated) dimensions */ - public int getCorrelationDimension(); + int getCorrelationDimension(); /** * Projection matrix * * @return projection matrix */ - public Matrix similarityMatrix(); + Matrix similarityMatrix(); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java 
b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java index 183a8034..772e92eb 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SingularValueDecomposition.java @@ -120,7 +120,7 @@ public class SingularValueDecomposition { s[k] = -s[k]; } for(int j = k + 1; j < n; j++) { - if((k < nct) & (s[k] != 0.0)) { + if((k < nct) && (s[k] != 0.0)) { // Apply the transformation. double t = 0; for(int i = k; i < m; i++) { @@ -137,7 +137,7 @@ public class SingularValueDecomposition { e[j] = A[k][j]; } - if(wantu & (k < nct)) { + if(wantu && (k < nct)) { // Place the transformation in U for subsequent back // multiplication. @@ -163,7 +163,7 @@ public class SingularValueDecomposition { e[k + 1] += 1.0; } e[k] = -e[k]; - if((k + 1 < m) & (e[k] != 0.0)) { + if((k + 1 < m) && (e[k] != 0.0)) { // Apply the transformation. for(int i = k + 1; i < m; i++) { work[i] = 0.0; @@ -245,7 +245,7 @@ public class SingularValueDecomposition { // If required, generate V. 
if(wantv) { for(int k = n - 1; k >= 0; k--) { - if((k < nrt) & (e[k] != 0.0)) { + if((k < nrt) && (e[k] != 0.0)) { for(int j = k + 1; j < nu; j++) { double t = 0; for(int i = k + 1; i < n; i++) { @@ -383,7 +383,7 @@ public class SingularValueDecomposition { double b = ((spm1 + sp) * (spm1 - sp) + epm1 * epm1) / 2.0; double c = (sp * epm1) * (sp * epm1); double shift = 0.0; - if((b != 0.0) | (c != 0.0)) { + if((b != 0.0) || (c != 0.0)) { shift = Math.sqrt(b * b + c); if(b < 0.0) { shift = -shift; diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java index 5fa023ca..dcc2f2c0 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/SortedEigenPairs.java @@ -185,9 +185,9 @@ public class SortedEigenPairs { */ @Override public String toString() { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); for(EigenPair eigenPair : eigenPairs) { - result.append("\n").append(eigenPair); + result.append('\n').append(eigenPair); } return result.toString(); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java index 97466b20..6b6d7090 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/VMath.java @@ -41,6 +41,26 @@ public final class VMath { public static final double DELTA = 1E-5; /** + * Error message (in assertions!) when vector dimensionalities do not agree. + */ + public static final String ERR_VEC_DIMENSIONS = "Vector dimensions do not agree."; + + /** + * Error message (in assertions!) when matrix dimensionalities do not agree. + */ + public static final String ERR_MATRIX_DIMENSIONS = "Matrix dimensions do not agree."; + + /** + * Error message (in assertions!) when matrix dimensionalities do not agree. 
+ */ + public static final String ERR_MATRIX_INNERDIM = "Matrix inner dimensions do not agree."; + + /** + * Error message (in assertions!) when dimensionalities do not agree. + */ + private static final String ERR_DIMENSIONS = "Dimensionalities do not agree."; + + /** * Fake constructor. Static class. */ private VMath() { @@ -53,19 +73,18 @@ public final class VMath { * @param dimensionality dimensionality * @return Random vector of length 1.0 */ - public final static double[] randomNormalizedVector(final int dimensionality) { + public static final double[] randomNormalizedVector(final int dimensionality) { final double[] v = new double[dimensionality]; - for(int i = 0; i < dimensionality; i++) { + for (int i = 0; i < dimensionality; i++) { v[i] = Math.random(); } double norm = euclideanLength(v); - if(norm != 0) { - for(int row = 0; row < v.length; row++) { + if (norm != 0) { + for (int row = 0; row < v.length; row++) { v[row] /= norm; } return v; - } - else { + } else { return randomNormalizedVector(dimensionality); } } @@ -77,7 +96,7 @@ public final class VMath { * @param i the index * @return the ith unit vector of the specified dimensionality */ - public final static double[] unitVector(final int dimensionality, final int i) { + public static final double[] unitVector(final int dimensionality, final int i) { final double[] v = new double[dimensionality]; v[i] = 1; return v; @@ -89,7 +108,7 @@ public final class VMath { * @param v original vector * @return a copy of this vector */ - public final static double[] copy(final double[] v) { + public static final double[] copy(final double[] v) { return Arrays.copyOf(v, v.length); } @@ -99,9 +118,9 @@ public final class VMath { * @param v Vector * @return Matrix */ - public final static double[][] transpose(final double[] v) { + public static final double[][] transpose(final double[] v) { double[][] re = new double[v.length][1]; - for(int i = 0; i < v.length; i++) { + for (int i = 0; i < v.length; i++) { re[i][0] = 
v[i]; } return re; @@ -114,10 +133,10 @@ public final class VMath { * @param v2 second vector * @return the sum v1 + v2 */ - public final static double[] plus(final double[] v1, final double[] v2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; + public static final double[] plus(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; final double[] result = new double[v1.length]; - for(int i = 0; i < result.length; i++) { + for (int i = 0; i < result.length; i++) { result[i] = v1[i] + v2[i]; } return result; @@ -131,10 +150,10 @@ public final class VMath { * @param s2 the scalar * @return the result of v1 + v2 * s2 */ - public final static double[] plusTimes(final double[] v1, final double[] v2, final double s2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; + public static final double[] plusTimes(final double[] v1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; final double[] result = new double[v1.length]; - for(int i = 0; i < result.length; i++) { + for (int i = 0; i < result.length; i++) { result[i] = v1[i] + v2[i] * s2; } return result; @@ -148,10 +167,10 @@ public final class VMath { * @param v2 second vector * @return the result of v1 * s1 + v2 */ - public final static double[] timesPlus(final double[] v1, final double s1, final double[] v2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; + public static final double[] timesPlus(final double[] v1, final double s1, final double[] v2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; final double[] result = new double[v1.length]; - for(int i = 0; i < result.length; i++) { + for (int i = 0; i < result.length; i++) { result[i] = v1[i] * s1 + v2[i]; } return result; @@ -166,10 +185,10 @@ public final class VMath { * @param s2 the scalar for v2 * @return the result of v1 * s1 + v2 * s2 */ - public final static double[] timesPlusTimes(final double[] 
v1, final double s1, final double[] v2, final double s2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; + public static final double[] timesPlusTimes(final double[] v1, final double s1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; final double[] result = new double[v1.length]; - for(int i = 0; i < result.length; i++) { + for (int i = 0; i < result.length; i++) { result[i] = v1[i] * s1 + v2[i] * s2; } return result; @@ -182,9 +201,9 @@ public final class VMath { * @param v2 second vector * @return v1 = v1 + v2 */ - public final static double[] plusEquals(final double[] v1, final double[] v2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] plusEquals(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] += v2[i]; } return v1; @@ -198,9 +217,9 @@ public final class VMath { * @param s2 scalar vor v2 * @return v1 = v1 + v2 * s2 */ - public final static double[] plusTimesEquals(final double[] v1, final double[] v2, final double s2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] plusTimesEquals(final double[] v1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] += s2 * v2[i]; } return v1; @@ -214,9 +233,9 @@ public final class VMath { * @param v2 another vector * @return v1 = v1 * s1 + v2 */ - public final static double[] timesPlusEquals(final double[] v1, final double s1, final double[] v2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] timesPlusEquals(final double[] v1, final double s1, final double[] v2) { + assert (v1.length == v2.length) : 
ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] = v1[i] * s1 + v2[i]; } return v1; @@ -231,9 +250,9 @@ public final class VMath { * @param s2 scalar for v2 * @return v1 = v1 * s1 + v2 * s2 */ - public final static double[] timesPlusTimesEquals(final double[] v1, final double s1, final double[] v2, final double s2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] timesPlusTimesEquals(final double[] v1, final double s1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] = v1[i] * s1 + v2[i] * s2; } return v1; @@ -246,9 +265,9 @@ public final class VMath { * @param d value to add * @return v1 + d */ - public final static double[] plus(final double[] v1, final double d) { + public static final double[] plus(final double[] v1, final double d) { final double[] result = new double[v1.length]; - for(int i = 0; i < result.length; i++) { + for (int i = 0; i < result.length; i++) { result[i] = v1[i] + d; } return result; @@ -261,8 +280,8 @@ public final class VMath { * @param d value to add * @return Modified vector */ - public final static double[] plusEquals(final double[] v1, final double d) { - for(int i = 0; i < v1.length; i++) { + public static final double[] plusEquals(final double[] v1, final double d) { + for (int i = 0; i < v1.length; i++) { v1[i] += d; } return v1; @@ -275,9 +294,9 @@ public final class VMath { * @param v2 the vector to be subtracted from this vector * @return v1 - v2 */ - public final static double[] minus(final double[] v1, final double[] v2) { + public static final double[] minus(final double[] v1, final double[] v2) { final double[] sub = new double[v1.length]; - for(int i = 0; i < v1.length; i++) { + for (int i = 0; i < v1.length; i++) { sub[i] = v1[i] - v2[i]; } return sub; @@ -291,9 +310,9 @@ public final class VMath { * @param s2 the scaling 
factor for v2 * @return v1 - v2 * s2 */ - public final static double[] minusTimes(final double[] v1, final double[] v2, final double s2) { + public static final double[] minusTimes(final double[] v1, final double[] v2, final double s2) { final double[] sub = new double[v1.length]; - for(int i = 0; i < v1.length; i++) { + for (int i = 0; i < v1.length; i++) { sub[i] = v1[i] - v2[i] * s2; } return sub; @@ -307,9 +326,9 @@ public final class VMath { * @param v2 the vector to be subtracted from this vector * @return v1 * s1 - v2 */ - public final static double[] timesMinus(final double[] v1, final double s1, final double[] v2) { + public static final double[] timesMinus(final double[] v1, final double s1, final double[] v2) { final double[] sub = new double[v1.length]; - for(int i = 0; i < v1.length; i++) { + for (int i = 0; i < v1.length; i++) { sub[i] = v1[i] * s1 - v2[i]; } return sub; @@ -324,9 +343,9 @@ public final class VMath { * @param s2 the scaling factor for v2 * @return v1 * s1 - v2 * s2 */ - public final static double[] timesMinusTimes(final double[] v1, final double s1, final double[] v2, final double s2) { + public static final double[] timesMinusTimes(final double[] v1, final double s1, final double[] v2, final double s2) { final double[] sub = new double[v1.length]; - for(int i = 0; i < v1.length; i++) { + for (int i = 0; i < v1.length; i++) { sub[i] = v1[i] * s1 - v2[i] * s2; } return sub; @@ -339,9 +358,9 @@ public final class VMath { * @param v2 another vector * @return v1 = v1 - v2 */ - public final static double[] minusEquals(final double[] v1, final double[] v2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] minusEquals(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] -= v2[i]; } return v1; @@ -355,9 +374,9 @@ public final class VMath { * @param s2 scalar for v2 
* @return v1 = v1 - v2 * s2 */ - public final static double[] minusTimesEquals(final double[] v1, final double[] v2, final double s2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] minusTimesEquals(final double[] v1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] -= v2[i] * s2; } return v1; @@ -371,9 +390,9 @@ public final class VMath { * @param v2 another vector * @return v1 = v1 * s1 - v2 */ - public final static double[] timesMinusEquals(final double[] v1, final double s1, final double[] v2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] timesMinusEquals(final double[] v1, final double s1, final double[] v2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] = v1[i] * s1 - v2[i]; } return v1; @@ -388,9 +407,9 @@ public final class VMath { * @param s2 Scalar * @return v1 = v1 * s1 - v2 * s2 */ - public final static double[] timesMinusTimesEquals(final double[] v1, final double s1, final double[] v2, final double s2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; - for(int i = 0; i < v1.length; i++) { + public static final double[] timesMinusTimesEquals(final double[] v1, final double s1, final double[] v2, final double s2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < v1.length; i++) { v1[i] = v1[i] * s1 - v2[i] * s2; } return v1; @@ -403,9 +422,9 @@ public final class VMath { * @param d Value to subtract * @return v1 - d */ - public final static double[] minus(final double[] v1, final double d) { + public static final double[] minus(final double[] v1, final double d) { final double[] result = new double[v1.length]; - for(int i = 0; i < v1.length; i++) { + for (int i = 0; i < 
v1.length; i++) { result[i] = v1[i] - d; } return result; @@ -418,8 +437,8 @@ public final class VMath { * @param d Value to subtract * @return v1 = v1 - d */ - public final static double[] minusEquals(final double[] v1, final double d) { - for(int i = 0; i < v1.length; i++) { + public static final double[] minusEquals(final double[] v1, final double d) { + for (int i = 0; i < v1.length; i++) { v1[i] -= d; } return v1; @@ -432,9 +451,9 @@ public final class VMath { * @param s1 the scalar to be multiplied * @return v1 * s1 */ - public final static double[] times(final double[] v1, final double s1) { + public static final double[] times(final double[] v1, final double s1) { final double[] v = new double[v1.length]; - for(int i = 0; i < v1.length; i++) { + for (int i = 0; i < v1.length; i++) { v[i] = v1[i] * s1; } return v; @@ -447,8 +466,8 @@ public final class VMath { * @param s scalar * @return v1 = v1 * s1 */ - public final static double[] timesEquals(final double[] v1, final double s) { - for(int i = 0; i < v1.length; i++) { + public static final double[] timesEquals(final double[] v1, final double s) { + for (int i = 0; i < v1.length; i++) { v1[i] *= s; } return v1; @@ -461,12 +480,12 @@ public final class VMath { * @param m2 other matrix * @return Matrix product, v1 * m2 */ - public final static double[][] times(final double[] v1, final double[][] m2) { - assert (m2.length == 1) : "Matrix inner dimensions must agree."; + public static final double[][] times(final double[] v1, final double[][] m2) { + assert (m2.length == 1) : ERR_MATRIX_INNERDIM; final int columndimension = m2[0].length; final double[][] re = new double[v1.length][columndimension]; - for(int j = 0; j < columndimension; j++) { - for(int i = 0; i < v1.length; i++) { + for (int j = 0; j < columndimension; j++) { + for (int i = 0; i < v1.length; i++) { re[i][j] = v1[i] * m2[0][j]; } } @@ -480,13 +499,13 @@ public final class VMath { * @param m2 other matrix * @return Matrix product, v1<sup>T</sup> 
* m2 */ - public final static double[][] transposeTimes(final double[] v1, final double[][] m2) { - assert (m2.length == v1.length) : "Matrix inner dimensions must agree."; + public static final double[][] transposeTimes(final double[] v1, final double[][] m2) { + assert (m2.length == v1.length) : ERR_MATRIX_INNERDIM; final int columndimension = m2[0].length; final double[][] re = new double[1][columndimension]; - for(int j = 0; j < columndimension; j++) { + for (int j = 0; j < columndimension; j++) { double s = 0; - for(int k = 0; k < v1.length; k++) { + for (int k = 0; k < v1.length; k++) { s += v1[k] * m2[k][j]; } re[0][j] = s; @@ -501,10 +520,10 @@ public final class VMath { * @param v2 other vector * @return Matrix product, v1<sup>T</sup> * v2 */ - public final static double transposeTimes(final double[] v1, final double[] v2) { - assert (v2.length == v1.length) : "Matrix inner dimensions must agree."; + public static final double transposeTimes(final double[] v1, final double[] v2) { + assert (v2.length == v1.length) : ERR_MATRIX_INNERDIM; double s = 0; - for(int k = 0; k < v1.length; k++) { + for (int k = 0; k < v1.length; k++) { s += v1[k] * v2[k]; } return s; @@ -517,12 +536,12 @@ public final class VMath { * @param m2 other matrix * @return Matrix product, v1 * m2^T */ - public final static double[][] timesTranspose(final double[] v1, final double[][] m2) { - assert (m2[0].length == 1) : "Matrix inner dimensions must agree."; + public static final double[][] timesTranspose(final double[] v1, final double[][] m2) { + assert (m2[0].length == 1) : ERR_MATRIX_INNERDIM; final double[][] re = new double[v1.length][m2.length]; - for(int j = 0; j < m2.length; j++) { - for(int i = 0; i < v1.length; i++) { + for (int j = 0; j < m2.length; j++) { + for (int i = 0; i < v1.length; i++) { re[i][j] = v1[i] * m2[j][0]; } } @@ -536,10 +555,10 @@ public final class VMath { * @param v2 other vector * @return Matrix product, v1 * v2^T */ - public final static double[][] 
timesTranspose(final double[] v1, final double[] v2) { + public static final double[][] timesTranspose(final double[] v1, final double[] v2) { final double[][] re = new double[v1.length][v2.length]; - for(int j = 0; j < v2.length; j++) { - for(int i = 0; i < v1.length; i++) { + for (int j = 0; j < v2.length; j++) { + for (int i = 0; i < v1.length; i++) { re[i][j] = v1[i] * v2[j]; } } @@ -547,7 +566,7 @@ public final class VMath { } /** - * Returns the scalar product of this vector and the specified vector v. + * Returns the scalar product (dot product) of this vector and the specified vector v. * * This is the same as transposeTimes. * @@ -555,10 +574,10 @@ public final class VMath { * @param v2 other vector * @return double the scalar product of vectors v1 and v2 */ - public final static double scalarProduct(final double[] v1, final double[] v2) { - assert (v1.length == v2.length) : "Vector dimensions must agree."; + public static final double scalarProduct(final double[] v1, final double[] v2) { + assert (v1.length == v2.length) : ERR_VEC_DIMENSIONS; double scalarProduct = 0.0; - for(int row = 0; row < v1.length; row++) { + for (int row = 0; row < v1.length; row++) { scalarProduct += v1[row] * v2[row]; } return scalarProduct; @@ -570,9 +589,9 @@ public final class VMath { * @param v1 vector * @return euclidean length of this vector */ - public final static double euclideanLength(final double[] v1) { + public static final double euclideanLength(final double[] v1) { double acc = 0.0; - for(int row = 0; row < v1.length; row++) { + for (int row = 0; row < v1.length; row++) { final double v = v1[row]; acc += v * v; } @@ -585,11 +604,11 @@ public final class VMath { * @param v1 vector * @return normalized copy of v1 */ - public final static double[] normalize(final double[] v1) { + public static final double[] normalize(final double[] v1) { double norm = euclideanLength(v1); double[] re = new double[v1.length]; - if(norm != 0) { - for(int row = 0; row < v1.length; 
row++) { + if (norm != 0) { + for (int row = 0; row < v1.length; row++) { re[row] = v1[row] / norm; } } @@ -602,10 +621,10 @@ public final class VMath { * @param v1 vector * @return normalized v1 */ - public final static double[] normalizeEquals(final double[] v1) { + public static final double[] normalizeEquals(final double[] v1) { double norm = euclideanLength(v1); - if(norm != 0) { - for(int row = 0; row < v1.length; row++) { + if (norm != 0) { + for (int row = 0; row < v1.length; row++) { v1[row] /= norm; } } @@ -619,12 +638,12 @@ public final class VMath { * @param m2 the subspace matrix * @return the projection of p into the subspace formed by v */ - public final static double[] project(final double[] v1, final double[][] m2) { - assert (v1.length == m2.length) : "v1 and m2 differ in dimensionality!"; + public static final double[] project(final double[] v1, final double[][] m2) { + assert (v1.length == m2.length) : ERR_DIMENSIONS; final int columndimension = m2[0].length; double[] sum = new double[v1.length]; - for(int i = 0; i < columndimension; i++) { + for (int i = 0; i < columndimension; i++) { // TODO: optimize - copy less. 
double[] v_i = getCol(m2, i); plusTimesEquals(sum, v_i, scalarProduct(v1, v_i)); @@ -638,7 +657,7 @@ public final class VMath { * @param v1 elements * @return hash code */ - public final static int hashCode(final double[] v1) { + public static final int hashCode(final double[] v1) { return Arrays.hashCode(v1); } @@ -649,7 +668,7 @@ public final class VMath { * @param v2 second vector * @return comparison result */ - public final static boolean equals(final double[] v1, final double[] v2) { + public static final boolean equals(final double[] v1, final double[] v2) { return Arrays.equals(v1, v2); } @@ -658,7 +677,7 @@ public final class VMath { * * @param v1 vector */ - public final static void clear(final double[] v1) { + public static final void clear(final double[] v1) { Arrays.fill(v1, 0.0); } @@ -668,7 +687,7 @@ public final class VMath { * @param v1 first vector * @return modified v1, rotated by 90 degrees */ - public final static double[] rotate90Equals(final double[] v1) { + public static final double[] rotate90Equals(final double[] v1) { assert (v1.length == 2) : "rotate90Equals is only valid for 2d vectors."; double temp = v1[0]; v1[0] = v1[1]; @@ -684,9 +703,9 @@ public final class VMath { * @param dim the dimensionality of the unit matrix * @return the unit matrix of the specified dimension */ - public final static double[][] unitMatrix(final int dim) { + public static final double[][] unitMatrix(final int dim) { final double[][] e = new double[dim][dim]; - for(int i = 0; i < dim; i++) { + for (int i = 0; i < dim; i++) { e[i][i] = 1; } return e; @@ -698,7 +717,7 @@ public final class VMath { * @param dim the dimensionality of the unit matrix * @return the zero matrix of the specified dimension */ - public final static double[][] zeroMatrix(final int dim) { + public static final double[][] zeroMatrix(final int dim) { final double[][] z = new double[dim][dim]; return z; } @@ -710,10 +729,10 @@ public final class VMath { * @param n Number of columns. 
* @return An m-by-n matrix with uniformly distributed random elements. */ - public final static double[][] random(final int m, final int n) { + public static final double[][] random(final int m, final int n) { final double[][] A = new double[m][n]; - for(int i = 0; i < m; i++) { - for(int j = 0; j < n; j++) { + for (int i = 0; i < m; i++) { + for (int j = 0; j < n; j++) { A[i][j] = Math.random(); } } @@ -727,9 +746,9 @@ public final class VMath { * @param n Number of columns. * @return An m-by-n matrix with ones on the diagonal and zeros elsewhere. */ - public final static double[][] identity(final int m, final int n) { + public static final double[][] identity(final int m, final int n) { final double[][] A = new double[m][n]; - for(int i = 0; i < Math.min(m, n); i++) { + for (int i = 0; i < Math.min(m, n); i++) { A[i][i] = 1.0; } return A; @@ -742,9 +761,9 @@ public final class VMath { * @param v1 the values on the diagonal * @return the resulting matrix */ - public final static double[][] diagonal(final double[] v1) { + public static final double[][] diagonal(final double[] v1) { final double[][] result = new double[v1.length][v1.length]; - for(int i = 0; i < v1.length; i++) { + for (int i = 0; i < v1.length; i++) { result[i][i] = v1[i]; } return result; @@ -759,7 +778,7 @@ public final class VMath { public static final double[][] copy(final double[][] m1) { final int columndimension = m1[0].length; final double[][] X = new double[m1.length][columndimension]; - for(int i = 0; i < m1.length; i++) { + for (int i = 0; i < m1.length; i++) { System.arraycopy(m1[i], 0, X[i], 0, columndimension); } return X; @@ -774,10 +793,8 @@ public final class VMath { public static final double[] rowPackedCopy(final double[][] m1) { final int columndimension = m1[0].length; double[] vals = new double[m1.length * columndimension]; - for(int i = 0; i < m1.length; i++) { - for(int j = 0; j < columndimension; j++) { - vals[i * columndimension + j] = m1[i][j]; - } + for (int i = 0; i < 
m1.length; i++) { + System.arraycopy(m1[i], 0, vals, i * columndimension, columndimension); } return vals; } @@ -791,8 +808,8 @@ public final class VMath { public static final double[] columnPackedCopy(final double[][] m1) { final int columndimension = m1[0].length; final double[] vals = new double[m1.length * columndimension]; - for(int i = 0; i < m1.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < m1.length; i++) { + for (int j = 0; j < columndimension; j++) { vals[i + j * m1.length] = m1[i][j]; } } @@ -811,10 +828,8 @@ public final class VMath { */ public static final double[][] getMatrix(final double[][] m1, final int r0, final int r1, final int c0, final int c1) { final double[][] X = new double[r1 - r0 + 1][c1 - c0 + 1]; - for(int i = r0; i <= r1; i++) { - for(int j = c0; j <= c1; j++) { - X[i - r0][j - c0] = m1[i][j]; - } + for (int i = r0; i <= r1; i++) { + System.arraycopy(m1[i], c0, X[i - r0], 0, c1 - c0 + 1); } return X; } @@ -829,8 +844,8 @@ public final class VMath { */ public static final double[][] getMatrix(final double[][] m1, final int[] r, final int[] c) { final double[][] X = new double[r.length][c.length]; - for(int i = 0; i < r.length; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = 0; i < r.length; i++) { + for (int j = 0; j < c.length; j++) { X[i][j] = m1[r[i]][c[j]]; } } @@ -848,10 +863,8 @@ public final class VMath { */ public static final double[][] getMatrix(final double[][] m1, final int[] r, final int c0, final int c1) { final double[][] X = new double[r.length][c1 - c0 + 1]; - for(int i = 0; i < r.length; i++) { - for(int j = c0; j <= c1; j++) { - X[i][j - c0] = m1[r[i]][j]; - } + for (int i = 0; i < r.length; i++) { + System.arraycopy(m1[r[i]], c0, X[i], 0, c1 - c0 + 1); } return X; } @@ -867,8 +880,8 @@ public final class VMath { */ public static final double[][] getMatrix(final double[][] m1, final int r0, final int r1, final int[] c) { final double[][] X = new double[r1 - r0 + 
1][c.length]; - for(int i = r0; i <= r1; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = r0; i <= r1; i++) { + for (int j = 0; j < c.length; j++) { X[i - r0][j] = m1[i][c[j]]; } } @@ -886,10 +899,8 @@ public final class VMath { * @param m2 New values for m1(r0:r1,c0:c1) */ public static final void setMatrix(final double[][] m1, final int r0, final int r1, final int c0, final int c1, final double[][] m2) { - for(int i = r0; i <= r1; i++) { - for(int j = c0; j <= c1; j++) { - m1[i][j] = m2[i - r0][j - c0]; - } + for (int i = r0; i <= r1; i++) { + System.arraycopy(m2[i - r0], 0, m1[i], c0, c1 - c0 + 1); } } @@ -902,8 +913,8 @@ public final class VMath { * @param m2 New values for m1(r(:),c(:)) */ public static final void setMatrix(final double[][] m1, final int[] r, final int[] c, final double[][] m2) { - for(int i = 0; i < r.length; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = 0; i < r.length; i++) { + for (int j = 0; j < c.length; j++) { m1[r[i]][c[j]] = m2[i][j]; } } @@ -919,10 +930,8 @@ public final class VMath { * @param m2 New values for m1(r(:),c0:c1) */ public static final void setMatrix(final double[][] m1, final int[] r, final int c0, final int c1, final double[][] m2) { - for(int i = 0; i < r.length; i++) { - for(int j = c0; j <= c1; j++) { - m1[r[i]][j] = m2[i][j - c0]; - } + for (int i = 0; i < r.length; i++) { + System.arraycopy(m2[i], 0, m1[r[i]], c0, c1 - c0 + 1); } } @@ -936,8 +945,8 @@ public final class VMath { * @param m2 New values for m1(r0:r1,c(:)) */ public static final void setMatrix(final double[][] m1, final int r0, final int r1, final int[] c, final double[][] m2) { - for(int i = r0; i <= r1; i++) { - for(int j = 0; j < c.length; j++) { + for (int i = r0; i <= r1; i++) { + for (int j = 0; j < c.length; j++) { m1[i][c[j]] = m2[i - r0][j]; } } @@ -963,10 +972,8 @@ public final class VMath { */ public static final void setRow(final double[][] m1, final int r, final double[] row) { final int columndimension = 
getColumnDimensionality(m1); - assert (row.length == columndimension) : "Matrix must consist of the same no of columns!"; - for(int i = 0; i < columndimension; i++) { - m1[r][i] = row[i]; - } + assert (row.length == columndimension) : ERR_DIMENSIONS; + System.arraycopy(row, 0, m1[r], 0, columndimension); } /** @@ -976,9 +983,9 @@ public final class VMath { * @param col Column number * @return Column */ - public final static double[] getCol(double[][] m1, int col) { + public static final double[] getCol(double[][] m1, int col) { double[] ret = new double[m1.length]; - for(int i = 0; i < ret.length; i++) { + for (int i = 0; i < ret.length; i++) { ret[i] = m1[i][col]; } return ret; @@ -992,8 +999,8 @@ public final class VMath { * @param column the value of the column to be set */ public static final void setCol(final double[][] m1, final int c, final double[] column) { - assert (column.length == m1.length) : "Matrix must consist of the same no of rows!"; - for(int i = 0; i < m1.length; i++) { + assert (column.length == m1.length) : ERR_DIMENSIONS; + for (int i = 0; i < m1.length; i++) { m1[i][c] = column[i]; } } @@ -1007,8 +1014,8 @@ public final class VMath { public static final double[][] transpose(final double[][] m1) { final int columndimension = getColumnDimensionality(m1); final double[][] re = new double[columndimension][m1.length]; - for(int i = 0; i < m1.length; i++) { - for(int j = 0; j < columndimension; j++) { + for (int i = 0; i < m1.length; i++) { + for (int j = 0; j < columndimension; j++) { re[j][i] = m1[i][j]; } } @@ -1047,9 +1054,9 @@ public final class VMath { */ public static final double[][] plusEquals(final double[][] m1, final double[][] m2) { final int columndimension = getColumnDimensionality(m1); - assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; - for(int i = 0; i < m1.length; i++) { - for(int j = 0; j < columndimension; j++) { + assert 
(getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : ERR_MATRIX_DIMENSIONS; + for (int i = 0; i < m1.length; i++) { + for (int j = 0; j < columndimension; j++) { m1[i][j] += m2[i][j]; } } @@ -1066,9 +1073,9 @@ public final class VMath { */ public static final double[][] plusTimesEquals(final double[][] m1, final double[][] m2, final double s2) { final int columndimension = getColumnDimensionality(m1); - assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; - for(int i = 0; i < m1.length; i++) { - for(int j = 0; j < columndimension; j++) { + assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : ERR_MATRIX_DIMENSIONS; + for (int i = 0; i < m1.length; i++) { + for (int j = 0; j < columndimension; j++) { m1[i][j] += s2 * m2[i][j]; } } @@ -1107,9 +1114,9 @@ public final class VMath { */ public static final double[][] minusEquals(final double[][] m1, final double[][] m2) { final int columndimension = getColumnDimensionality(m1); - assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; - for(int i = 0; i < m1.length; i++) { - for(int j = 0; j < columndimension; j++) { + assert (getRowDimensionality(m1) == getRowDimensionality(m2) && columndimension == getColumnDimensionality(m2)) : ERR_MATRIX_DIMENSIONS; + for (int i = 0; i < m1.length; i++) { + for (int j = 0; j < columndimension; j++) { m1[i][j] -= m2[i][j]; } } @@ -1125,11 +1132,11 @@ public final class VMath { * @return m1 = m1 - s2 * m2, overwriting m1 */ public static final double[][] minusTimesEquals(final double[][] m1, final double[][] m2, final double s2) { - assert (getRowDimensionality(m1) == getRowDimensionality(m2) && getColumnDimensionality(m1) == getColumnDimensionality(m2)) : "Matrix dimensions must agree."; - 
for(int i = 0; i < m1.length; i++) { + assert (getRowDimensionality(m1) == getRowDimensionality(m2) && getColumnDimensionality(m1) == getColumnDimensionality(m2)) : ERR_MATRIX_DIMENSIONS; + for (int i = 0; i < m1.length; i++) { final double[] row1 = m1[i]; final double[] row2 = m2[i]; - for(int j = 0; j < row1.length; j++) { + for (int j = 0; j < row1.length; j++) { row1[j] -= s2 * row2[j]; } } @@ -1155,9 +1162,9 @@ public final class VMath { * @return m1 = s1 * m1, overwriting m1 */ public static final double[][] timesEquals(final double[][] m1, final double s1) { - for(int i = 0; i < m1.length; i++) { + for (int i = 0; i < m1.length; i++) { final double[] row = m1[i]; - for(int j = 0; j < row.length; j++) { + for (int j = 0; j < row.length; j++) { row[j] *= s1; } } @@ -1175,21 +1182,21 @@ public final class VMath { final int columndimension = getColumnDimensionality(m1); final int bcolumndimension = getColumnDimensionality(m2); // Optimized implementation, exploiting the storage layout - assert (m2.length == columndimension) : "Matrix inner dimensions must agree: " + getRowDimensionality(m1) + "," + getColumnDimensionality(m1) + " * " + getRowDimensionality(m2) + "," + getColumnDimensionality(m2); + assert (m2.length == columndimension) : ERR_MATRIX_INNERDIM; final double[][] r2 = new double[m1.length][bcolumndimension]; // Optimized ala Jama. jik order. final double[] Bcolj = new double[columndimension]; - for(int j = 0; j < bcolumndimension; j++) { + for (int j = 0; j < bcolumndimension; j++) { // Make a linear copy of column j from B // TODO: use column getter from B? 
- for(int k = 0; k < columndimension; k++) { + for (int k = 0; k < columndimension; k++) { Bcolj[k] = m2[k][j]; } // multiply it with each row from A - for(int i = 0; i < m1.length; i++) { + for (int i = 0; i < m1.length; i++) { final double[] Arowi = m1[i]; double s = 0; - for(int k = 0; k < columndimension; k++) { + for (int k = 0; k < columndimension; k++) { s += Arowi[k] * Bcolj[k]; } r2[i][j] = s; @@ -1206,13 +1213,13 @@ public final class VMath { * @return Matrix product, m1 * v2 */ public static final double[] times(final double[][] m1, final double[] v2) { - assert (v2.length == getColumnDimensionality(m1)) : "Matrix inner dimensions must agree."; + assert (v2.length == getColumnDimensionality(m1)) : ERR_MATRIX_INNERDIM; final double[] re = new double[m1.length]; // multiply it with each row from A - for(int i = 0; i < m1.length; i++) { + for (int i = 0; i < m1.length; i++) { final double[] Arowi = m1[i]; double s = 0; - for(int k = 0; k < Arowi.length; k++) { + for (int k = 0; k < Arowi.length; k++) { s += Arowi[k] * v2[k]; } re[i] = s; @@ -1229,12 +1236,12 @@ public final class VMath { */ public static final double[] transposeTimes(final double[][] m1, final double[] v2) { final int columndimension = getColumnDimensionality(m1); - assert (v2.length == m1.length) : "Matrix inner dimensions must agree."; + assert (v2.length == m1.length) : ERR_MATRIX_INNERDIM; final double[] re = new double[columndimension]; // multiply it with each row from A - for(int i = 0; i < columndimension; i++) { + for (int i = 0; i < columndimension; i++) { double s = 0; - for(int k = 0; k < m1.length; k++) { + for (int k = 0; k < m1.length; k++) { s += m1[k][i] * v2[k]; } re[i] = s; @@ -1252,18 +1259,18 @@ public final class VMath { public static final double[][] transposeTimes(final double[][] m1, final double[][] m2) { final int coldim1 = getColumnDimensionality(m1); final int coldim2 = getColumnDimensionality(m2); - assert (m2.length == m1.length) : "Matrix inner dimensions 
must agree."; + assert (m2.length == m1.length) : ERR_MATRIX_INNERDIM; final double[][] re = new double[coldim1][coldim2]; final double[] Bcolj = new double[m1.length]; - for(int j = 0; j < coldim2; j++) { + for (int j = 0; j < coldim2; j++) { // Make a linear copy of column j from B - for(int k = 0; k < m1.length; k++) { + for (int k = 0; k < m1.length; k++) { Bcolj[k] = m2[k][j]; } // multiply it with each row from A - for(int i = 0; i < coldim1; i++) { + for (int i = 0; i < coldim1; i++) { double s = 0; - for(int k = 0; k < m1.length; k++) { + for (int k = 0; k < m1.length; k++) { s += m1[k][i] * Bcolj[k]; } re[i][j] = s; @@ -1273,6 +1280,27 @@ public final class VMath { } /** + * Linear algebraic matrix multiplication, a<sup>T</sup> * B * c + * + * @param B matrix + * @param c vector on the right + * @return Matrix product, a<sup>T</sup> * B * c + */ + public static double transposeTimesTimes(final double[] a, final double[][] B, final double[] c) { + assert (B.length == a.length) : ERR_MATRIX_INNERDIM; + double sum = 0.0; + for (int j = 0; j < B[0].length; j++) { + // multiply it with each row from A + double s = 0; + for (int k = 0; k < a.length; k++) { + s += a[k] * B[k][j]; + } + sum += s * c[j]; + } + return sum; + } + + /** * Linear algebraic matrix multiplication, m1 * m2^T * * @param m1 Input matrix @@ -1280,15 +1308,15 @@ public final class VMath { * @return Matrix product, m1 * m2^T */ public static final double[][] timesTranspose(final double[][] m1, final double[][] m2) { - assert (getColumnDimensionality(m2) == getColumnDimensionality(m1)) : "Matrix inner dimensions must agree."; + assert (getColumnDimensionality(m2) == getColumnDimensionality(m1)) : ERR_MATRIX_INNERDIM; final double[][] re = new double[m1.length][m2.length]; - for(int j = 0; j < re.length; j++) { + for (int j = 0; j < re.length; j++) { final double[] Browj = m2[j]; // multiply it with each row from A - for(int i = 0; i < m1.length; i++) { + for (int i = 0; i < m1.length; i++) { 
final double[] Arowi = m1[i]; double s = 0; - for(int k = 0; k < Browj.length; k++) { + for (int k = 0; k < Browj.length; k++) { s += Arowi[k] * Browj[k]; } re[i][j] = s; @@ -1306,21 +1334,21 @@ public final class VMath { */ public static final double[][] transposeTimesTranspose(final double[][] m1, final double[][] m2) { // Optimized implementation, exploiting the storage layout - assert (m1.length == getColumnDimensionality(m2)) : "Matrix inner dimensions must agree: " + getRowDimensionality(m1) + "," + getColumnDimensionality(m1) + " * " + getRowDimensionality(m2) + "," + getColumnDimensionality(m2); + assert (m1.length == getColumnDimensionality(m2)) : ERR_MATRIX_INNERDIM; final double[][] re = new double[getColumnDimensionality(m1)][m2.length]; // Optimized ala Jama. jik order. final double[] Acolj = new double[m1.length]; - for(int j = 0; j < re.length; j++) { + for (int j = 0; j < re.length; j++) { // Make a linear copy of column j from B - for(int k = 0; k < m1.length; k++) { + for (int k = 0; k < m1.length; k++) { Acolj[k] = m1[k][j]; } final double[] Xrow = re[j]; // multiply it with each row from A - for(int i = 0; i < m2.length; i++) { + for (int i = 0; i < m2.length; i++) { final double[] Browi = m2[i]; double s = 0; - for(int k = 0; k < m1.length; k++) { + for (int k = 0; k < m1.length; k++) { s += Browi[k] * Acolj[k]; } Xrow[i] = s; @@ -1335,10 +1363,10 @@ public final class VMath { * @param m1 Input matrix * @return values on the diagonal of the Matrix */ - public final static double[] getDiagonal(final double[][] m1) { + public static final double[] getDiagonal(final double[][] m1) { final int dim = Math.min(getColumnDimensionality(m1), m1.length); final double[] diagonal = new double[dim]; - for(int i = 0; i < dim; i++) { + for (int i = 0; i < dim; i++) { diagonal[i] = m1[i][i]; } return diagonal; @@ -1349,20 +1377,19 @@ public final class VMath { * * @param m1 Input matrix */ - public final static void normalizeColumns(final double[][] m1) { + 
public static final void normalizeColumns(final double[][] m1) { final int columndimension = getColumnDimensionality(m1); - for(int col = 0; col < columndimension; col++) { + for (int col = 0; col < columndimension; col++) { double norm = 0.0; - for(int row = 0; row < m1.length; row++) { + for (int row = 0; row < m1.length; row++) { norm = norm + (m1[row][col] * m1[row][col]); } norm = Math.sqrt(norm); - if(norm != 0) { - for(int row = 0; row < m1.length; row++) { + if (norm != 0) { + for (int row = 0; row < m1.length; row++) { m1[row][col] /= norm; } - } - else { + } else { // TODO: else: throw an exception? } } @@ -1382,12 +1409,11 @@ public final class VMath { final int rcolumndimension = columndimension + ccolumndimension; final double[][] result = new double[m1.length][rcolumndimension]; - for(int i = 0; i < rcolumndimension; i++) { + for (int i = 0; i < rcolumndimension; i++) { // FIXME: optimize - excess copying! - if(i < columndimension) { + if (i < columndimension) { setCol(result, i, getCol(m1, i)); - } - else { + } else { setCol(result, i, getCol(m2, i - columndimension)); } } @@ -1405,10 +1431,10 @@ public final class VMath { double[][] v = copy(m1); // FIXME: optimize - excess copying! 
- for(int i = 1; i < columndimension; i++) { + for (int i = 1; i < columndimension; i++) { final double[] u_i = getCol(m1, i); final double[] sum = new double[m1.length]; - for(int j = 0; j < i; j++) { + for (int j = 0; j < i; j++) { final double[] v_j = getCol(v, j); double scalar = scalarProduct(u_i, v_j) / scalarProduct(v_j, v_j); plusEquals(sum, times(v_j, scalar)); @@ -1452,25 +1478,25 @@ public final class VMath { * @return true if delta smaller than maximum */ public static final boolean almostEquals(final double[][] m1, final double[][] m2, final double maxdelta) { - if(m1 == m2) { + if (m1 == m2) { return true; } - if(m2 == null) { + if (m2 == null) { return false; } - if(m1.getClass() != m2.getClass()) { + if (m1.getClass() != m2.getClass()) { return false; } - if(m1.length != m2.length) { + if (m1.length != m2.length) { return false; } final int columndimension = getColumnDimensionality(m1); - if(columndimension != getColumnDimensionality(m2)) { + if (columndimension != getColumnDimensionality(m2)) { return false; } - for(int i = 0; i < m1.length; i++) { - for(int j = 0; j < columndimension; j++) { - if(Math.abs(m1[i][j] - m2[i][j]) > maxdelta) { + for (int i = 0; i < m1.length; i++) { + for (int j = 0; j < columndimension; j++) { + if (Math.abs(m1[i][j] - m2[i][j]) > maxdelta) { return false; } } @@ -1509,4 +1535,17 @@ public final class VMath { public static final int getColumnDimensionality(final double[][] m1) { return m1[0].length; } -}
\ No newline at end of file + + /** + * Cross product for 3d vectors, i.e. <code>vo = v1 x v2</code> + * + * @param vo Output vector + * @param v1 First input vector + * @param v2 Second input vector + */ + public static void cross3D(double[] vo, double[] v1, double[] v2) { + vo[0] = (v1[1] * v2[2]) - (v1[2] * v2[1]); + vo[1] = (v1[2] * v2[0]) - (v1[0] * v2[2]); + vo[2] = (v1[0] * v2[1]) - (v1[1] * v2[0]); + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java index 0a674d87..b0e1c78e 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/Vector.java @@ -27,8 +27,6 @@ import java.util.Arrays; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; -import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayAdapter; -import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; /** * Provides a vector object that encapsulates an m x 1 - matrix object. @@ -37,7 +35,7 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter * * @apiviz.landmark */ -public class Vector implements NumberVector<Vector, Double> { +public class Vector implements NumberVector<Double> { /** * Array for internal storage of elements. * @@ -46,6 +44,21 @@ public class Vector implements NumberVector<Vector, Double> { protected final double[] elements; /** + * Error message (in assertions!) when vector dimensionalities do not agree. + */ + public static final String ERR_VEC_DIMENSIONS = "Vector dimensions do not agree."; + + /** + * Error message (in assertions!) when matrix dimensionalities do not agree. + */ + public static final String ERR_MATRIX_INNERDIM = "Matrix inner dimensions do not agree."; + + /** + * Error message (in assertions!) when dimensionalities do not agree. 
+ */ + private static final String ERR_DIMENSIONS = "Dimensionalities do not agree."; + + /** * Construct a vector from a given array. * * @param values array of doubles @@ -64,7 +77,7 @@ public class Vector implements NumberVector<Vector, Double> { } /** - * Returns a randomly created vector of length 1.0 + * Returns a randomly created vector of length 1.0. * * @param dimensionality dimensionality * @return the dimensionality of the vector @@ -72,13 +85,13 @@ public class Vector implements NumberVector<Vector, Double> { public static final Vector randomNormalizedVector(final int dimensionality) { final Vector v = new Vector(dimensionality); double norm = 0; - while(norm <= 0) { - for(int i = 0; i < dimensionality; i++) { + while (norm <= 0) { + for (int i = 0; i < dimensionality; i++) { v.elements[i] = Math.random(); } norm = v.euclideanLength(); } - for(int row = 0; row < dimensionality; row++) { + for (int row = 0; row < dimensionality; row++) { v.elements[row] /= norm; } return v; @@ -106,9 +119,6 @@ public class Vector implements NumberVector<Vector, Double> { return new Vector(elements.clone()); } - /** - * Clone the Vector object. 
- */ @Override public Vector clone() { return this.copy(); @@ -173,9 +183,9 @@ public class Vector implements NumberVector<Vector, Double> { * @return the resulting vector */ public final Vector plus(final Vector v) { - assert (this.elements.length == v.elements.length) : "Vector dimensions must agree."; + assert (this.elements.length == v.elements.length) : ERR_VEC_DIMENSIONS; final Vector result = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { result.elements[i] = elements[i] + v.elements[i]; } return result; @@ -190,39 +200,39 @@ public class Vector implements NumberVector<Vector, Double> { * @return the resulting vector */ public final Vector plusTimes(final Vector v, final double s) { - assert (this.elements.length == v.elements.length) : "Vector dimensions must agree."; + assert (this.elements.length == v.elements.length) : ERR_VEC_DIMENSIONS; final Vector result = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { result.elements[i] = elements[i] + v.elements[i] * s; } return result; } /** - * A = A + B + * a = a + b. * - * @param B another matrix - * @return A + B in this Matrix + * @param b another vector + * @return a + b in this vector */ - public final Vector plusEquals(final Vector B) { - assert (this.elements.length == B.elements.length) : "Vector dimensions must agree."; - for(int i = 0; i < elements.length; i++) { - elements[i] += B.elements[i]; + public final Vector plusEquals(final Vector b) { + assert (this.elements.length == b.elements.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < elements.length; i++) { + elements[i] += b.elements[i]; } return this; } /** - * A = A + s * B + * a = a + s * b. 
* - * @param B another matrix + * @param b another vector * @param s Scalar - * @return A + s * B in this Matrix + * @return a + s * b in this vector */ - public final Vector plusTimesEquals(final Vector B, final double s) { - assert (this.elements.length == B.elements.length) : "Vector dimensions must agree."; - for(int i = 0; i < elements.length; i++) { - elements[i] += s * B.elements[i]; + public final Vector plusTimesEquals(final Vector b, final double s) { + assert (this.elements.length == b.elements.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < elements.length; i++) { + elements[i] += s * b.elements[i]; } return this; } @@ -234,7 +244,7 @@ public class Vector implements NumberVector<Vector, Double> { * @return Modified vector */ public final Vector plusEquals(final double d) { - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { elements[i] += d; } return this; @@ -248,7 +258,7 @@ public class Vector implements NumberVector<Vector, Double> { */ public final Vector minus(final Vector v) { final Vector sub = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { sub.elements[i] = elements[i] - v.elements[i]; } return sub; @@ -263,37 +273,37 @@ public class Vector implements NumberVector<Vector, Double> { */ public final Vector minusTimes(final Vector v, final double s) { final Vector sub = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { sub.elements[i] = elements[i] - v.elements[i] * s; } return sub; } /** - * A = A - B + * a = a - b. 
* - * @param B another matrix - * @return A - B in this Matrix + * @param b another vector + * @return a - b in this vector */ - public final Vector minusEquals(final Vector B) { - assert (this.elements.length == B.elements.length) : "Vector dimensions must agree."; - for(int i = 0; i < elements.length; i++) { - elements[i] -= B.elements[i]; + public final Vector minusEquals(final Vector b) { + assert (this.elements.length == b.elements.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < elements.length; i++) { + elements[i] -= b.elements[i]; } return this; } /** - * A = A - s * B + * a = a - s * b. * - * @param B another matrix + * @param b another vector * @param s Scalar - * @return A - s * B in this Matrix + * @return a - s * b in this vector */ - public final Vector minusTimesEquals(final Vector B, final double s) { - assert (this.elements.length == B.elements.length) : "Vector dimensions must agree."; - for(int i = 0; i < elements.length; i++) { - elements[i] -= s * B.elements[i]; + public final Vector minusTimesEquals(final Vector b, final double s) { + assert (this.elements.length == b.elements.length) : ERR_VEC_DIMENSIONS; + for (int i = 0; i < elements.length; i++) { + elements[i] -= s * b.elements[i]; } return this; } @@ -305,7 +315,7 @@ public class Vector implements NumberVector<Vector, Double> { * @return Modified vector */ public final Vector minusEquals(final double d) { - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { elements[i] -= d; } return this; @@ -320,36 +330,36 @@ public class Vector implements NumberVector<Vector, Double> { */ public final Vector times(final double s) { final Vector v = new Vector(elements.length); - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { v.elements[i] = elements[i] * s; } return v; } /** - * Multiply a matrix by a scalar in place, A = s*A + * Multiply a matrix by a scalar in place, A = s*A. 
* * @param s scalar * @return replace A by s*A */ public final Vector timesEquals(final double s) { - for(int i = 0; i < elements.length; i++) { + for (int i = 0; i < elements.length; i++) { elements[i] *= s; } return this; } /** - * Linear algebraic matrix multiplication, A * B + * Linear algebraic matrix multiplication, A * B. * * @param B another matrix * @return Matrix product, A * B */ public final Matrix times(final Matrix B) { - assert (B.elements.length == 1) : "Matrix inner dimensions must agree."; + assert (B.elements.length == 1) : ERR_MATRIX_INNERDIM; final Matrix X = new Matrix(this.elements.length, B.columndimension); - for(int j = 0; j < B.columndimension; j++) { - for(int i = 0; i < this.elements.length; i++) { + for (int j = 0; j < B.columndimension; j++) { + for (int i = 0; i < this.elements.length; i++) { X.elements[i][j] = elements[i] * B.elements[0][j]; } } @@ -357,18 +367,18 @@ public class Vector implements NumberVector<Vector, Double> { } /** - * Linear algebraic matrix multiplication, A<sup>T</sup> * B + * Linear algebraic matrix multiplication, A<sup>T</sup> * B. * * @param B another matrix * @return Matrix product, A<sup>T</sup> * B */ public final Matrix transposeTimes(final Matrix B) { - assert (B.elements.length == this.elements.length) : "Matrix inner dimensions must agree."; + assert (B.elements.length == this.elements.length) : ERR_MATRIX_INNERDIM; final Matrix X = new Matrix(1, B.columndimension); - for(int j = 0; j < B.columndimension; j++) { + for (int j = 0; j < B.columndimension; j++) { // multiply it with each row from A double s = 0; - for(int k = 0; k < this.elements.length; k++) { + for (int k = 0; k < this.elements.length; k++) { s += this.elements[k] * B.elements[k][j]; } X.elements[0][j] = s; @@ -377,19 +387,19 @@ public class Vector implements NumberVector<Vector, Double> { } /** - * Linear algebraic matrix multiplication, a<sup>T</sup> * B * c + * Linear algebraic matrix multiplication, a<sup>T</sup> * B * c. 
* * @param B matrix * @param c vector on the right - * @return Matrix product, a<sup>T</sup> * B + * @return Matrix product, a<sup>T</sup> * B * c */ public final double transposeTimesTimes(final Matrix B, final Vector c) { - assert (B.elements.length == this.elements.length) : "Matrix inner dimensions must agree."; + assert (B.elements.length == this.elements.length) : ERR_MATRIX_INNERDIM; double sum = 0.0; - for(int j = 0; j < B.columndimension; j++) { + for (int j = 0; j < B.columndimension; j++) { // multiply it with each row from A double s = 0; - for(int k = 0; k < this.elements.length; k++) { + for (int k = 0; k < this.elements.length; k++) { s += this.elements[k] * B.elements[k][j]; } sum += s * c.elements[j]; @@ -398,31 +408,31 @@ public class Vector implements NumberVector<Vector, Double> { } /** - * Linear algebraic matrix multiplication, A<sup>T</sup> * B + * Linear algebraic matrix multiplication, A<sup>T</sup> * B. * * @param B another vector * @return Matrix product, A<sup>T</sup> * B */ public final double transposeTimes(final Vector B) { - assert (B.elements.length == this.elements.length) : "Matrix inner dimensions must agree."; + assert (B.elements.length == this.elements.length) : ERR_MATRIX_INNERDIM; double s = 0; - for(int k = 0; k < this.elements.length; k++) { + for (int k = 0; k < this.elements.length; k++) { s += this.elements[k] * B.elements[k]; } return s; } /** - * Linear algebraic matrix multiplication, A * B^T + * Linear algebraic matrix multiplication, A * B^T. 
* * @param B another matrix * @return Matrix product, A * B^T */ public final Matrix timesTranspose(final Matrix B) { - assert (B.columndimension == 1) : "Matrix inner dimensions must agree."; + assert (B.columndimension == 1) : ERR_MATRIX_INNERDIM; final Matrix X = new Matrix(this.elements.length, B.elements.length); - for(int j = 0; j < B.elements.length; j++) { - for(int i = 0; i < this.elements.length; i++) { + for (int j = 0; j < B.elements.length; j++) { + for (int i = 0; i < this.elements.length; i++) { X.elements[i][j] = elements[i] * B.elements[j][0]; } } @@ -430,15 +440,15 @@ public class Vector implements NumberVector<Vector, Double> { } /** - * Linear algebraic matrix multiplication, A * B^T + * Linear algebraic matrix multiplication, A * B^T. * * @param B another matrix * @return Matrix product, A * B^T */ public final Matrix timesTranspose(final Vector B) { final Matrix X = new Matrix(this.elements.length, B.elements.length); - for(int j = 0; j < B.elements.length; j++) { - for(int i = 0; i < this.elements.length; i++) { + for (int j = 0; j < B.elements.length; j++) { + for (int i = 0; i < this.elements.length; i++) { X.elements[i][j] = elements[i] * B.elements[j]; } } @@ -452,7 +462,7 @@ public class Vector implements NumberVector<Vector, Double> { */ public final double euclideanLength() { double acc = 0.0; - for(int row = 0; row < elements.length; row++) { + for (int row = 0; row < elements.length; row++) { final double v = elements[row]; acc += v * v; } @@ -461,11 +471,13 @@ public class Vector implements NumberVector<Vector, Double> { /** * Normalizes this vector to the length of 1.0. 
+ * + * @return this vector */ public final Vector normalize() { double norm = euclideanLength(); - if(norm != 0) { - for(int row = 0; row < elements.length; row++) { + if (norm != 0) { + for (int row = 0; row < elements.length; row++) { elements[row] /= norm; } } @@ -480,9 +492,9 @@ public class Vector implements NumberVector<Vector, Double> { * @return the projection of p into the subspace formed by v */ public final Vector projection(final Matrix v) { - assert (elements.length == v.elements.length) : "p and v differ in row dimensionality!"; + assert (elements.length == v.elements.length) : ERR_DIMENSIONS; Vector sum = new Vector(elements.length); - for(int i = 0; i < v.columndimension; i++) { + for (int i = 0; i < v.columndimension; i++) { // TODO: optimize - copy less? Vector v_i = v.getCol(i); sum.plusTimesEquals(v_i, this.transposeTimes(v_i)); @@ -497,17 +509,17 @@ public class Vector implements NumberVector<Vector, Double> { @Override public boolean equals(Object obj) { - if(this == obj) { + if (this == obj) { return true; } - if(obj == null) { + if (obj == null) { return false; } - if(getClass() != obj.getClass()) { + if (getClass() != obj.getClass()) { return false; } final Vector other = (Vector) obj; - if(this.elements.length != other.elements.length) { + if (this.elements.length != other.elements.length) { return false; } return Arrays.equals(this.elements, other.elements); @@ -525,7 +537,7 @@ public class Vector implements NumberVector<Vector, Double> { /** * Returns a string representation of this vector without adding extra - * whitespace + * whitespace. * * @return a string representation of this vector. */ @@ -553,81 +565,71 @@ public class Vector implements NumberVector<Vector, Double> { return this; } + /** + * Cross product for 3d vectors, i.e. 
<code>this x other</code> + * + * @param other Other vector + * @return Cross product of this vector and the other vector + */ + public Vector cross3D(Vector other) { + assert (elements.length == 3 && other.elements.length == 3); + Vector out = new Vector(3); + out.elements[0] = (elements[1] * other.elements[2]) - (elements[2] * other.elements[1]); + out.elements[1] = (elements[2] * other.elements[0]) - (elements[0] * other.elements[2]); + out.elements[2] = (elements[0] * other.elements[1]) - (elements[1] * other.elements[0]); + return out; + } + // ////// NumberVector API. A bit hackish. :-( @Override public double getMin(int dimension) { - return elements[dimension - 1]; + return elements[dimension]; } @Override public double getMax(int dimension) { - return elements[dimension - 1]; + return elements[dimension]; } @Override + @Deprecated public Double getValue(int dimension) { - return elements[dimension - 1]; + return Double.valueOf(elements[dimension]); } @Override public double doubleValue(int dimension) { - return elements[dimension - 1]; + return elements[dimension]; } @Override public float floatValue(int dimension) { - return (float) elements[dimension - 1]; + return (float) elements[dimension]; } @Override public int intValue(int dimension) { - return (int) elements[dimension - 1]; + return (int) elements[dimension]; } @Override public long longValue(int dimension) { - return (long) elements[dimension - 1]; + return (long) elements[dimension]; } @Override public short shortValue(int dimension) { - return (short) elements[dimension - 1]; + return (short) elements[dimension]; } @Override public byte byteValue(int dimension) { - return (byte) elements[dimension - 1]; + return (byte) elements[dimension]; } @Override public Vector getColumnVector() { return copy(); } - - @Override - public Vector newNumberVector(double[] values) { - return new Vector(values); - } - - @Override - public <A> Vector newNumberVector(A array, NumberArrayAdapter<?, A> adapter) { - 
double[] raw = new double[adapter.size(array)]; - for(int i = 0; i < raw.length; i++) { - raw[i] = adapter.getDouble(array, i); - } - return new Vector(raw); - } - - @Override - public <A> Vector newFeatureVector(A array, ArrayAdapter<Double, A> adapter) { - if(adapter instanceof NumberArrayAdapter) { - return newNumberVector(array, (NumberArrayAdapter<?, A>) adapter); - } - double[] raw = new double[adapter.size(array)]; - for(int i = 0; i < raw.length; i++) { - raw[i] = adapter.get(array, i); - } - return new Vector(raw); - } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java index b04c64d5..9e77d810 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/FittingFunction.java @@ -37,5 +37,5 @@ public interface FittingFunction { * @param params Function parameters parameters * @return Array consisting of y value and parameter gradients */ - public FittingFunctionResult eval(double x, double[] params); + FittingFunctionResult eval(double x, double[] params); } diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java index d87e1208..20200f99 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/fitting/LevenbergMarquardtMethod.java @@ -248,10 +248,8 @@ public class LevenbergMarquardtMethod { // build covmat out of fitting matrix by multiplying diagonal elements with // 1+lambda for(int i = 0; i < numfit; i++) { - for(int j = 0; j < numfit; j++) { - covmat[i][j] = alpha[i][j]; - } - covmat[i][i] = alpha[i][i] * (1.0 + lambda); + System.arraycopy(alpha[i], 0, covmat[i], 0, numfit); + covmat[i][i] *= (1.0 + lambda); } // System.out.println("Chisq: " + chisq); // System.out.println("Lambda: " + lambda); @@ -289,14 +287,10 @@ public class LevenbergMarquardtMethod { // keep modified covmat as new alpha matrix // and da as new beta for(int i = 0; i < numfit; i++) { - for(int j = 0; j < numfit; j++) { - alpha[i][j] = covmat[i][j]; - } + System.arraycopy(covmat[i], 0, alpha[i], 0, numfit); beta[i] = deltaparams[i]; } - for(int i = 0; i < numparams; i++) { - params[i] = paramstry[i]; - } + System.arraycopy(paramstry, 0, params, 0, numparams); } else { // TODO: Do we need a 
larger limit than MAX_VALUE? diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java index c14986bd..3e12dc0e 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/AbstractCovarianceMatrixBuilder.java @@ -23,15 +23,13 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collection; -import java.util.Iterator; - import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; @@ -44,7 +42,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; * * @param <V> Vector class in use */ -public abstract class AbstractCovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> implements Parameterizable, CovarianceMatrixBuilder<V> { +public abstract class AbstractCovarianceMatrixBuilder<V extends NumberVector<?>> implements Parameterizable, CovarianceMatrixBuilder<V> { @Override public Matrix processDatabase(Relation<? extends V> database) { return processIds(database.getDBIDs(), database); @@ -54,19 +52,17 @@ public abstract class AbstractCovarianceMatrixBuilder<V extends NumberVector<? e public abstract Matrix processIds(DBIDs ids, Relation<? 
extends V> database); @Override - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database, int k) { + public <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database, int k) { ModifiableDBIDs ids = DBIDUtil.newArray(k); int have = 0; - for(Iterator<? extends DistanceResultPair<D>> it = results.iterator(); it.hasNext() && have < k; have++) { - ids.add(it.next().getDBID()); + for(DBIDIter it = results.iter(); it.valid() && have < k; it.advance(), have++) { + ids.add(it); } return processIds(ids, database); } @Override - final public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database) { + public final <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database) { return processQueryResults(results, database, results.size()); } - - // TODO: Allow KNNlist to avoid building the DBID array? }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java index bc9486c5..6a1c3898 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CompositeEigenPairFilter.java @@ -42,7 +42,7 @@ public class CompositeEigenPairFilter implements EigenPairFilter { /** * The list of filters to use. */ - public static final OptionID EIGENPAIR_FILTER_COMPOSITE_LIST = OptionID.getOrCreateOptionID("pca.filter.composite.list", "A comma separated list of the class names of the filters to be used. " + "The specified filters will be applied sequentially in the given order."); + public static final OptionID EIGENPAIR_FILTER_COMPOSITE_LIST = new OptionID("pca.filter.composite.list", "A comma separated list of the class names of the filters to be used. " + "The specified filters will be applied sequentially in the given order."); /** * The filters to be applied. diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java index 5098ffac..fce6dbb3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/CovarianceMatrixBuilder.java @@ -23,12 +23,10 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.Collection; - import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -39,44 +37,46 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; * * @param <V> Vector base type */ -public interface CovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> { +public interface CovarianceMatrixBuilder<V extends NumberVector<?>> { /** - * Compute Covariance Matrix for a complete database + * Compute Covariance Matrix for a complete database. * * @param database the database used * @return Covariance Matrix */ - public Matrix processDatabase(Relation<? extends V> database); + Matrix processDatabase(Relation<? extends V> database); /** - * Compute Covariance Matrix for a collection of database IDs + * Compute Covariance Matrix for a collection of database IDs. * * @param ids a collection of ids * @param database the database used * @return Covariance Matrix */ - public Matrix processIds(DBIDs ids, Relation<? extends V> database); + Matrix processIds(DBIDs ids, Relation<? extends V> database); /** - * Compute Covariance Matrix for a QueryResult Collection + * Compute Covariance Matrix for a QueryResult Collection. * * By default it will just collect the ids and run processIds * * @param results a collection of QueryResults * @param database the database used * @param k the number of entries to process + * @param <D> distance type * @return Covariance Matrix */ - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? 
extends V> database, int k); + <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database, int k); /** - * Compute Covariance Matrix for a QueryResult Collection + * Compute Covariance Matrix for a QueryResult Collection. * * By default it will just collect the ids and run processIds * * @param results a collection of QueryResults * @param database the database used + * @param <D> distance type * @return Covariance Matrix */ - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database); + <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database); }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java index 1657a096..8791b6a2 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/DropEigenPairFilter.java @@ -84,7 +84,7 @@ public class DropEigenPairFilter implements EigenPairFilter { double[] ev = eigenPairs.eigenValues(); // calc the eigenvalue sum. double eigenValueSum = 0.0; - for(int i = 0; i < ev.length; i++) { + for (int i = 0; i < ev.length; i++) { eigenValueSum += ev[i]; } // Minimum value @@ -92,13 +92,13 @@ public class DropEigenPairFilter implements EigenPairFilter { // Now find the maximum contrast, scanning backwards. double prev_sum = ev[ev.length - 1]; double prev_rel = 1.0; - for(int i = 2; i <= ev.length; i++) { + for (int i = 2; i <= ev.length; i++) { double curr_sum = prev_sum + ev[ev.length - i]; double curr_rel = ev[ev.length - i] / curr_sum * i; // not too weak? 
- if(ev[ev.length - i] >= weakEigenvalue) { + if (ev[ev.length - i] >= weakEigenvalue) { double contrast = curr_rel - prev_rel; - if(contrast > maxContrast) { + if (contrast > maxContrast) { maxContrast = contrast; contrastMaximum = ev.length - i; } @@ -107,11 +107,11 @@ public class DropEigenPairFilter implements EigenPairFilter { prev_rel = curr_rel; } - for(int i = 0; i <= contrastMaximum; i++) { + for (int i = 0; i <= contrastMaximum; i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); strongEigenPairs.add(eigenPair); } - for(int i = contrastMaximum + 1; i < eigenPairs.size(); i++) { + for (int i = contrastMaximum + 1; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); weakEigenPairs.add(eigenPair); } @@ -132,8 +132,9 @@ public class DropEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA); - if(config.grab(walphaP)) { + DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA); + walphaP.addConstraint(new GreaterEqualConstraint(0.0)); + if (config.grab(walphaP)) { walpha = walphaP.getValue(); } } @@ -143,4 +144,4 @@ public class DropEigenPairFilter implements EigenPairFilter { return new DropEigenPairFilter(walpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java index 553a111f..be1eae17 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/EigenPairFilter.java @@ -48,5 +48,5 @@ public interface EigenPairFilter extends Parameterizable { * @param eigenPairs the eigenPairs (i.e. the eigenvectors and * @return the filtered eigenpairs */ - public FilteredEigenPairs filter(SortedEigenPairs eigenPairs); -} + FilteredEigenPairs filter(SortedEigenPairs eigenPairs); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java index 94c00a7e..4ab120fb 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FilteredEigenPairs.java @@ -23,10 +23,10 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; - import java.util.List; +import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; + /** * Encapsulates weak and strong eigenpairs that have been filtered out * by an eigenpair filter. @@ -48,7 +48,7 @@ public class FilteredEigenPairs { /** * Creates a new object that encapsulates weak and strong eigenpairs - * that have been filtered out by an eigenpair filter + * that have been filtered out by an eigenpair filter. * * @param weakEigenPairs the weak eigenpairs * @param strongEigenPairs the strong eigenpairs diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java index 08482b71..bd6b4400 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/FirstNEigenPairFilter.java @@ -50,12 +50,12 @@ public class FirstNEigenPairFilter implements EigenPairFilter { /** * The logger for this class. 
*/ - private static final Logging logger = Logging.getLogger(FirstNEigenPairFilter.class); + private static final Logging LOG = Logging.getLogger(FirstNEigenPairFilter.class); /** * Paremeter n */ - public static final OptionID EIGENPAIR_FILTER_N = OptionID.getOrCreateOptionID("pca.filter.n", "The number of strong eigenvectors: n eigenvectors with the n highest" + "eigenvalues are marked as strong eigenvectors."); + public static final OptionID EIGENPAIR_FILTER_N = new OptionID("pca.filter.n", "The number of strong eigenvectors: n eigenvectors with the n highest" + "eigenvalues are marked as strong eigenvectors."); /** * The threshold for strong eigenvectors: n eigenvectors with the n highest @@ -75,8 +75,8 @@ public class FirstNEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { - StringBuffer msg = new StringBuffer(); - if(logger.isDebugging()) { + StringBuilder msg = new StringBuilder(); + if(LOG.isDebugging()) { msg.append("sortedEigenPairs ").append(eigenPairs.toString()); msg.append("\nn = ").append(n); } @@ -96,10 +96,10 @@ public class FirstNEigenPairFilter implements EigenPairFilter { } } - if(logger.isDebugging()) { + if(LOG.isDebugging()) { msg.append("\nstrong EigenPairs = ").append(strongEigenPairs); msg.append("\nweak EigenPairs = ").append(weakEigenPairs); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); @@ -121,9 +121,10 @@ public class FirstNEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - IntParameter nP = new IntParameter(EIGENPAIR_FILTER_N, new GreaterEqualConstraint(0)); + IntParameter nP = new IntParameter(EIGENPAIR_FILTER_N); + nP.addConstraint(new GreaterEqualConstraint(0)); if(config.grab(nP)) { - n = nP.getValue(); + n = nP.intValue(); } } diff --git 
a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java index e8d2b844..d7d999b4 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/LimitEigenPairFilter.java @@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; import java.util.ArrayList; import java.util.List; -import java.util.Vector; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; @@ -57,17 +56,17 @@ public class LimitEigenPairFilter implements EigenPairFilter { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(LimitEigenPairFilter.class); + private static final Logging LOG = Logging.getLogger(LimitEigenPairFilter.class); /** * "absolute" Flag */ - public static final OptionID EIGENPAIR_FILTER_ABSOLUTE = OptionID.getOrCreateOptionID("pca.filter.absolute", "Flag to mark delta as an absolute value."); + public static final OptionID EIGENPAIR_FILTER_ABSOLUTE = new OptionID("pca.filter.absolute", "Flag to mark delta as an absolute value."); /** * Parameter delta */ - public static final OptionID EIGENPAIR_FILTER_DELTA = OptionID.getOrCreateOptionID("pca.filter.delta", "The threshold for strong Eigenvalues. If not otherwise specified, delta " + "is a relative value w.r.t. the (absolute) highest Eigenvalues and has to be " + "a double between 0 and 1. To mark delta as an absolute value, use " + "the option -" + EIGENPAIR_FILTER_ABSOLUTE.getName() + "."); + public static final OptionID EIGENPAIR_FILTER_DELTA = new OptionID("pca.filter.delta", "The threshold for strong Eigenvalues. If not otherwise specified, delta " + "is a relative value w.r.t. the (absolute) highest Eigenvalues and has to be " + "a double between 0 and 1. 
To mark delta as an absolute value, use " + "the option -" + EIGENPAIR_FILTER_ABSOLUTE.getName() + "."); /** * The default value for delta. @@ -98,28 +97,27 @@ public class LimitEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { - StringBuffer msg = new StringBuffer(); - if(logger.isDebugging()) { + StringBuilder msg = new StringBuilder(); + if (LOG.isDebugging()) { msg.append("delta = ").append(delta); } // determine limit double limit; - if(absolute) { + if (absolute) { limit = delta; - } - else { + } else { double max = Double.NEGATIVE_INFINITY; - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); double eigenValue = Math.abs(eigenPair.getEigenvalue()); - if(max < eigenValue) { + if (max < eigenValue) { max = eigenValue; } } limit = max * delta; } - if(logger.isDebugging()) { + if (LOG.isDebugging()) { msg.append("\nlimit = ").append(limit); } @@ -128,20 +126,19 @@ public class LimitEigenPairFilter implements EigenPairFilter { List<EigenPair> weakEigenPairs = new ArrayList<EigenPair>(); // determine strong and weak eigenpairs - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); double eigenValue = Math.abs(eigenPair.getEigenvalue()); - if(eigenValue >= limit) { + if (eigenValue >= limit) { strongEigenPairs.add(eigenPair); - } - else { + } else { weakEigenPairs.add(eigenPair); } } - if(logger.isDebugging()) { + if (LOG.isDebugging()) { msg.append("\nstrong EigenPairs = ").append(strongEigenPairs); msg.append("\nweak EigenPairs = ").append(weakEigenPairs); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); @@ -169,15 +166,16 @@ public class LimitEigenPairFilter implements EigenPairFilter { protected void makeOptions(Parameterization 
config) { super.makeOptions(config); Flag absoluteF = new Flag(EIGENPAIR_FILTER_ABSOLUTE); - if(config.grab(absoluteF)) { - absolute = absoluteF.getValue(); + if (config.grab(absoluteF)) { + absolute = absoluteF.isTrue(); } - DoubleParameter deltaP = new DoubleParameter(EIGENPAIR_FILTER_DELTA, new GreaterEqualConstraint(0), DEFAULT_DELTA); - if(config.grab(deltaP)) { - delta = deltaP.getValue(); + DoubleParameter deltaP = new DoubleParameter(EIGENPAIR_FILTER_DELTA, DEFAULT_DELTA); + deltaP.addConstraint(new GreaterEqualConstraint(0)); + if (config.grab(deltaP)) { + delta = deltaP.doubleValue(); // TODO: make this a global constraint? - if(absolute && deltaP.tookDefaultValue()) { + if (absolute && deltaP.tookDefaultValue()) { config.reportError(new WrongParameterValueException("Illegal parameter setting: " + "Flag " + absoluteF.getName() + " is set, " + "but no value for " + deltaP.getName() + " is specified.")); } } @@ -186,16 +184,15 @@ public class LimitEigenPairFilter implements EigenPairFilter { // delta must be >= 0 and <= 1 if it's a relative value // Since relative or absolute is dependent on the absolute flag this is a // global constraint! - List<ParameterConstraint<Number>> cons = new Vector<ParameterConstraint<Number>>(); + List<ParameterConstraint<? super Double>> cons = new ArrayList<ParameterConstraint<? super Double>>(); // TODO: Keep the constraint here - applies to non-conditional case as - // well, - // and is set above. - ParameterConstraint<Number> aboveNull = new GreaterEqualConstraint(0); + // well, and is set above. 
+ ParameterConstraint<Number> aboveNull = new GreaterEqualConstraint(0.); cons.add(aboveNull); - ParameterConstraint<Number> underOne = new LessEqualConstraint(1); + ParameterConstraint<Number> underOne = new LessEqualConstraint(1.); cons.add(underOne); - GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<Number, Double>(deltaP, cons, absoluteF, false); + GlobalParameterConstraint gpc = new ParameterFlagGlobalConstraint<Double>(deltaP, cons, absoluteF, false); config.checkConstraint(gpc); } @@ -204,4 +201,4 @@ public class LimitEigenPairFilter implements EigenPairFilter { return new LimitEigenPairFilter(delta, absolute); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java index 29be965c..55c9c438 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/NormalizingEigenPairFilter.java @@ -45,7 +45,7 @@ public class NormalizingEigenPairFilter implements EigenPairFilter { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(NormalizingEigenPairFilter.class); + private static final Logging LOG = Logging.getLogger(NormalizingEigenPairFilter.class); /** * Provides a new EigenPairFilter that normalizes all eigenvectors s.t. @@ -67,11 +67,11 @@ public class NormalizingEigenPairFilter implements EigenPairFilter { normalizeEigenPair(eigenPair); strongEigenPairs.add(eigenPair); } - if(logger.isDebugging()) { - final StringBuffer msg = new StringBuffer(); + if(LOG.isDebugging()) { + final StringBuilder msg = new StringBuilder(); msg.append("strong EigenPairs = ").append(strongEigenPairs); msg.append("\nweak EigenPairs = ").append(weakEigenPairs); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java index 157dfedc..73462c2f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredAutotuningRunner.java @@ -23,27 +23,27 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import de.lmu.ifi.dbs.elki.data.NumberVector; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.ModifiableDistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.logging.LoggingUtil; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenPair; import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -58,14 +58,14 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * @param <V> vector type */ @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. 
Zimek", title = "A General Framework for Increasing the Robustness of PCA-based Correlation Clustering Algorithms", booktitle = "Proceedings of the 20th International Conference on Scientific and Statistical Database Management (SSDBM), Hong Kong, China, 2008", url = "http://dx.doi.org/10.1007/978-3-540-69497-7_27") -public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>> extends PCAFilteredRunner<V> { +public class PCAFilteredAutotuningRunner<V extends NumberVector<?>> extends PCAFilteredRunner<V> { /** * Constructor. * - * @param covarianceMatrixBuilder - * @param eigenPairFilter - * @param big - * @param small + * @param covarianceMatrixBuilder Covariance matrix builder + * @param eigenPairFilter Eigen pair filter + * @param big Replacement for large values + * @param small Replacement for small values */ public PCAFilteredAutotuningRunner(CovarianceMatrixBuilder<V> covarianceMatrixBuilder, EigenPairFilter eigenPairFilter, double big, double small) { super(covarianceMatrixBuilder, eigenPairFilter, big, small); @@ -75,43 +75,71 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>> public PCAFilteredResult processIds(DBIDs ids, Relation<? extends V> database) { // Assume Euclidean distance. In the context of PCA, the neighborhood should // be L2-spherical to be unbiased. 
- V center = DatabaseUtil.centroid(database, ids); - List<DoubleDistanceResultPair> dres = new ArrayList<DoubleDistanceResultPair>(ids.size()); + V center = Centroid.make(database, ids).toVector(database); + DoubleDistanceDBIDList dres = new DoubleDistanceDBIDList(ids.size()); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); - final double dist = EuclideanDistanceFunction.STATIC.doubleDistance(center, database.get(id)); - dres.add(new DoubleDistanceResultPair(dist, id)); + final double dist = EuclideanDistanceFunction.STATIC.doubleDistance(center, database.get(iter)); + dres.add(dist, iter); } - Collections.sort(dres); + dres.sort(); return processQueryResult(dres, database); } + /** + * Candidate + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + static class Cand { + /** Candidate matrix */ + Matrix m; + + /** Score */ + double explain; + + /** Dimensionality */ + int dim; + + /** + * Constructor. + * + * @param m Matrix + * @param explain Explains core + * @param dim Dimensionality + */ + Cand(Matrix m, double explain, int dim) { + this.m = m; + this.explain = explain; + this.dim = dim; + } + } + @Override - public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database) { + public <D extends NumberDistance<D, ?>> PCAFilteredResult processQueryResult(DistanceDBIDResult<D> results, Relation<? 
extends V> database) { assertSortedByDistance(results); - final int dim = DatabaseUtil.dimensionality(database); + final int dim = RelationUtil.dimensionality(database); List<Matrix> best = new LinkedList<Matrix>(); - for(int i = 0; i < dim; i++) { + for (int i = 0; i < dim; i++) { best.add(null); } double[] beststrength = new double[dim]; - for(int i = 0; i < dim; i++) { + for (int i = 0; i < dim; i++) { beststrength[i] = -1; } int[] bestk = new int[dim]; // 'history' - LinkedList<Matrix> prevM = new LinkedList<Matrix>(); - LinkedList<Double> prevS = new LinkedList<Double>(); - LinkedList<Integer> prevD = new LinkedList<Integer>(); + LinkedList<Cand> prev = new LinkedList<Cand>(); // TODO: starting parameter shouldn't be hardcoded... int smooth = 3; int startk = 4; - if(startk > results.size() - 1) { + if (startk > results.size() - 1) { startk = results.size() - 1; } // TODO: add smoothing options, handle border cases better. - for(int k = startk; k < results.size(); k++) { + for (int k = startk; k < results.size(); k++) { // sorted eigenpairs, eigenvectors, eigenvalues Matrix covMat = covarianceMatrixBuilder.processQueryResults(results, database); EigenvalueDecomposition evd = new EigenvalueDecomposition(covMat); @@ -125,49 +153,41 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>> assert ((thisdim > 0) && (thisdim <= dim)); double thisexplain = computeExplainedVariance(filteredEigenPairs); - prevM.add(covMat); - prevS.add(thisexplain); - prevD.add(thisdim); - assert (prevS.size() == prevM.size()); - assert (prevS.size() == prevD.size()); + prev.add(new Cand(covMat, thisexplain, thisdim)); - if(prevS.size() >= 2 * smooth + 1) { + if (prev.size() >= 2 * smooth + 1) { // all the same dimension? 
boolean samedim = true; - for(Iterator<Integer> it = prevD.iterator(); it.hasNext();) { - if(it.next().intValue() != thisdim) { + for (Iterator<Cand> it = prev.iterator(); it.hasNext();) { + if (it.next().dim != thisdim) { samedim = false; } } - if(samedim) { + if (samedim) { // average their explain values double avgexplain = 0.0; - for(Iterator<Double> it = prevS.iterator(); it.hasNext();) { - avgexplain += it.next().doubleValue(); + for (Iterator<Cand> it = prev.iterator(); it.hasNext();) { + avgexplain += it.next().explain; } - avgexplain /= prevS.size(); + avgexplain /= prev.size(); - if(avgexplain > beststrength[thisdim - 1]) { + if (avgexplain > beststrength[thisdim - 1]) { beststrength[thisdim - 1] = avgexplain; - best.set(thisdim - 1, prevM.get(smooth)); + best.set(thisdim - 1, prev.get(smooth).m); bestk[thisdim - 1] = k - smooth; } } - prevM.removeFirst(); - prevS.removeFirst(); - prevD.removeFirst(); - assert (prevS.size() == prevM.size()); - assert (prevS.size() == prevD.size()); + prev.removeFirst(); } } // Try all dimensions, lowest first. - for(int i = 0; i < dim; i++) { - if(beststrength[i] > 0.0) { + for (int i = 0; i < dim; i++) { + if (beststrength[i] > 0.0) { // If the best was the lowest or the biggest k, skip it! - if(bestk[i] == startk + smooth) { + if (bestk[i] == startk + smooth) { continue; } - if(bestk[i] == results.size() - smooth - 1) { + if (bestk[i] == results.size() - smooth - 1) { continue; } Matrix covMat = best.get(i); @@ -184,18 +204,18 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>> } /** - * Compute the explained variance for a FilteredEigenPairs + * Compute the explained variance for a FilteredEigenPairs. * - * @param filteredEigenPairs + * @param filteredEigenPairs Filtered eigenpairs * @return explained variance by the strong eigenvectors. 
*/ private double computeExplainedVariance(FilteredEigenPairs filteredEigenPairs) { double strongsum = 0.0; double weaksum = 0.0; - for(EigenPair ep : filteredEigenPairs.getStrongEigenPairs()) { + for (EigenPair ep : filteredEigenPairs.getStrongEigenPairs()) { strongsum += ep.getEigenvalue(); } - for(EigenPair ep : filteredEigenPairs.getWeakEigenPairs()) { + for (EigenPair ep : filteredEigenPairs.getWeakEigenPairs()) { weaksum += ep.getEigenvalue(); } return strongsum / (strongsum / weaksum); @@ -204,18 +224,29 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>> /** * Ensure that the results are sorted by distance. * - * @param results + * @param results Results to process + * @param <D> distance type */ - private <D extends NumberDistance<?, ?>> void assertSortedByDistance(Collection<? extends DistanceResultPair<D>> results) { + private <D extends NumberDistance<D, ?>> void assertSortedByDistance(DistanceDBIDResult<D> results) { // TODO: sort results instead? double dist = -1.0; - for(Iterator<? extends DistanceResultPair<D>> it = results.iterator(); it.hasNext();) { - double qr = it.next().getDistance().doubleValue(); - if(qr < dist) { - System.err.println("WARNING: results not sorted by distance!"); + boolean sorted = true; + for (DistanceDBIDResultIter<D> it = results.iter(); it.valid(); it.advance()) { + double qr = it.getDistance().doubleValue(); + if (qr < dist) { + sorted = false; } dist = qr; } + if (!sorted) { + try { + ModifiableDistanceDBIDResult.class.cast(results).sort(); + } catch (ClassCastException e) { + LoggingUtil.warning("WARNING: results not sorted by distance!", e); + } catch (UnsupportedOperationException e) { + LoggingUtil.warning("WARNING: results not sorted by distance!", e); + } + } } /** @@ -225,10 +256,10 @@ public class PCAFilteredAutotuningRunner<V extends NumberVector<? extends V, ?>> * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<? 
extends V, ?>> extends PCAFilteredRunner.Parameterizer<V> { + public static class Parameterizer<V extends NumberVector<?>> extends PCAFilteredRunner.Parameterizer<V> { @Override protected PCAFilteredAutotuningRunner<V> makeInstance() { return new PCAFilteredAutotuningRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java index 2391446d..59dca276 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCAFilteredRunner.java @@ -23,12 +23,10 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collection; - import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -53,18 +51,18 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @param <V> Vector class to use */ -public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends PCARunner<V> { +public class PCAFilteredRunner<V extends NumberVector<?>> extends PCARunner<V> { /** * Parameter to specify the filter for determination of the strong and weak * eigenvectors, must be a subclass of {@link EigenPairFilter}. 
- * <p/> + * <p> * Default value: {@link PercentageEigenPairFilter} * </p> - * <p/> + * <p> * Key: {@code -pca.filter} * </p> */ - public static final OptionID PCA_EIGENPAIR_FILTER = OptionID.getOrCreateOptionID("pca.filter", "Filter class to determine the strong and weak eigenvectors."); + public static final OptionID PCA_EIGENPAIR_FILTER = new OptionID("pca.filter", "Filter class to determine the strong and weak eigenvectors."); /** * Parameter to specify a constant big value to reset high eigenvalues, must @@ -76,7 +74,7 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P * Key: {@code -pca.big} * </p> */ - public static final OptionID BIG_ID = OptionID.getOrCreateOptionID("pca.big", "A constant big value to reset high eigenvalues."); + public static final OptionID BIG_ID = new OptionID("pca.big", "A constant big value to reset high eigenvalues."); /** * Parameter to specify a constant small value to reset low eigenvalues, must @@ -88,7 +86,7 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P * Key: {@code -pca.small} * </p> */ - public static final OptionID SMALL_ID = OptionID.getOrCreateOptionID("pca.small", "A constant small value to reset low eigenvalues."); + public static final OptionID SMALL_ID = new OptionID("pca.small", "A constant small value to reset low eigenvalues."); /** * Holds the instance of the EigenPairFilter specified by @@ -109,10 +107,10 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P /** * Constructor. 
* - * @param covarianceMatrixBuilder - * @param eigenPairFilter - * @param big - * @param small + * @param covarianceMatrixBuilder Covariance matrix builder + * @param eigenPairFilter Eigen pair filter + * @param big Replacement for large eigenvalues + * @param small Replacement for small eigenvalues */ public PCAFilteredRunner(CovarianceMatrixBuilder<V> covarianceMatrixBuilder, EigenPairFilter eigenPairFilter, double big, double small) { super(covarianceMatrixBuilder); @@ -122,7 +120,7 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P } /** - * Run PCA on a collection of database IDs + * Run PCA on a collection of database IDs. * * @param ids a collection of ids * @param database the database used @@ -134,21 +132,23 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P } /** - * Run PCA on a QueryResult Collection + * Run PCA on a QueryResult Collection. * * @param results a collection of QueryResults * @param database the database used + * @param <D> distance type * @return PCA result */ @Override - public <D extends NumberDistance<?, ?>> PCAFilteredResult processQueryResult(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database) { + public <D extends NumberDistance<D, ?>> PCAFilteredResult processQueryResult(DistanceDBIDResult<D> results, Relation<? extends V> database) { return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); } /** - * Process an existing Covariance Matrix + * Process an existing Covariance Matrix. * * @param covarMatrix the matrix used for performing PCA + * @return Filtered result */ @Override public PCAFilteredResult processCovarMatrix(Matrix covarMatrix) { @@ -158,9 +158,10 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P } /** - * Process an existing eigenvalue decomposition + * Process an existing eigenvalue decomposition. 
* * @param evd eigenvalue decomposition to use + * @return filtered result */ @Override public PCAFilteredResult processEVD(EigenvalueDecomposition evd) { @@ -185,7 +186,7 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends PCARunner.Parameterizer<V> { + public static class Parameterizer<V extends NumberVector<?>> extends PCARunner.Parameterizer<V> { /** * Holds the instance of the EigenPairFilter specified by * {@link #PCA_EIGENPAIR_FILTER}. @@ -205,24 +206,26 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<EigenPairFilter> EIGENPAIR_FILTER_PARAM = new ObjectParameter<EigenPairFilter>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class); - if(config.grab(EIGENPAIR_FILTER_PARAM)) { - eigenPairFilter = EIGENPAIR_FILTER_PARAM.instantiateClass(config); + ObjectParameter<EigenPairFilter> filterP = new ObjectParameter<EigenPairFilter>(PCA_EIGENPAIR_FILTER, EigenPairFilter.class, PercentageEigenPairFilter.class); + if (config.grab(filterP)) { + eigenPairFilter = filterP.instantiateClass(config); } - DoubleParameter BIG_PARAM = new DoubleParameter(BIG_ID, new GreaterConstraint(0), 1.0); - if(config.grab(BIG_PARAM)) { - big = BIG_PARAM.getValue(); + DoubleParameter bigP = new DoubleParameter(BIG_ID, 1.0); + bigP.addConstraint(new GreaterConstraint(0)); + if (config.grab(bigP)) { + big = bigP.doubleValue(); } - DoubleParameter SMALL_PARAM = new DoubleParameter(SMALL_ID, new GreaterEqualConstraint(0), 0.0); - if(config.grab(SMALL_PARAM)) { - small = SMALL_PARAM.getValue(); + DoubleParameter smallP = new DoubleParameter(SMALL_ID, 0.0); + smallP.addConstraint(new GreaterEqualConstraint(0)); + if (config.grab(smallP)) { + small = smallP.doubleValue(); } // global constraint 
small <--> big - config.checkConstraint(new LessGlobalConstraint<Double>(SMALL_PARAM, BIG_PARAM)); + config.checkConstraint(new LessGlobalConstraint<Double>(smallP, bigP)); } @Override @@ -230,4 +233,4 @@ public class PCAFilteredRunner<V extends NumberVector<? extends V, ?>> extends P return new PCAFilteredRunner<V>(covarianceMatrixBuilder, eigenPairFilter, big, small); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java index 661fa5c5..4fb9dbdf 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PCARunner.java @@ -23,12 +23,11 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collection; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.EigenvalueDecomposition; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -57,7 +56,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @param <V> Vector type */ -public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parameterizable { +public class PCARunner<V extends NumberVector<?>> implements Parameterizable { /** * Parameter to specify the class to compute the covariance matrix, must be a * subclass of {@link CovarianceMatrixBuilder}. @@ -68,7 +67,7 @@ public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parame * Key: {@code -pca.covariance} * </p> */ - public static final OptionID PCA_COVARIANCE_MATRIX = OptionID.getOrCreateOptionID("pca.covariance", "Class used to compute the covariance matrix."); + public static final OptionID PCA_COVARIANCE_MATRIX = new OptionID("pca.covariance", "Class used to compute the covariance matrix."); /** * The covariance computation class. @@ -86,7 +85,7 @@ public class PCARunner<V extends NumberVector<? 
extends V, ?>> implements Parame } /** - * Run PCA on the complete database + * Run PCA on the complete database. * * @param database the database used * @return PCA result @@ -96,7 +95,7 @@ public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parame } /** - * Run PCA on a collection of database IDs + * Run PCA on a collection of database IDs. * * @param ids a collection of ids * @param database the database used @@ -107,18 +106,19 @@ public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parame } /** - * Run PCA on a QueryResult Collection + * Run PCA on a QueryResult Collection. * * @param results a collection of QueryResults * @param database the database used + * @param <D> distance type * @return PCA result */ - public <D extends NumberDistance<?, ?>> PCAResult processQueryResult(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database) { + public <D extends NumberDistance<D, ?>> PCAResult processQueryResult(DistanceDBIDResult<D> results, Relation<? extends V> database) { return processCovarMatrix(covarianceMatrixBuilder.processQueryResults(results, database)); } /** - * Process an existing covariance Matrix + * Process an existing covariance Matrix. * * @param covarMatrix the matrix used for performing pca * @return PCA result @@ -130,7 +130,7 @@ public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parame } /** - * Process an existing eigenvalue decomposition + * Process an existing eigenvalue decomposition. * * @param evd eigenvalue decomposition to use * @return PCA result @@ -141,7 +141,7 @@ public class PCARunner<V extends NumberVector<? extends V, ?>> implements Parame } /** - * Get covariance matrix builder + * Get covariance matrix builder. * * @return covariance matrix builder in use */ @@ -165,7 +165,7 @@ public class PCARunner<V extends NumberVector<? 
extends V, ?>> implements Parame * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<? extends V, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { /** * The covariance computation class. */ diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java index 321c12cc..0daa3d94 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/PercentageEigenPairFilter.java @@ -33,7 +33,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @@ -51,7 +52,7 @@ public class PercentageEigenPairFilter implements EigenPairFilter { /** * The logger for this class. 
*/ - private static final Logging logger = Logging.getLogger(PercentageEigenPairFilter.class); + private static final Logging LOG = Logging.getLogger(PercentageEigenPairFilter.class); /** * The threshold for 'strong' eigenvectors: the 'strong' eigenvectors explain @@ -63,7 +64,7 @@ public class PercentageEigenPairFilter implements EigenPairFilter { * Key: {@code -pca.filter.alpha} * </p> */ - public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("pca.filter.alpha", "The share (0.0 to 1.0) of variance that needs to be explained by the 'strong' eigenvectors." + "The filter class will choose the number of strong eigenvectors by this share."); + public static final OptionID ALPHA_ID = new OptionID("pca.filter.alpha", "The share (0.0 to 1.0) of variance that needs to be explained by the 'strong' eigenvectors." + "The filter class will choose the number of strong eigenvectors by this share."); /** * The default value for alpha. @@ -88,8 +89,8 @@ public class PercentageEigenPairFilter implements EigenPairFilter { @Override public FilteredEigenPairs filter(SortedEigenPairs eigenPairs) { - StringBuffer msg = new StringBuffer(); - if(logger.isDebugging()) { + StringBuilder msg = new StringBuilder(); + if (LOG.isDebugging()) { msg.append("alpha = ").append(alpha); msg.append("\nsortedEigenPairs = ").append(eigenPairs); } @@ -100,37 +101,35 @@ public class PercentageEigenPairFilter implements EigenPairFilter { // determine sum of eigenvalues double totalSum = 0; - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); totalSum += eigenPair.getEigenvalue(); } - if(logger.isDebugging()) { + if (LOG.isDebugging()) { msg.append("\ntotalSum = ").append(totalSum); } // determine strong and weak eigenpairs double currSum = 0; boolean found = false; - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = 
eigenPairs.getEigenPair(i); currSum += eigenPair.getEigenvalue(); - if(currSum / totalSum >= alpha) { - if(!found) { + if (currSum / totalSum >= alpha) { + if (!found) { found = true; strongEigenPairs.add(eigenPair); - } - else { + } else { weakEigenPairs.add(eigenPair); } - } - else { + } else { strongEigenPairs.add(eigenPair); } } - if(logger.isDebugging()) { + if (LOG.isDebugging()) { msg.append("\nstrong EigenPairs = ").append(strongEigenPairs); msg.append("\nweak EigenPairs = ").append(weakEigenPairs); - logger.debugFine(msg.toString()); + LOG.debugFine(msg.toString()); } return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); @@ -153,9 +152,11 @@ public class PercentageEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_ALPHA); - if(config.grab(alphaP)) { - alpha = alphaP.getValue(); + DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, DEFAULT_ALPHA); + alphaP.addConstraint(new GreaterConstraint(0.0)); + alphaP.addConstraint(new LessConstraint(1.0)); + if (config.grab(alphaP)) { + alpha = alphaP.doubleValue(); } } @@ -164,4 +165,4 @@ public class PercentageEigenPairFilter implements EigenPairFilter { return new PercentageEigenPairFilter(alpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java index f66a1e96..4c359dad 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/ProgressiveEigenPairFilter.java @@ -32,8 +32,9 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @@ -86,7 +87,7 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter { /** * Parameter progressive alpha. */ - public static final OptionID EIGENPAIR_FILTER_PALPHA = OptionID.getOrCreateOptionID("pca.filter.progressivealpha", "The share (0.0 to 1.0) of variance that needs to be explained by the 'strong' eigenvectors." + "The filter class will choose the number of strong eigenvectors by this share."); + public static final OptionID EIGENPAIR_FILTER_PALPHA = new OptionID("pca.filter.progressivealpha", "The share (0.0 to 1.0) of variance that needs to be explained by the 'strong' eigenvectors." + "The filter class will choose the number of strong eigenvectors by this share."); /** * The default value for alpha. 
@@ -132,7 +133,7 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter { // determine sum of eigenvalues double totalSum = 0; - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); totalSum += eigenPair.getEigenvalue(); } @@ -142,35 +143,35 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter { double currSum = 0; boolean found = false; int i; - for(i = 0; i < eigenPairs.size(); i++) { + for (i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); // weak Eigenvector? - if(eigenPair.getEigenvalue() < expectedVariance) { + if (eigenPair.getEigenvalue() < expectedVariance) { break; } currSum += eigenPair.getEigenvalue(); // calculate progressive alpha level double alpha = 1.0 - (1.0 - palpha) * (1.0 - (i + 1) / eigenPairs.size()); - if(currSum / totalSum >= alpha || i == eigenPairs.size() - 1) { + if (currSum / totalSum >= alpha || i == eigenPairs.size() - 1) { found = true; strongEigenPairs.add(eigenPair); break; } } // if we didn't hit our alpha level, we consider all vectors to be weak! 
- if(!found) { + if (!found) { assert (weakEigenPairs.size() == 0); weakEigenPairs = strongEigenPairs; strongEigenPairs = new ArrayList<EigenPair>(); } - for(; i < eigenPairs.size(); i++) { + for (; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); weakEigenPairs.add(eigenPair); } // the code using this method doesn't expect an empty strong set, // if we didn't find any strong ones, we make all vectors strong - if(strongEigenPairs.size() == 0) { + if (strongEigenPairs.size() == 0) { return new FilteredEigenPairs(new ArrayList<EigenPair>(), weakEigenPairs); } return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); @@ -198,13 +199,16 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter palphaP = new DoubleParameter(EIGENPAIR_FILTER_PALPHA, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), DEFAULT_PALPHA); - if(config.grab(palphaP)) { + DoubleParameter palphaP = new DoubleParameter(EIGENPAIR_FILTER_PALPHA, DEFAULT_PALPHA); + palphaP.addConstraint(new GreaterConstraint(0.0)); + palphaP.addConstraint(new LessConstraint(1.0)); + if (config.grab(palphaP)) { palpha = palphaP.getValue(); } - DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA); - if(config.grab(walphaP)) { + DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA); + walphaP.addConstraint(new GreaterEqualConstraint(0.0)); + if (config.grab(walphaP)) { walpha = walphaP.getValue(); } } @@ -214,4 +218,4 @@ public class ProgressiveEigenPairFilter implements EigenPairFilter { return new ProgressiveEigenPairFilter(palpha, walpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java new file mode 100644 index 00000000..e39820c4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RANSACCovarianceMatrixBuilder.java @@ -0,0 +1,190 @@ +package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; + +/** + * RANSAC based approach to a more robust covariance matrix computation. + * + * This is an <b>experimental</b> adoption of RANSAC to this problem, not a + * generic RANSAC implementation! + * + * While using RANSAC for PCA at first sounds like a good idea, <b>it does not + * work very well in high-dimensional spaces</b>. The problem is that PCA has + * O(n^2) degrees of freedom, so we need to sample very many objects, then + * perform an O(n^3) matrix operation to compute PCA, for each attempt. + * + * References: + * + * RANSAC for PCA was a side note in: + * <p> + * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek<br /> + * Outlier Detection in Arbitrarily Oriented Subspaces<br /> + * in: Proc. 
IEEE International Conference on Data Mining (ICDM 2012) + * </p> + * + * The basic RANSAC idea was explained in: + * <p> + * Random sample consensus: a paradigm for model fitting with applications to + * image analysis and automated cartography<br /> + * M.A. Fischler, R.C. Bolles<br /> + * Communications of the ACM, Vol. 24 Issue 6 + * </p> + * + * @author Erich Schubert + * + * @param <V> Vector type + */ +@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)") +public class RANSACCovarianceMatrixBuilder<V extends NumberVector<?>> extends AbstractCovarianceMatrixBuilder<V> { + /** + * Number of iterations to perform + */ + int iterations = 1000; + + /** + * Random generator + */ + RandomFactory rnd; + + /** + * Constructor. + * + * @param iterations Number of iterations (attempts) to try + * @param rnd random generator + */ + public RANSACCovarianceMatrixBuilder(int iterations, RandomFactory rnd) { + super(); + this.iterations = iterations; + this.rnd = rnd; + } + + @Reference(title = "Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography", authors = "M.A. Fischler, R.C. Bolles", booktitle = "Communications of the ACM, Vol. 24 Issue 6", url = "http://dx.doi.org/10.1145/358669.358692") + @Override + public Matrix processIds(DBIDs ids, Relation<? 
extends V> relation) { + final int dim = RelationUtil.dimensionality(relation); + + DBIDs best = DBIDUtil.EMPTYDBIDS; + double tresh = ChiSquaredDistribution.quantile(0.85, dim); + + for (int i = 0; i < iterations; i++) { + DBIDs sample = DBIDUtil.randomSample(ids, dim + 1, rnd); + CovarianceMatrix cv = CovarianceMatrix.make(relation, sample); + Vector centroid = cv.getMeanVector(); + Matrix p = cv.destroyToSampleMatrix().inverse(); + + ModifiableDBIDs support = DBIDUtil.newHashSet(); + for (DBIDIter id = ids.iter(); id.valid(); id.advance()) { + Vector vec = relation.get(id).getColumnVector().minusEquals(centroid); + double sqlen = vec.transposeTimesTimes(p, vec); + if (sqlen < tresh) { + support.add(id); + } + } + + if (support.size() > best.size()) { + best = support; + } + if (support.size() >= ids.size()) { + break; // Can't get better than this! + } + } + // logger.warning("Consensus size: "+best.size()+" of "+ids.size()); + // Fall back to regular PCA + if (best.size() <= dim) { + return CovarianceMatrix.make(relation, ids).destroyToSampleMatrix(); + } + // Return estimation based on consensus set. + return CovarianceMatrix.make(relation, best).destroyToSampleMatrix(); + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <V> Vector type + */ + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { + /** + * Number of iterations. 
+ */ + public static final OptionID ITER_ID = new OptionID("ransacpca.iterations", "The number of iterations to perform."); + + /** + * Random seed + */ + public static final OptionID SEED_ID = new OptionID("ransacpca.seed", "Random seed (optional)."); + + /** + * Number of iterations to perform + */ + int iterations = 1000; + + /** + * Random generator + */ + RandomFactory rnd; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + IntParameter iterP = new IntParameter(ITER_ID, 1000); + iterP.addConstraint(new GreaterConstraint(0)); + if (config.grab(iterP)) { + iterations = iterP.intValue(); + } + RandomParameter rndP = new RandomParameter(SEED_ID); + if (config.grab(rndP)) { + rnd = rndP.getValue(); + } + } + + @Override + protected RANSACCovarianceMatrixBuilder<V> makeInstance() { + return new RANSACCovarianceMatrixBuilder<V>(iterations, rnd); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java index 59b2b750..38b0fcc2 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/RelativeEigenPairFilter.java @@ -57,7 +57,7 @@ public class RelativeEigenPairFilter implements EigenPairFilter { /** * Parameter relative alpha. */ - public static final OptionID EIGENPAIR_FILTER_RALPHA = OptionID.getOrCreateOptionID("pca.filter.relativealpha", "The sensitivity niveau for weak eigenvectors: An eigenvector which is at less than " + "the given share of the statistical average variance is considered weak."); + public static final OptionID EIGENPAIR_FILTER_RALPHA = new OptionID("pca.filter.relativealpha", "The sensitivity niveau for weak eigenvectors: An eigenvector which is at less than " + "the given share of the statistical average variance is considered weak."); /** * The default value for ralpha. 
@@ -93,21 +93,21 @@ public class RelativeEigenPairFilter implements EigenPairFilter { // find the last eigenvector that is considered 'strong' by the weak rule // applied to the remaining vectors only double eigenValueSum = eigenPairs.getEigenPair(eigenPairs.size() - 1).getEigenvalue(); - for(int i = eigenPairs.size() - 2; i >= 0; i--) { + for (int i = eigenPairs.size() - 2; i >= 0; i--) { EigenPair eigenPair = eigenPairs.getEigenPair(i); eigenValueSum += eigenPair.getEigenvalue(); double needEigenvalue = eigenValueSum / (eigenPairs.size() - i) * ralpha; - if(eigenPair.getEigenvalue() >= needEigenvalue) { + if (eigenPair.getEigenvalue() >= needEigenvalue) { contrastAtMax = i; break; } } - for(int i = 0; i <= contrastAtMax /* && i < eigenPairs.size() */; i++) { + for (int i = 0; i <= contrastAtMax /* && i < eigenPairs.size() */; i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); strongEigenPairs.add(eigenPair); } - for(int i = contrastAtMax + 1; i < eigenPairs.size(); i++) { + for (int i = contrastAtMax + 1; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); weakEigenPairs.add(eigenPair); } @@ -128,8 +128,9 @@ public class RelativeEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter ralphaP = new DoubleParameter(EIGENPAIR_FILTER_RALPHA, new GreaterEqualConstraint(0.0), DEFAULT_RALPHA); - if(config.grab(ralphaP)) { + DoubleParameter ralphaP = new DoubleParameter(EIGENPAIR_FILTER_RALPHA, DEFAULT_RALPHA); + ralphaP.addConstraint(new GreaterEqualConstraint(0.0)); + if (config.grab(ralphaP)) { ralpha = ralphaP.getValue(); } } @@ -139,4 +140,4 @@ public class RelativeEigenPairFilter implements EigenPairFilter { return new RelativeEigenPairFilter(ralpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java index ab04cbb5..c22e9592 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/SignificantEigenPairFilter.java @@ -87,32 +87,32 @@ public class SignificantEigenPairFilter implements EigenPairFilter { double maxContrast = 0.0; // calc the eigenvalue sum. double eigenValueSum = 0.0; - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); eigenValueSum += eigenPair.getEigenvalue(); } double weakEigenvalue = eigenValueSum / eigenPairs.size() * walpha; // now find the maximum contrast. double currSum = eigenPairs.getEigenPair(eigenPairs.size() - 1).getEigenvalue(); - for(int i = eigenPairs.size() - 2; i >= 0; i--) { + for (int i = eigenPairs.size() - 2; i >= 0; i--) { EigenPair eigenPair = eigenPairs.getEigenPair(i); currSum += eigenPair.getEigenvalue(); // weak? 
- if(eigenPair.getEigenvalue() < weakEigenvalue) { + if (eigenPair.getEigenvalue() < weakEigenvalue) { continue; } double contrast = eigenPair.getEigenvalue() / (currSum / (eigenPairs.size() - i)); - if(contrast > maxContrast) { + if (contrast > maxContrast) { maxContrast = contrast; contrastMaximum = i; } } - for(int i = 0; i <= contrastMaximum /* && i < eigenPairs.size() */; i++) { + for (int i = 0; i <= contrastMaximum /* && i < eigenPairs.size() */; i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); strongEigenPairs.add(eigenPair); } - for(int i = contrastMaximum + 1; i < eigenPairs.size(); i++) { + for (int i = contrastMaximum + 1; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); weakEigenPairs.add(eigenPair); } @@ -133,8 +133,9 @@ public class SignificantEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA); - if(config.grab(walphaP)) { + DoubleParameter walphaP = new DoubleParameter(WeakEigenPairFilter.EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA); + walphaP.addConstraint(new GreaterEqualConstraint(0.0)); + if (config.grab(walphaP)) { walpha = walphaP.getValue(); } } @@ -144,4 +145,4 @@ public class SignificantEigenPairFilter implements EigenPairFilter { return new SignificantEigenPairFilter(walpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java index 2c88d490..c7104e3b 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/StandardCovarianceMatrixBuilder.java @@ -39,9 +39,9 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; * * @param <V> Vector class to use. */ -public class StandardCovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> extends AbstractCovarianceMatrixBuilder<V> { +public class StandardCovarianceMatrixBuilder<V extends NumberVector<?>> extends AbstractCovarianceMatrixBuilder<V> { /** - * Compute Covariance Matrix for a complete database + * Compute Covariance Matrix for a complete database. * * @param database the database used * @return Covariance Matrix @@ -52,7 +52,7 @@ public class StandardCovarianceMatrixBuilder<V extends NumberVector<? extends V, } /** - * Compute Covariance Matrix for a collection of database IDs + * Compute Covariance Matrix for a collection of database IDs. 
* * @param ids a collection of ids * @param database the database used diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java index fbca039d..3c60cd0d 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeakEigenPairFilter.java @@ -52,7 +52,7 @@ public class WeakEigenPairFilter implements EigenPairFilter { * and * {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.SignificantEigenPairFilter} */ - public static final OptionID EIGENPAIR_FILTER_WALPHA = OptionID.getOrCreateOptionID("pca.filter.weakalpha", "The minimum strength of the statistically expected variance (1/n) share an eigenvector " + "needs to have to be considered 'strong'."); + public static final OptionID EIGENPAIR_FILTER_WALPHA = new OptionID("pca.filter.weakalpha", "The minimum strength of the statistically expected variance (1/n) share an eigenvector " + "needs to have to be considered 'strong'."); /** * The default value for walpha. 
@@ -85,26 +85,25 @@ public class WeakEigenPairFilter implements EigenPairFilter { // determine sum of eigenvalues double totalSum = 0; - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); totalSum += eigenPair.getEigenvalue(); } double expectEigenvalue = totalSum / eigenPairs.size() * walpha; // determine strong and weak eigenpairs - for(int i = 0; i < eigenPairs.size(); i++) { + for (int i = 0; i < eigenPairs.size(); i++) { EigenPair eigenPair = eigenPairs.getEigenPair(i); - if(eigenPair.getEigenvalue() > expectEigenvalue) { + if (eigenPair.getEigenvalue() > expectEigenvalue) { strongEigenPairs.add(eigenPair); - } - else { + } else { weakEigenPairs.add(eigenPair); } } // the code using this method doesn't expect an empty strong set, // if we didn't find any strong ones, we make all vectors strong - if(strongEigenPairs.size() == 0) { + if (strongEigenPairs.size() == 0) { return new FilteredEigenPairs(new ArrayList<EigenPair>(), weakEigenPairs); } return new FilteredEigenPairs(weakEigenPairs, strongEigenPairs); @@ -127,8 +126,9 @@ public class WeakEigenPairFilter implements EigenPairFilter { @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - DoubleParameter walphaP = new DoubleParameter(EIGENPAIR_FILTER_WALPHA, new GreaterEqualConstraint(0.0), DEFAULT_WALPHA); - if(config.grab(walphaP)) { + DoubleParameter walphaP = new DoubleParameter(EIGENPAIR_FILTER_WALPHA, DEFAULT_WALPHA); + walphaP.addConstraint(new GreaterEqualConstraint(0.0)); + if (config.grab(walphaP)) { walpha = walphaP.getValue(); } } @@ -138,4 +138,4 @@ public class WeakEigenPairFilter implements EigenPairFilter { return new WeakEigenPairFilter(walpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java index fb7f60c3..3066a831 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/WeightedCovarianceMatrixBuilder.java @@ -23,17 +23,17 @@ package de.lmu.ifi.dbs.elki.math.linearalgebra.pca; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collection; -import java.util.Iterator; - import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair; +import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; @@ -41,7 +41,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.ConstantWeight; import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.weightfunctions.WeightFunction; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; 
import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -74,7 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @Title("Weighted Covariance Matrix / PCA") @Description("A PCA modification by using weights while building the covariance matrix, to obtain more stable results") @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "A General Framework for Increasing the Robustness of PCA-based Correlation Clustering Algorithms", booktitle = "Proceedings of the 20th International Conference on Scientific and Statistical Database Management (SSDBM), Hong Kong, China, 2008", url = "http://dx.doi.org/10.1007/978-3-540-69497-7_27") -public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, ?>> extends AbstractCovarianceMatrixBuilder<V> { +public class WeightedCovarianceMatrixBuilder<V extends NumberVector<?>> extends AbstractCovarianceMatrixBuilder<V> { /** * Parameter to specify the weight function to use in weighted PCA, must * implement @@ -84,7 +83,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, * Key: {@code -pca.weight} * </p> */ - public static final OptionID WEIGHT_ID = OptionID.getOrCreateOptionID("pca.weight", "Weight function to use in weighted PCA."); + public static final OptionID WEIGHT_ID = new OptionID("pca.weight", "Weight function to use in weighted PCA."); /** * Holds the weight function. @@ -92,7 +91,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, protected WeightFunction weightfunction; /** - * Holds the distance function used for weight calculation + * Holds the distance function used for weight calculation. */ // TODO: make configurable? private PrimitiveDistanceFunction<? 
super V, DoubleDistance> weightDistance = EuclideanDistanceFunction.STATIC; @@ -100,7 +99,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, /** * Constructor. * - * @param weightfunction + * @param weightfunction Weighting function */ public WeightedCovarianceMatrixBuilder(WeightFunction weightfunction) { super(); @@ -112,19 +111,23 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, * distance information, we'll need to compute it ourselves. Covariance is * tied to Euclidean distance, so it probably does not make much sense to add * support for other distance functions? + * + * @param ids Database ids to process + * @param relation Relation to process + * @return Covariance matrix */ @Override - public Matrix processIds(DBIDs ids, Relation<? extends V> database) { - final int dim = DatabaseUtil.dimensionality(database); + public Matrix processIds(DBIDs ids, Relation<? extends V> relation) { + final int dim = RelationUtil.dimensionality(relation); final CovarianceMatrix cmat = new CovarianceMatrix(dim); - final V centroid = Centroid.make(database, ids).toVector(database); + final V centroid = Centroid.make(relation, ids).toVector(relation); // find maximum distance double maxdist = 0.0; double stddev = 0.0; { for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - V obj = database.get(iter); + V obj = relation.get(iter); double distance = weightDistance.distance(centroid, obj).doubleValue(); stddev += distance * distance; if(distance > maxdist) { @@ -139,7 +142,7 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? 
extends V, } for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - V obj = database.get(iter); + V obj = relation.get(iter); double distance = weightDistance.distance(centroid, obj).doubleValue(); double weight = weightfunction.getWeight(distance, maxdist, stddev); cmat.put(obj, weight); @@ -148,18 +151,19 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, } /** - * Compute Covariance Matrix for a QueryResult Collection + * Compute Covariance Matrix for a QueryResult Collection. * * By default it will just collect the ids and run processIds * * @param results a collection of QueryResults * @param database the database used * @param k number of elements to process + * @param <D> distance type * @return Covariance Matrix */ @Override - public <D extends NumberDistance<?, ?>> Matrix processQueryResults(Collection<? extends DistanceResultPair<D>> results, Relation<? extends V> database, int k) { - final int dim = DatabaseUtil.dimensionality(database); + public <D extends NumberDistance<D, ?>> Matrix processQueryResults(DistanceDBIDResult<D> results, Relation<? extends V> database, int k) { + final int dim = RelationUtil.dimensionality(database); final CovarianceMatrix cmat = new CovarianceMatrix(dim); // avoid bad parameters @@ -172,11 +176,11 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, double stddev = 0.0; { int i = 0; - for(Iterator<? 
extends DistanceResultPair<D>> it = results.iterator(); it.hasNext() && i < k; i++) { - DistanceResultPair<D> res = it.next(); + for (DistanceDBIDResultIter<D> it = results.iter(); it.valid() && i < k; it.advance(), k++) { + DistanceDBIDPair<D> res = it.getDistancePair(); final double dist; - if(res instanceof DoubleDistanceResultPair) { - dist = ((DoubleDistanceResultPair) res).getDoubleDistance(); + if(res instanceof DoubleDistanceDBIDPair) { + dist = ((DoubleDistanceDBIDPair) res).doubleDistance(); } else { dist = res.getDistance().doubleValue(); @@ -194,11 +198,11 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, // calculate weighted PCA int i = 0; - for(Iterator<? extends DistanceResultPair<D>> it = results.iterator(); it.hasNext() && i < k; i++) { - DistanceResultPair<? extends NumberDistance<?, ?>> res = it.next(); + for (DistanceDBIDResultIter<D> it = results.iter(); it.valid() && i < k; it.advance(), k++) { + DistanceDBIDPair<D> res = it.getDistancePair(); final double dist; - if(res instanceof DoubleDistanceResultPair) { - dist = ((DoubleDistanceResultPair) res).getDoubleDistance(); + if(res instanceof DoubleDistanceDBIDPair) { + dist = ((DoubleDistanceDBIDPair) res).doubleDistance(); } else { dist = res.getDistance().doubleValue(); @@ -218,7 +222,10 @@ public class WeightedCovarianceMatrixBuilder<V extends NumberVector<? extends V, * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { + /** + * Weight function. 
+ */ protected WeightFunction weightfunction = null; @Override diff --git a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java index f73d4a7d..9ef589d6 100644 --- a/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java +++ b/src/de/lmu/ifi/dbs/elki/math/linearalgebra/pca/weightfunctions/GaussStddevWeight.java @@ -40,7 +40,7 @@ public final class GaussStddevWeight implements WeightFunction { * * In fact, in most use cases we could leave this away. */ - private final static double scaling = 1 / MathUtil.SQRTTWOPI; + private static final double scaling = 1 / MathUtil.SQRTTWOPI; /** * Get Gaussian Weight using standard deviation for scaling. max is ignored. diff --git a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java index edad211d..097b6fce 100644 --- a/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java +++ b/src/de/lmu/ifi/dbs/elki/math/scales/Scales.java @@ -26,46 +26,53 @@ package de.lmu.ifi.dbs.elki.math.scales; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; /** - * Scales helper class. - * Currently, this will just compute a linear scale for each axis. - * It is planned to add functionality to include some analysis to - * be able to automatically choose log scales when appropriate. + * Scales helper class. Currently, this will just compute a linear scale for + * each axis. It is planned to add functionality to include some analysis to be + * able to automatically choose log scales when appropriate. 
* * @author Erich Schubert - * + * * @apiviz.has LinearScale oneway - - computes */ -public class Scales { +public final class Scales { + /** + * Fake constructor. + */ + private Scales() { + // Do not instantiate. + } + /** * Compute a linear scale for each dimension. * * @param <O> vector type * @param db Database - * @return Scales, indexed starting with 0 (like Vector, not database objects!) + * @return Scales, indexed starting with 0 (like Vector, not database + * objects!) */ - public static <O extends NumberVector<?,? extends Number>> LinearScale[] calcScales(Relation<O> db) { + public static <O extends NumberVector<? extends Number>> LinearScale[] calcScales(Relation<O> db) { if (db == null) { throw new AbortException("No database was given to Scales.calcScales."); } - int dim = DatabaseUtil.dimensionality(db); - DoubleMinMax minmax[] = DoubleMinMax.newArray(dim); - LinearScale scales[] = new LinearScale[dim]; - + int dim = RelationUtil.dimensionality(db); + DoubleMinMax[] minmax = DoubleMinMax.newArray(dim); + LinearScale[] scales = new LinearScale[dim]; + // analyze data - for(DBIDIter iditer = db.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = db.iterDBIDs(); iditer.valid(); iditer.advance()) { O v = db.get(iditer); - for(int d = 0; d < dim; d++) { - minmax[d].put(v.doubleValue(d+1)); + for (int d = 0; d < dim; d++) { + minmax[d].put(v.doubleValue(d)); } } - + // generate scales - for(int d = 0; d < dim; d++) { + for (int d = 0; d < dim; d++) { scales[d] = new LinearScale(minmax[d].getMin(), minmax[d].getMax()); } return scales; diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java index 942fe64b..7c1265dd 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/AbstractSpatialSorter.java @@ -59,37 +59,37 @@ public abstract class 
AbstractSpatialSorter implements SpatialSorter { * @param dim Dimension to sort by * @param threshold Threshold value * @param desc Inversion flag + * @param <T> Object type * @return Pivot position */ protected <T extends SpatialComparable> int pivotizeList1D(List<T> objs, int start, int end, int dim, double threshold, boolean desc) { threshold = 2 * threshold; // faster int s = start, e = end; - while(s < e) { - if(!desc) { + while (s < e) { + if (!desc) { double sminmax = getMinPlusMaxObject(objs, s, dim); - while((sminmax < threshold) && s + 1 <= e && s + 1 < end) { + while ((sminmax < threshold) && s + 1 <= e && s + 1 < end) { s++; sminmax = getMinPlusMaxObject(objs, s, dim); } double eminmax = getMinPlusMaxObject(objs, e - 1, dim); - while((eminmax >= threshold) && s < e - 1 && start < e - 1) { + while ((eminmax >= threshold) && s < e - 1 && start < e - 1) { e--; eminmax = getMinPlusMaxObject(objs, e - 1, dim); } - } - else { + } else { double sminmax = getMinPlusMaxObject(objs, s, dim); - while((sminmax > threshold) && s + 1 <= e && s + 1 < end) { + while ((sminmax > threshold) && s + 1 <= e && s + 1 < end) { s++; sminmax = getMinPlusMaxObject(objs, s, dim); } double eminmax = getMinPlusMaxObject(objs, e - 1, dim); - while((eminmax <= threshold) && s < e - 1 && start < e - 1) { + while ((eminmax <= threshold) && s < e - 1 && start < e - 1) { e--; eminmax = getMinPlusMaxObject(objs, e - 1, dim); } } - if(s >= e) { + if (s >= e) { assert (s == e); break; } @@ -102,7 +102,7 @@ public abstract class AbstractSpatialSorter implements SpatialSorter { } /** - * Compute getMin(dim) + getMax(dim) for the spatial object + * Compute getMin(dim) + getMax(dim) for the spatial object. 
* * @param objs Objects * @param s index @@ -120,25 +120,25 @@ public abstract class AbstractSpatialSorter implements SpatialSorter { * @param objs Objects * @return Array of min, max pairs (length = 2 * dim) */ - public static <T extends SpatialComparable> double[] computeMinMax(List<T> objs) { + public static double[] computeMinMax(List<? extends SpatialComparable> objs) { final int dim = objs.get(0).getDimensionality(); // Compute min and max for each dimension: - double[] mm = new double[dim * 2]; + double[] mm = new double[dim << 1]; { - for(int d = 0; d < dim; d++) { - mm[d * 2] = Double.POSITIVE_INFINITY; - mm[d * 2 + 1] = Double.NEGATIVE_INFINITY; + for (int d = 0; d < dim; d++) { + mm[d << 1] = Double.POSITIVE_INFINITY; + mm[(d << 1) + 1] = Double.NEGATIVE_INFINITY; } - for(SpatialComparable obj : objs) { - for(int d = 0; d < dim; d++) { - mm[2 * d] = Math.min(mm[2 * d], obj.getMin(d + 1)); - mm[2 * d + 1] = Math.max(mm[2 * d + 1], obj.getMax(d + 1)); + for (SpatialComparable obj : objs) { + for (int d = 0; d < dim; d++) { + mm[d << 1] = Math.min(mm[d << 1], obj.getMin(d)); + mm[(d << 1) + 1] = Math.max(mm[(d << 1) + 1], obj.getMax(d)); } } - for(int d = 0; d < dim; d++) { - assert (mm[2 * d] <= mm[2 * d + 1]); + for (int d = 0; d < dim; d++) { + assert (mm[d << 1] <= mm[(d << 1) + 1]); } } return mm; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java index 0b45022c..1ba58511 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/BinarySplitSpatialSorter.java @@ -73,6 +73,7 @@ public class BinarySplitSpatialSorter extends AbstractSpatialSorter { * @param curdim Current dimension * @param dims Number of dimensions * @param comp Comparator to use + * @param <T> Object type */ private <T extends SpatialComparable> void binarySplitSort(List<T> objs, final int start, final int end, int curdim, final int dims, DimC comp) { final int mid = start + ((end - start) >>> 1); @@ -80,7 +81,7 @@ public class BinarySplitSpatialSorter extends AbstractSpatialSorter { comp.dim = curdim; QuickSelect.quickSelect(objs, comp, start, end, mid); // Recurse - final int nextdim = (curdim % dims) + 1; + final int nextdim = (curdim + 1) % dims; if(start < mid - 1) { binarySplitSort(objs, start, mid, nextdim, dims, comp); } @@ -100,6 +101,9 @@ public class BinarySplitSpatialSorter extends AbstractSpatialSorter { * @apiviz.exclude */ private static class DimC implements Comparator<SpatialComparable> { + /** + * Dimension. 
+ */ public int dim = -1; @Override diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java index 9b4af341..317e47c1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/HilbertSpatialSorter.java @@ -61,19 +61,19 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { final int dim = minmax.length >> 1; List<HilbertRef<T>> tmp = new ArrayList<HilbertRef<T>>(end - start); int[] buf = new int[dim]; - for(int i = start; i < end; i++) { + for (int i = start; i < end; i++) { T v = objs.get(i); // Convert into integers - for(int d = 0; d < dim; d++) { - double val = (v.getMin(d + 1) + v.getMax(d + 1)) / 2; - val = Integer.MAX_VALUE * ((val - minmax[2 * d]) / (minmax[2 * d + 1] - minmax[2 * d])); + for (int d = 0, d2 = 0; d < dim; d++, d2 += 2) { + double val = (v.getMin(d) + v.getMax(d)) * .5; + val = Integer.MAX_VALUE * ((val - minmax[d2]) / (minmax[d2 + 1] - minmax[d2])); buf[d] = (int) val; } tmp.add(new HilbertRef<T>(v, coordinatesToHilbert(buf, Integer.SIZE - 1, 1))); } // Sort and copy back Collections.sort(tmp); - for(int i = start; i < end; i++) { + for (int i = start; i < end; i++) { objs.set(i, tmp.get(i - start).vec); } } @@ -86,19 +86,20 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { */ private static class HilbertRef<T extends SpatialComparable> implements Comparable<HilbertRef<T>> { /** - * The referenced object + * The referenced object. */ protected T vec; /** - * Hilbert representation + * Hilbert representation. */ protected long[] bits; /** * Constructor. 
* - * @param vec + * @param vec Vector + * @param bits Bit representation */ protected HilbertRef(T vec, long[] bits) { super(); @@ -118,6 +119,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { * * @param coords Original coordinates * @param bitsperdim Number of bits to use. + * @param offset offset * @return Hilbert address */ public static long[] coordinatesToHilbert(long[] coords, int bitsperdim, int offset) { @@ -127,7 +129,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { int rotation = 0; long[] refl = BitsUtil.zero(numdim); - for(int i = 0; i < bitsperdim; i++) { + for (int i = 0; i < bitsperdim; i++) { final long[] hist = interleaveBits(coords, i + offset); // System.err.println(BitsUtil.toString(hist, // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); @@ -141,7 +143,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); refl = hist; BitsUtil.flipI(refl, rotation); - if(!BitsUtil.get(bits, 0)) { + if (!BitsUtil.get(bits, 0)) { BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); } rotation = nextrot; @@ -151,11 +153,12 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { } /** - * Interleave one int per dimension (using the "bitsperdim" highest bits) to - * a hilbert address. + * Interleave one int per dimension (using the "bitsperdim" highest bits) to a + * hilbert address. * * @param coords Original coordinates * @param bitsperdim Number of bits to use. 
+ * @param offset offset * @return Hilbert address */ public static long[] coordinatesToHilbert(int[] coords, int bitsperdim, int offset) { @@ -165,7 +168,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { int rotation = 0; long[] refl = BitsUtil.zero(numdim); - for(int i = 0; i < bitsperdim; i++) { + for (int i = 0; i < bitsperdim; i++) { final long[] hist = interleaveBits(coords, i + offset); // System.err.println(BitsUtil.toString(hist, // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); @@ -179,7 +182,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); refl = hist; BitsUtil.flipI(refl, rotation); - if(!BitsUtil.get(bits, 0)) { + if (!BitsUtil.get(bits, 0)) { BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); } rotation = nextrot; @@ -194,6 +197,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { * * @param coords Original coordinates * @param bitsperdim Number of bits to use. 
+ * @param offset offset * @return Hilbert address */ public static long[] coordinatesToHilbert(short[] coords, int bitsperdim, int offset) { @@ -203,7 +207,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { int rotation = 0; long[] refl = BitsUtil.zero(numdim); - for(int i = 0; i < bitsperdim; i++) { + for (int i = 0; i < bitsperdim; i++) { final long[] hist = interleaveBits(coords, i + offset); // System.err.println(BitsUtil.toString(hist, // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); @@ -217,7 +221,7 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); refl = hist; BitsUtil.flipI(refl, rotation); - if(!BitsUtil.get(bits, 0)) { + if (!BitsUtil.get(bits, 0)) { BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); } rotation = nextrot; @@ -232,16 +236,17 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { * * @param coords Original coordinates * @param bitsperdim Number of bits to use. 
+ * @param offset offset * @return Hilbert address */ public static long[] coordinatesToHilbert(byte[] coords, int bitsperdim, int offset) { final int numdim = coords.length; final int numbits = numdim * bitsperdim; final long[] output = BitsUtil.zero(numbits); - + int rotation = 0; long[] refl = BitsUtil.zero(numdim); - for(int i = 0; i < bitsperdim; i++) { + for (int i = 0; i < bitsperdim; i++) { final long[] hist = interleaveBits(coords, i + offset); // System.err.println(BitsUtil.toString(hist, // numdim)+" rot:"+rotation+" refl: "+BitsUtil.toString(refl, numdim)); @@ -255,12 +260,12 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { // numbits)+" bits: "+BitsUtil.toString(bits, numdim)); refl = hist; BitsUtil.flipI(refl, rotation); - if(!BitsUtil.get(bits, 0)) { + if (!BitsUtil.get(bits, 0)) { BitsUtil.flipI(refl, (nextrot - 1 + numdim) % numdim); } rotation = nextrot; } - + return output; } @@ -276,8 +281,8 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { final long[] bitset = BitsUtil.zero(numdim); // convert longValues into zValues final long mask = 1L << 63 - iter; - for(int dim = 0; dim < numdim; dim++) { - if((coords[dim] & mask) != 0) { + for (int dim = 0; dim < numdim; dim++) { + if ((coords[dim] & mask) != 0) { BitsUtil.setI(bitset, dim); } } @@ -296,8 +301,8 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { final long[] bitset = BitsUtil.zero(numdim); // convert longValues into zValues final long mask = 1L << 31 - iter; - for(int dim = 0; dim < numdim; dim++) { - if((coords[dim] & mask) != 0) { + for (int dim = 0; dim < numdim; dim++) { + if ((coords[dim] & mask) != 0) { BitsUtil.setI(bitset, dim); } } @@ -316,8 +321,8 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { final long[] bitset = BitsUtil.zero(numdim); // convert longValues into zValues final long mask = 1L << 15 - iter; - for(int dim = 0; dim < numdim; dim++) { - if((coords[dim] & mask) != 0) { + for (int dim = 0; 
dim < numdim; dim++) { + if ((coords[dim] & mask) != 0) { BitsUtil.setI(bitset, dim); } } @@ -336,11 +341,11 @@ public class HilbertSpatialSorter extends AbstractSpatialSorter { final long[] bitset = BitsUtil.zero(numdim); // convert longValues into zValues final long mask = 1L << 7 - iter; - for(int dim = 0; dim < numdim; dim++) { - if((coords[dim] & mask) != 0) { + for (int dim = 0; dim < numdim; dim++) { + if ((coords[dim] & mask) != 0) { BitsUtil.setI(bitset, dim); } } return bitset; } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java index 50cf1946..865197ae 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/PeanoSpatialSorter.java @@ -95,6 +95,7 @@ public class PeanoSpatialSorter extends AbstractSpatialSorter { * @param desc Current ordering */ protected <T extends SpatialComparable> void peanoSort(List<T> objs, int start, int end, double[] mms, int dim, BitSet bits, boolean desc) { + final int dims = mms.length >> 1; // Find the splitting points. final double min = mms[2 * dim], max = mms[2 * dim + 1]; final double tfirst = (min + min + max) / 3.; @@ -116,14 +117,14 @@ public class PeanoSpatialSorter extends AbstractSpatialSorter { // Split the data set into three parts int fsplit, ssplit; if(!inv) { - fsplit = pivotizeList1D(objs, start, end, dim + 1, tfirst, false); - ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim + 1, tsecond, false) : fsplit; + fsplit = pivotizeList1D(objs, start, end, dim, tfirst, false); + ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim, tsecond, false) : fsplit; } else { - fsplit = pivotizeList1D(objs, start, end, dim + 1, tsecond, true); - ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim + 1, tfirst, true) : fsplit; + fsplit = pivotizeList1D(objs, start, end, dim, tsecond, true); + ssplit = (fsplit < end - 1) ? pivotizeList1D(objs, fsplit, end, dim, tfirst, true) : fsplit; } - int nextdim = (dim + 1) % objs.get(0).getDimensionality(); + int nextdim = (dim + 1) % dims; // Do we need to update the min/max values? if(start < fsplit - 1) { mms[2 * dim] = !inv ? 
min : tsecond; diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java index 2473dff5..fe23a854 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/SpatialSorter.java @@ -39,7 +39,7 @@ public interface SpatialSorter { * @param <T> actual type we sort * @param objs the spatial objects to be sorted */ - public <T extends SpatialComparable> void sort(List<T> objs); + <T extends SpatialComparable> void sort(List<T> objs); /** * Sort part of the list (start to end). @@ -50,5 +50,5 @@ public interface SpatialSorter { * @param end End of range (e.g. <code>site()</code>) * @param minmax Array with dim pairs of (min, max) of value ranges */ - public <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax); -} + <T extends SpatialComparable> void sort(List<T> objs, int start, int end, double[] minmax); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java index b8fc63bd..c5a91699 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveSpatialSorter.java @@ -68,7 +68,7 @@ public class ZCurveSpatialSorter extends AbstractSpatialSorter { return; } } - int split = pivotizeList1D(objs, start, end, dim + 1, spos, false); + int split = pivotizeList1D(objs, start, end, dim, spos, false); assert (start <= split && split <= end); int nextdim = (dim + 1) % objs.get(0).getDimensionality(); // LoggingUtil.warning("dim: " + dim + " min: " + min + " split: " + spos + diff --git a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java index 108721eb..26137cb3 100644 --- a/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java +++ b/src/de/lmu/ifi/dbs/elki/math/spacefillingcurves/ZCurveTransformer.java @@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; /** * Class to transform a relation to its Z coordinates. @@ -39,17 +39,17 @@ import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; */ public class ZCurveTransformer { /** - * Maximum values in each dimension + * Maximum values in each dimension. */ private final double[] maxValues; /** - * Minimum values in each dimension + * Minimum values in each dimension. */ private final double[] minValues; /** - * Dimensionality + * Dimensionality. 
*/ private final int dimensionality; @@ -59,8 +59,8 @@ public class ZCurveTransformer { * @param relation Relation to transform * @param ids IDs subset to process */ - public ZCurveTransformer(Relation<? extends NumberVector<?, ?>> relation, DBIDs ids) { - this.dimensionality = DatabaseUtil.dimensionality(relation); + public ZCurveTransformer(Relation<? extends NumberVector<?>> relation, DBIDs ids) { + this.dimensionality = RelationUtil.dimensionality(relation); this.minValues = new double[dimensionality]; this.maxValues = new double[dimensionality]; @@ -68,9 +68,9 @@ public class ZCurveTransformer { Arrays.fill(minValues, Double.POSITIVE_INFINITY); Arrays.fill(maxValues, Double.NEGATIVE_INFINITY); for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - NumberVector<?, ?> vector = relation.get(iter); + NumberVector<?> vector = relation.get(iter); for(int dim = 0; dim < dimensionality; ++dim) { - double dimValue = vector.doubleValue(dim + 1); + double dimValue = vector.doubleValue(dim); minValues[dim] = Math.min(minValues[dim], dimValue); maxValues[dim] = Math.max(maxValues[dim], dimValue); } @@ -84,7 +84,7 @@ public class ZCurveTransformer { * @return Z curve value as bigint */ @Deprecated - public BigInteger asBigInteger(NumberVector<?, ?> vector) { + public BigInteger asBigInteger(NumberVector<?> vector) { return new BigInteger(asByteArray(vector)); } @@ -94,13 +94,13 @@ public class ZCurveTransformer { * @param vector Vector to transform * @return Z curve value as byte array */ - public byte[] asByteArray(NumberVector<?, ?> vector) { + public byte[] asByteArray(NumberVector<?> vector) { final long[] longValueList = new long[dimensionality]; for(int dim = 0; dim < dimensionality; ++dim) { final double minValue = minValues[dim]; final double maxValue = maxValues[dim]; - double dimValue = vector.doubleValue(dim + 1); + double dimValue = vector.doubleValue(dim); dimValue = (dimValue - minValue) / (maxValue - minValue); longValueList[dim] = (long) 
(dimValue * (Long.MAX_VALUE)); @@ -120,5 +120,4 @@ public class ZCurveTransformer { } return bytes; } - }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java index ed9d8d58..25aa2ea7 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/EpanechnikovKernelDensityFunction.java @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Epanechnikov kernel density estimator. @@ -49,4 +50,18 @@ public final class EpanechnikovKernelDensityFunction implements KernelDensityFun * Static instance. */ public static final EpanechnikovKernelDensityFunction KERNEL = new EpanechnikovKernelDensityFunction(); + + /** + * Parameterization stub. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected EpanechnikovKernelDensityFunction makeInstance() { + return KERNEL; + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java index 744a9108..2cd15408 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/GaussianKernelDensityFunction.java @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Gaussian kernel density estimator. @@ -51,4 +52,18 @@ public final class GaussianKernelDensityFunction implements KernelDensityFunctio * Static instance. 
*/ public static final GaussianKernelDensityFunction KERNEL = new GaussianKernelDensityFunction(); + + /** + * Parameterization stub. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected GaussianKernelDensityFunction makeInstance() { + return KERNEL; + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java index 3bb0e1f6..d7ffefb8 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/KernelDensityEstimator.java @@ -71,7 +71,7 @@ public class KernelDensityEstimator { dens = new double[data.length]; var = new double[data.length]; - double halfwidth = ((max - min) / windows) / 2; + double halfwidth = ((max - min) / windows) * .5; // collect data points for(int current = 0; current < data.length; current++) { @@ -84,7 +84,7 @@ public class KernelDensityEstimator { } double realwidth = (Math.min(data[current] + halfwidth, max) - Math.max(min, data[current] - halfwidth)); double weight = realwidth / (2 * halfwidth); - dens[current] = value / (data.length * realwidth / 2); + dens[current] = value / (data.length * realwidth * .5); var[current] = 1 / weight; } } diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java index 0e674146..79c84701 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/MultipleLinearRegression.java @@ -138,7 +138,7 @@ public class MultipleLinearRegression { */ @Override public String toString() { - StringBuffer msg = new StringBuffer(); + StringBuilder msg = new StringBuilder(); msg.append("x = ").append(FormatUtil.format(x, 9, 4)); msg.append("\ny = ").append(FormatUtil.format(y, 9, 4)); msg.append("\nb = 
").append(FormatUtil.format(b, 9, 4)); diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java deleted file mode 100644 index a8f45f9d..00000000 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/StudentDistribution.java +++ /dev/null @@ -1,189 +0,0 @@ -package de.lmu.ifi.dbs.elki.math.statistics; - -/* - This file is part of ELKI: - Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme - ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -import gnu.trove.map.TDoubleDoubleMap; -import gnu.trove.map.TIntObjectMap; -import gnu.trove.map.hash.TDoubleDoubleHashMap; -import gnu.trove.map.hash.TIntObjectHashMap; - -/** - * Tabelarizes the values for student distribution. - * - * @author Elke Achtert - */ -public class StudentDistribution { - /** - * Available alpha values. - */ - public static double _6000 = 0.6; - - /** - * Available alpha values. - */ - public static double _8000 = 0.8; - - /** - * Available alpha values. - */ - public static double _9000 = 0.9; - - /** - * Available alpha values. - */ - public static double _9500 = 0.95; - - /** - * Available alpha values. 
- */ - public static double _9750 = 0.975; - - /** - * Available alpha values. - */ - public static double _9900 = 0.99; - - /** - * Available alpha values. - */ - public static double _9950 = 0.995; - - /** - * Available alpha values. - */ - public static double _9990 = 0.999; - - /** - * Available alpha values. - */ - public static double _9995 = 0.9995; - - /** - * Available alpha values. - */ - public static double _4000 = 0.4; - - /** - * Available alpha values. - */ - public static double _2000 = 0.2; - - /** - * Available alpha values. - */ - public static double _1000 = 0.1; - - /** - * Available alpha values. - */ - public static double _0500 = 0.05; - - /** - * Available alpha values. - */ - public static double _0250 = 0.025; - - /** - * Available alpha values. - */ - public static double _0100 = 0.01; - - /** - * Available alpha values. - */ - public static double _0050 = 0.005; - - /** - * Available alpha values. - */ - public static double _0010 = 0.001; - - /** - * Available alpha values. - */ - public static double _0005 = 0.005; - - /** - * Holds the t-values. - */ - private static TIntObjectMap<TDoubleDoubleMap> tValues = new TIntObjectHashMap<TDoubleDoubleMap>(); - - static { - put(31, new double[] { 0.2533, 0.8416, 1.2816, 1.6449, 1.96, 2.3263, 2.5758, 3.0903, 3.2906 }); - } - - /** - * Returns the t-value for the given alpha-value and degree of freedom. 
- * - * @param alpha the alpha value - * @param n the degree of freedom - * @return the t-value for the given alpha-value and degree of freedom - */ - public static double tValue(double alpha, int n) { - if(n > 30) { - n = 31; - } - TDoubleDoubleMap map = tValues.get(n); - if(map == null) { - throw new IllegalArgumentException("t-values for n=" + n + " not yet tabularized!"); - } - - Double value = map.get(alpha); - if(value == null) { - throw new IllegalArgumentException("t-values for alpha=" + alpha + " not tabularized!"); - } - - return value; - } - - /** - * Stores the specified t-values for the given degree of freedom. - * - * @param n the degree of freedom - * @param values the t-values - */ - private static void put(int n, double[] values) { - TDoubleDoubleMap map = new TDoubleDoubleHashMap(); - map.put(_6000, values[0]); - map.put(_8000, values[1]); - map.put(_9000, values[2]); - map.put(_9500, values[3]); - map.put(_9750, values[4]); - map.put(_9900, values[5]); - map.put(_9950, values[6]); - map.put(_9990, values[7]); - map.put(_9995, values[8]); - - map.put(_4000, -values[0]); - map.put(_2000, -values[1]); - map.put(_1000, -values[2]); - map.put(_0500, -values[3]); - map.put(_0250, -values[4]); - map.put(_0100, -values[5]); - map.put(_0050, -values[6]); - map.put(_0010, -values[7]); - map.put(_0005, -values[8]); - tValues.put(n, map); - } -}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java index e3c23b2a..aee544de 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/TriangularKernelDensityFunction.java @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Triangular kernel density estimator. @@ -49,4 +50,18 @@ public final class TriangularKernelDensityFunction implements KernelDensityFunct * Static instance. */ public static final TriangularKernelDensityFunction KERNEL = new TriangularKernelDensityFunction(); + + /** + * Parameterization stub. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected TriangularKernelDensityFunction makeInstance() { + return KERNEL; + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java index 8d85528f..66fe7888 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/UniformKernelDensityFunction.java @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.math.statistics; along with this program. If not, see <http://www.gnu.org/licenses/>. */ +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; /** * Uniform / Rectangular kernel density estimator. @@ -49,4 +50,18 @@ public final class UniformKernelDensityFunction implements KernelDensityFunction * Static instance. 
*/ public static final UniformKernelDensityFunction KERNEL = new UniformKernelDensityFunction(); + + /** + * Parameterization stub. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + @Override + protected UniformKernelDensityFunction makeInstance() { + return KERNEL; + } + } } diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java index 84c86e98..5dc5b399 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiDistribution.java @@ -91,7 +91,7 @@ public class ChiDistribution implements DistributionWithRandom { * @return CDF value */ public static double cdf(double val, double dof) { - return GammaDistribution.regularizedGammaP(dof / 2, val * val / 2); + return GammaDistribution.regularizedGammaP(dof * .5, val * val * .5); } // FIXME: implement! 
diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java index 8555afd3..efa24079 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ChiSquaredDistribution.java @@ -65,8 +65,8 @@ public class ChiSquaredDistribution extends GammaDistribution { if(x == 0) { return 0.0; } - final double k = dof / 2; - if(k == 1.0) { + final double k = dof * .5; + if(Math.abs(k - 1.0) < Double.MIN_NORMAL) { return Math.exp(-x * 2.0) * 2.0; } return Math.exp((k - 1.0) * Math.log(x * 2.0) - x * 2.0 - logGamma(k)) * 2.0; diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java index 5b6cd286..ad4ef944 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/Distribution.java @@ -37,7 +37,7 @@ public interface Distribution { * @param val existing value * @return distribution density */ - public double pdf(double val); + double pdf(double val); /** * Return the cumulative density function at the given value. @@ -45,7 +45,7 @@ public interface Distribution { * @param val existing value * @return cumulative density */ - public double cdf(double val); + double cdf(double val); /** * Quantile aka probit (for normal) aka inverse CDF (invcdf, cdf^-1) function. @@ -53,7 +53,7 @@ public interface Distribution { * @param val Quantile to find * @return Quantile position */ - public double quantile(double val); + double quantile(double val); /** * Describe the distribution @@ -61,5 +61,5 @@ public interface Distribution { * @return description */ @Override - public String toString(); -} + String toString(); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java index af272528..02f5002f 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/DistributionWithRandom.java @@ -33,5 +33,5 @@ public interface DistributionWithRandom extends Distribution { * * @return new random value */ - public double nextRandom(); -} + double nextRandom(); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java new file mode 100644 index 00000000..866f40d6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/ExponentialDistribution.java @@ -0,0 +1,126 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +/** + * Exponential distribution. + * + * @author Erich Schubert + */ +public class ExponentialDistribution implements DistributionWithRandom { + /** + * Random generator. + */ + Random rnd; + + /** + * Rate, inverse of mean + */ + double rate; + + /** + * Constructor. + * + * @param rate Rate parameter (1/scale) + */ + public ExponentialDistribution(double rate) { + this(rate, new Random()); + } + + /** + * Constructor. 
+ * + * @param rate Rate parameter (1/scale) + * @param random Random generator + */ + public ExponentialDistribution(double rate, Random random) { + super(); + this.rate = rate; + this.rnd = random; + } + + @Override + public double pdf(double val) { + return rate * Math.exp(-rate * val); + } + + /** + * PDF, static version + * + * @param val Value to compute PDF at + * @param rate Rate parameter (1/scale) + * @return probability density + */ + public static double pdf(double val, double rate) { + return rate * Math.exp(-rate * val); + } + + @Override + public double cdf(double val) { + return 1 - Math.exp(-rate * val); + } + + /** + * Cumulative density, static version + * + * @param val Value to compute CDF at + * @param rate Rate parameter (1/scale) + * @return cumulative density + */ + public static double cdf(double val, double rate) { + return 1 - Math.exp(-rate * val); + } + + @Override + public double quantile(double val) { + return -Math.log(1 - val) / rate; + } + + /** + * Quantile function, static version + * + * @param val Value to compute quantile for + * @param rate Rate parameter + * @return Quantile + */ + public static double quantile(double val, double rate) { + return -Math.log(1 - val) / rate; + } + + /** + * This method currently uses the naive approach of returning + * <code>-log(uniform)</code>. + * + * TODO: there are variants that do not rely on the log method and are faster. + * We need to implement and evaluate these. For details: see + * "Computer methods for sampling from the exponential and normal distributions" + * J. H. Ahrens, U. Dieter, https://dl.acm.org/citation.cfm?id=361593 + */ + @Override + public double nextRandom() { + return -Math.log(rnd.nextDouble()) / rate; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java new file mode 100644 index 00000000..df8fecda --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/HaltonUniformDistribution.java @@ -0,0 +1,313 @@ +package de.lmu.ifi.dbs.elki.math.statistics.distribution; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.Random; + +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.Primes; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + +/** + * Halton sequences are a pseudo-uniform distribution. The data is actually too + * regular for a true uniform distribution, but as such will of course often + * appear to be uniform. + * + * Technically, they are based on Van der Corput sequence and the Von Neumann + * Kakutani transformation. These produce a series of integers which then are + * converted to floating point values. 
+ * + * To randomize, we just choose a random starting position, as indicated by + * + * Reference: + * <p> + * Randomized halton sequences<br> + * Wang, X. and Hickernell, F.J.<br /> + * Mathematical and Computer Modelling Vol. 32 (7) + * </p> + * + * <b>Important note: this code hasn't been double checked yet. While it + * probably works for some simple cases such as example data set generation, do + * <em>not</em> rely on it for e.g. quasi monte carlo methods without + * double-checking the quality, and looking at more advanced methods!</b> + * + * Let me repeat this: this code was written <b>to generate toy datasets</b>. It + * <b>may have deficits</b> for other uses! <b>There is a high chance it will + * produce correlated data when used for more than one dimension.</b> - for toy + * data sets, try different random seeds until you find one that works for you. + * + * TODO: find an improved algorithm that takes care of a better randomization, + * for example by adding scrambling. + * + * @author Erich Schubert + */ +@Reference(title = "Randomized halton sequences", authors = "Wang, X. and Hickernell, F.J.", booktitle = "Mathematical and Computer Modelling Vol. 32 (7)", url = "http://dx.doi.org/10.1016/S0895-7177(00)00178-3") +public class HaltonUniformDistribution implements DistributionWithRandom { + /** + * Minimum + */ + private double min; + + /** + * Maximum + */ + private double max; + + /** + * Len := max - min + */ + private double len; + + /** + * Maximum number of iterations of fast variant + */ + private static final int MAXFAST = 1000; + + /** + * Threshold + */ + private static final double ALMOST_ONE = 1.0 - 1e-10; + + /** + * Base value + */ + final short base; + + /** + * Inverse of base, for faster division by multiplication. + */ + final double invbase; + + /** + * Logarithm of base. + */ + final double logbase; + + /** + * Maximum integer to use + */ + final int maxi; + + /** + * Counter, for max iterations of fast function. 
+ */ + int counter = 0; + + /** + * Current value + */ + double current; + + /** + * Integer inverse + */ + long inverse; + + /** + * Constructor for a halton pseudo uniform distribution on the interval [min, + * max[ + * + * @param min Minimum value + * @param max Maximum value + * @param base Base value + * @param seed Random seed (starting value) + */ + public HaltonUniformDistribution(double min, double max, int base, double seed) { + super(); + // Swap parameters if they were given incorrectly. + if (min > max) { + double tmp = min; + min = max; + max = tmp; + } + this.min = min; + this.max = max; + this.len = max - min; + + this.base = (short) base; + this.invbase = 1.0 / base; + this.logbase = Math.log(base); + // 32 bit * log(2) / log(base) + this.maxi = (int) (32.0 * MathUtil.LOG2 / logbase); + this.current = seed; + this.inverse = inverse(seed); + } + + /** + * Constructor for a halton pseudo uniform distribution on the interval [min, + * max[ + * + * @param min Minimum value + * @param max Maximum value + */ + public HaltonUniformDistribution(double min, double max) { + // TODO: use different starting primes? + this(min, max, new Random()); + } + + /** + * Constructor for a halton pseudo uniform distribution on the interval [min, + * max[ + * + * @param min Minimum value + * @param max Maximum value + * @param rnd Random generator + */ + public HaltonUniformDistribution(double min, double max, Random rnd) { + // TODO: use different starting primes? + this(min, max, choosePrime(rnd), rnd.nextDouble()); + } + + /** + * Choose a random prime. We try to avoid the later primes, as they are known + * to cause too correlated data. 
+ * + * @param rnd Random generator + * @return Prime + */ + private static int choosePrime(Random rnd) { + return Primes.FIRST_PRIMES[rnd.nextInt(10)]; + } + + @Override + public double pdf(double val) { + if (val < min || val >= max) { + return 0.0; + } + return 1.0 / len; + } + + @Override + public double cdf(double val) { + if (val < min) { + return 0.0; + } + if (val > max) { + return 1.0; + } + return (val - min) / len; + } + + @Override + public double quantile(double val) { + return min + len * val; + } + + /** + * Compute the inverse with respect to the given base. + * + * @param current Current value + * @return Integer inverse + */ + private long inverse(double current) { + // Represent to base b. + short[] digits = new short[maxi]; + int j; + for (j = 0; j < maxi; j++) { + current *= base; + digits[j] = (short) current; + current -= digits[j]; + if (current <= 1e-10) { + break; + } + } + long inv = 0; + for (j = maxi - 1; j >= 0; j--) { + inv = inv * base + digits[j]; + } + return inv; + } + + /** + * Compute the radical inverse of i. + * + * @param i Input long value + * @return Double radical inverse + */ + private double radicalInverse(long i) { + double digit = 1.0 / (double) base; + double radical = digit; + double inverse = 0.0; + while (i > 0) { + inverse += digit * (double) (i % base); + digit *= radical; + i /= base; + } + return inverse; + } + + /** + * Compute the next radical inverse. 
+ * + * @return Next inverse + */ + private double nextRadicalInverse() { + counter++; + // Do at most MAXFAST approximate steps + if (counter >= MAXFAST) { + counter = 0; + inverse += MAXFAST; + current = radicalInverse(inverse); + return current; + } + // Fast approximation: + double nextInverse = current + invbase; + if (nextInverse < ALMOST_ONE) { + current = nextInverse; + return current; + } else { + double digit1 = invbase, digit2 = invbase * invbase; + while (current + digit2 >= ALMOST_ONE) { + digit1 = digit2; + digit2 *= invbase; + } + current += (digit1 - 1.0) + digit2; + return current; + } + } + + @Override + public double nextRandom() { + return min + nextRadicalInverse() * len; + } + + @Override + public String toString() { + return "HaltonUniformDistribution(min=" + min + ", max=" + max + ")"; + } + + /** + * @return the minimum value + */ + public double getMin() { + return min; + } + + /** + * @return the maximum value + */ + public double getMax() { + return max; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java index 9180b59e..1845dec1 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/NormalDistribution.java @@ -315,7 +315,7 @@ public class NormalDistribution implements DistributionWithRandom { public static double pdf(double x, double mu, double sigma) { final double x_mu = x - mu; final double sigmasq = sigma * sigma; - return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-1 * x_mu * x_mu / 2 / sigmasq); + return 1 / (Math.sqrt(MathUtil.TWOPI * sigmasq)) * Math.exp(-.5 * x_mu * x_mu / sigmasq); } /** diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java index 53fb0dc8..a4ea9402 100644 --- 
a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/PoissonDistribution.java @@ -53,26 +53,26 @@ public class PoissonDistribution implements Distribution { private double p; /** Stirling error constants: 1./12 */ - private final static double S0 = 0.08333333333333333333333d; + private static final double S0 = 0.08333333333333333333333d; /** Stirling error constants: 1./360 */ - private final static double S1 = 0.0027777777777777777777777777778d; + private static final double S1 = 0.0027777777777777777777777777778d; /** Stirling error constants: 1./1260 */ - private final static double S2 = 0.00079365079365079365079365d; + private static final double S2 = 0.00079365079365079365079365d; /** Stirling error constants: 1./1680 */ - private final static double S3 = 0.000595238095238095238095238095d; + private static final double S3 = 0.000595238095238095238095238095d; /** Stirling error constants: 1./1188 */ - private final static double S4 = 0.00084175084175084175084175084175d; + private static final double S4 = 0.00084175084175084175084175084175d; /** * Exact table values for n <= 15 in steps of 0.5 * * sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) ) */ - private final static double STIRLING_EXACT_ERROR[] = {// + private static final double STIRLING_EXACT_ERROR[] = {// 0.0, // 0.0 0.1534264097200273452913848, // 0.5 0.0810614667953272582196702, // 1.0 @@ -273,7 +273,7 @@ public class PoissonDistribution implements Distribution { private static double stirlingError(int n) { // Try to use a table value: if(n < 16) { - return STIRLING_EXACT_ERROR[n * 2]; + return STIRLING_EXACT_ERROR[n << 1]; } final double nn = n * n; // Use the appropriate number of terms diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java index 2e9e0d15..fcb96c12 100644 --- 
a/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/distribution/StudentsTDistribution.java @@ -73,7 +73,7 @@ public class StudentsTDistribution implements Distribution { */
public static double pdf(double val, int v) {
// TODO: improve precision by computing "exp" last?
- return Math.exp(GammaDistribution.logGamma((v + 1) / 2) - GammaDistribution.logGamma(v / 2)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) / 2));
+ return Math.exp(GammaDistribution.logGamma((v + 1) * .5) - GammaDistribution.logGamma(v * .5)) * (1 / Math.sqrt(v * Math.PI)) * Math.pow(1 + (val * val) / v, -((v + 1) * .5));
}
/**
@@ -85,6 +85,6 @@ public class StudentsTDistribution implements Distribution { */
public static double cdf(double val, int v) {
double x = v / (val * val + v);
- return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v / 2, 0.5));
+ return 1 - (0.5 * BetaDistribution.regularizedIncBeta(x, v * .5, 0.5));
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java index f50f469f..363dbfea 100644 --- a/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java +++ b/src/de/lmu/ifi/dbs/elki/math/statistics/tests/GoodnessOfFitTest.java @@ -38,12 +38,12 @@ public interface GoodnessOfFitTest extends Parameterizable { /**
* Measure the deviation of a full sample from a conditional sample.
*
- * Sample arrays *may* be modified, e.g. sorted, by the test.
+ * Sample arrays <em>may</em> be modified, e.g. sorted, by the test.
*
* @param fullSample Full sample
* @param conditionalSample Conditional sample
*
* @return Deviation
*/
- public double deviation(double[] fullSample, double[] conditionalSample);
-}
+ double deviation(double[] fullSample, double[] conditionalSample);
+}
\ No newline at end of file |