diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java | 119 |
1 files changed, 62 insertions, 57 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java index 007efe6f..5ebee64b 100644 --- a/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java +++ b/src/de/lmu/ifi/dbs/elki/utilities/referencepoints/GridBasedReferencePoints.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.utilities.referencepoints; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,50 +29,35 @@ import java.util.Collection; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.MathUtil; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; -import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; /** * Grid-based strategy to pick reference points. * * @author Erich Schubert - * - * @param <V> Object type */ -public class GridBasedReferencePoints<V extends NumberVector<?>> implements ReferencePointsHeuristic<V> { - // TODO: add "grid sampling" option. - - /** - * Parameter to specify the grid resolution. - * <p> - * Key: {@code -grid.size} - * </p> - */ - public static final OptionID GRID_ID = new OptionID("grid.size", "The number of partitions in each dimension. Points will be placed on the edges of the grid, except for a grid size of 0, where only the mean is generated as reference point."); - +public class GridBasedReferencePoints implements ReferencePointsHeuristic { /** - * Parameter to specify the extra scaling of the space, to allow - * out-of-data-space reference points. - * <p> - * Key: {@code -grid.oversize} - * </p> + * Class logger. */ - public static final OptionID GRID_SCALE_ID = new OptionID("grid.scale", "Scale the grid by the given factor. This can be used to obtain reference points outside the used data space."); + private static final Logging LOG = Logging.getLogger(GridBasedReferencePoints.class); /** - * Holds the value of {@link #GRID_ID}. + * Holds the grid resolution. */ protected int gridres; /** - * Holds the value of {@link #GRID_SCALE_ID}. + * Holds the grid scale. */ protected double gridscale; @@ -89,46 +74,47 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe } @Override - public <T extends V> Collection<V> getReferencePoints(Relation<T> db) { - Relation<V> database = DatabaseUtil.relationUglyVectorCast(db); - Pair<V, V> minmax = DatabaseUtil.computeMinMax(database); - NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(database); - - int dim = RelationUtil.dimensionality(db); + public Collection<? extends NumberVector> getReferencePoints(Relation<? extends NumberVector> db) { + double[][] minmax = RelationUtil.computeMinMax(db); + final int dim = minmax[0].length; // Compute mean from minmax. double[] mean = new double[dim]; for(int d = 0; d < dim; d++) { - mean[d] = (minmax.first.doubleValue(d) + minmax.second.doubleValue(d)) * .5; + mean[d] = (minmax[0][d] + minmax[1][d]) * .5; } - int gridpoints = Math.max(1, MathUtil.ipowi(gridres + 1, dim)); - ArrayList<V> result = new ArrayList<>(gridpoints); + if(gridres <= 0) { + LOG.warning("Grid of resolution " + gridres + " will have a single point only."); + ArrayList<Vector> result = new ArrayList<>(1); + result.add(new Vector(mean)); + return result; + } + final int grids = gridres + 1; + final int gridpoints = MathUtil.ipowi(grids, dim); + if(gridpoints < 0) { + throw new AbortException("Grids with more than 2^31 are not supported, or meaningful."); + } + if(gridpoints < 0 || gridpoints > db.size()) { + LOG.warning("Grid has " + gridpoints + " points, but you only have " + db.size() + " observations."); + } + ArrayList<Vector> result = new ArrayList<>(gridpoints); double[] delta = new double[dim]; - if(gridres > 0) { - double halfgrid = gridres / 2.0; - for(int d = 0; d < dim; d++) { - delta[d] = (minmax.second.doubleValue(d) - minmax.first.doubleValue(d)) / gridres; - } + for(int d = 0; d < dim; d++) { + delta[d] = (minmax[1][d] - minmax[0][d]) / gridres; + } - double[] vec = new double[dim]; - for(int i = 0; i < gridpoints; i++) { - int acc = i; - for(int d = 0; d < dim; d++) { - int coord = acc % (gridres + 1); - acc = acc / (gridres + 1); - vec[d] = mean[d] + (coord - halfgrid) * delta[d] * gridscale; - } - V newp = factory.newNumberVector(vec); - // logger.debug("New reference point: " + FormatUtil.format(vec)); - result.add(newp); + double halfgrid = gridres * .5; + for(int i = 0; i < gridpoints; i++) { + double[] vec = new double[dim]; // Will be returned! + int acc = i; + for(int d = 0; d < dim; d++) { + int coord = acc % grids; + acc /= grids; + vec[d] = mean[d] + (coord - halfgrid) * delta[d] * gridscale; } + result.add(new Vector(vec)); } - else { - result.add(factory.newNumberVector(mean)); - // logger.debug("New reference point: " + FormatUtil.format(mean)); - } - return result; } @@ -139,7 +125,26 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { + public static class Parameterizer extends AbstractParameterizer { + // TODO: add "grid sampling" option. + + /** + * Parameter to specify the grid resolution. + * <p> + * Key: {@code -grid.size} + * </p> + */ + public static final OptionID GRID_ID = new OptionID("grid.size", "The number of partitions in each dimension. Points will be placed on the edges of the grid, except for a grid size of 0, where only the mean is generated as reference point."); + + /** + * Parameter to specify the extra scaling of the space, to allow + * out-of-data-space reference points. + * <p> + * Key: {@code -grid.oversize} + * </p> + */ + public static final OptionID GRID_SCALE_ID = new OptionID("grid.scale", "Scale the grid by the given factor. This can be used to obtain reference points outside the used data space."); + /** * Holds the value of {@link #GRID_ID}. */ @@ -167,8 +172,8 @@ public class GridBasedReferencePoints<V extends NumberVector<?>> implements Refe } @Override - protected GridBasedReferencePoints<V> makeInstance() { - return new GridBasedReferencePoints<>(gridres, gridscale); + protected GridBasedReferencePoints makeInstance() { + return new GridBasedReferencePoints(gridres, gridscale); } } } |