package de.lmu.ifi.dbs.elki.algorithm; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2015 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.Arrays; import de.lmu.ifi.dbs.elki.algorithm.KNNDistancesSampler.KNNDistanceOrderResult; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.geometry.XYCurve; import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; /** * Provides an order of the kNN-distances for all objects within the database. * * This class can be used to estimate parameters for other algorithms, such as * estimating the epsilon parameter for DBSCAN: set k to minPts-1, and then * choose a percentile from the sample as epsilon, or plot the result as a graph * and look for a bend or knee in this plot. * * @author Arthur Zimek * @since 0.7.0 * * @param the type of objects handled by this Algorithm */ @Title("KNN-Distance-Order") @Description("Assesses the knn distances for a specified k and orders them.") @Alias("de.lmu.ifi.dbs.elki.algorithm.KNNDistanceOrder") public class KNNDistancesSampler extends AbstractDistanceBasedAlgorithm { /** * The logger for this class. */ private static final Logging LOG = Logging.getLogger(KNNDistancesSampler.class); /** * Parameter k. */ protected int k; /** * Sampling percentage. */ protected double sample; /** * Random number seeding. */ private RandomFactory rnd; /** * Constructor. * * @param distanceFunction Distance function * @param k k Parameter * @param sample Sampling rate, or sample size (when > 1) * @param rnd Random source. */ public KNNDistancesSampler(DistanceFunction distanceFunction, int k, double sample, RandomFactory rnd) { super(distanceFunction); this.k = k; this.sample = sample; this.rnd = rnd; } /** * Provides an order of the kNN-distances for all objects within the specified * database. * * @param database Database * @param relation Relation * @return Result */ public KNNDistanceOrderResult run(Database database, Relation relation) { final DistanceQuery distanceQuery = database.getDistanceQuery(relation, getDistanceFunction()); final KNNQuery knnQuery = database.getKNNQuery(distanceQuery, k + 1); final int size = (int) ((sample <= 1.) ? Math.ceil(relation.size() * sample) : sample); DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), size, rnd); FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling kNN distances", size, LOG) : null; double[] knnDistances = new double[size]; int i = 0; for(DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance(), i++) { final KNNList neighbors = knnQuery.getKNNForDBID(iditer, k + 1); knnDistances[i] = neighbors.getKNNDistance(); LOG.incrementProcessed(prog); } LOG.ensureCompleted(prog); return new KNNDistanceOrderResult(knnDistances, k); } @Override public TypeInformation[] getInputTypeRestriction() { return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); } @Override protected Logging getLogger() { return LOG; } /** * Curve result for a list containing the knn distances. * * @author Arthur Zimek * * @apiviz.exclude */ public static class KNNDistanceOrderResult extends XYCurve { /** * Number of neighbors considered for this KNNDIstanceOrder */ private int k; /** * Construct result * * @param knnDistances distance list to wrap. * @param k number of neighbors considered */ public KNNDistanceOrderResult(double[] knnDistances, int k) { super("Objects", k + "-NN-distance", knnDistances.length + 1); this.k = k; Arrays.sort(knnDistances); for(int j = 0; j < knnDistances.length; j++) { this.addAndSimplify(knnDistances.length - j, knnDistances[j]); } } @Override public String getLongName() { return k + "-NN distance order"; } @Override public String getShortName() { return k + "-NNDistanceOrder"; } } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractDistanceBasedAlgorithm.Parameterizer { /** * Parameter to specify the distance of the k-distant object to be assessed, * must be an integer greater than 0. */ public static final OptionID K_ID = new OptionID("knndistanceorder.k", "Specifies the distance of the k-distant object to be assessed, ignoring the query object."); /** * Parameter to specify the average percentage of distances randomly chosen * to be provided in the result, must be a double greater than 0 and less * than or equal to 1. */ public static final OptionID SAMPLING_ID = new OptionID("knndistanceorder.sample", "The percentage of objects to use for sampling, or the absolute number of samples."); /** * Random generator seed for distances. */ public static final OptionID SEED_ID = new OptionID("knndistanceorder.seed", "Random generator seed for sampling."); /** * Parameter k. */ protected int k; /** * Sampling percentage. */ protected double percentage; /** * Random number seeding. */ private RandomFactory rnd; /** * Constructor. */ public Parameterizer() { super(); } @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); IntParameter kP = new IntParameter(K_ID) // .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(kP)) { k = kP.getValue(); } DoubleParameter percentageP = new DoubleParameter(SAMPLING_ID, 1.) // .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); if(config.grab(percentageP)) { percentage = percentageP.getValue(); } RandomParameter randomP = new RandomParameter(SEED_ID); if(config.grab(randomP)) { rnd = randomP.getValue(); } } @Override protected KNNDistancesSampler makeInstance() { return new KNNDistancesSampler<>(distanceFunction, k, percentage, rnd); } } }