package de.lmu.ifi.dbs.elki.index.preprocessed.knn;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Class that computed the kNN only on a random sample.
*
* Used in:
*
* Subsampling for Efficient and Effective Unsupervised Outlier Detection
* Ensembles
* A. Zimek and M. Gaudet and R. J. G. B. Campello and J. Sander
* In: Proc. 19th ACM SIGKDD International Conference on Knowledge Discovery and
* Data Mining, KDD '13
*
*
* @author Erich Schubert
*
* @param Object type
* @param Distance type
*/
@Reference(title = "Subsampling for Efficient and Effective Unsupervised Outlier Detection Ensembles", authors = "A. Zimek and M. Gaudet and R. J. G. B. Campello and J. Sander", booktitle = "Proc. 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, KDD '13")
public class RandomSampleKNNPreprocessor> extends AbstractMaterializeKNNPreprocessor> {
/**
* Logger
*/
private static final Logging LOG = Logging.getLogger(RandomSampleKNNPreprocessor.class);
/**
* Relative share of objects to get
*/
private final double share;
/**
* Random generator
*/
private final RandomFactory rnd;
/**
* Constructor.
*
* @param relation Relation to index
* @param distanceFunction distance function
* @param k k
* @param share Relative share
* @param rnd Random generator
*/
public RandomSampleKNNPreprocessor(Relation relation, DistanceFunction super O, D> distanceFunction, int k, double share, RandomFactory rnd) {
super(relation, distanceFunction, k);
this.share = share;
this.rnd = rnd;
}
@Override
protected void preprocess() {
DistanceQuery distanceQuery = relation.getDatabase().getDistanceQuery(relation, distanceFunction);
storage = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, KNNList.class);
FiniteProgress progress = getLogger().isVerbose() ? new FiniteProgress("Materializing random-sample k nearest neighbors (k=" + k + ")", relation.size(), getLogger()) : null;
final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int samplesize = (int) (ids.size() * share);
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
KNNHeap kNN = DBIDUtil.newHeap(distanceFunction.getDistanceFactory(), k);
DBIDs rsamp = DBIDUtil.randomSample(ids, samplesize, rnd);
for(DBIDIter iter2 = rsamp.iter(); iter2.valid(); iter2.advance()) {
D dist = distanceQuery.distance(iter, iter2);
kNN.insert(dist, iter2);
}
storage.put(iter, kNN.toKNNList());
if(progress != null) {
progress.incrementProcessed(getLogger());
}
}
if(progress != null) {
progress.ensureCompleted(getLogger());
}
}
@Override
protected Logging getLogger() {
return LOG;
}
@Override
public String getLongName() {
return "random sample kNN";
}
@Override
public String getShortName() {
return "random-sample-knn";
}
@Override
public void logStatistics() {
// No statistics to log.
}
/**
* The parameterizable factory.
*
* @author Erich Schubert
*
* @apiviz.landmark
* @apiviz.stereotype factory
* @apiviz.uses AbstractMaterializeKNNPreprocessor oneway - - «create»
*
* @param The object type
* @param The distance type
*/
public static class Factory> extends AbstractMaterializeKNNPreprocessor.Factory> {
/**
* Relative share of objects to get
*/
private final double share;
/**
* Random generator
*/
private final RandomFactory rnd;
/**
* Constructor.
*
* @param k K
* @param distanceFunction distance function
* @param share Sample size (relative)
* @param rnd Random generator
*/
public Factory(int k, DistanceFunction super O, D> distanceFunction, double share, RandomFactory rnd) {
super(k, distanceFunction);
this.share = share;
this.rnd = rnd;
}
@Override
public RandomSampleKNNPreprocessor instantiate(Relation relation) {
return new RandomSampleKNNPreprocessor<>(relation, distanceFunction, k, share, rnd);
}
/**
* Parameterization class
*
* @author Erich Schubert
*
* @apiviz.exclude
*
* @param Object type
* @param Distance type
*/
public static class Parameterizer> extends AbstractMaterializeKNNPreprocessor.Factory.Parameterizer {
/**
* Parameter to specify how many objects to consider for computing the
* kNN.
*
*
* Key: {@code -randomknn.share}
*
*/
public static final OptionID SHARE_ID = new OptionID("randomknn.share", "The relative amount of objects to consider for kNN computations.");
/**
* Random number generator seed.
*
*
* Key: {@code -randomknn.seed}
*
*/
public static final OptionID SEED_ID = new OptionID("randomknn.seed", "The random number seed.");
/**
* Relative share of objects to get
*/
private double share = 0.0;
/**
* Random generator
*/
private RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
DoubleParameter shareP = new DoubleParameter(SHARE_ID);
shareP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
shareP.addConstraint(CommonConstraints.LESS_THAN_ONE_DOUBLE);
if(config.grab(shareP)) {
share = shareP.getValue();
}
RandomParameter rndP = new RandomParameter(SEED_ID);
if(config.grab(rndP)) {
rnd = rndP.getValue();
}
}
@Override
protected RandomSampleKNNPreprocessor.Factory makeInstance() {
return new RandomSampleKNNPreprocessor.Factory<>(k, distanceFunction, share, rnd);
}
}
}
}