package de.lmu.ifi.dbs.elki.algorithm.outlier; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; /** *

* Outlier Detection based on the distance of an object to its k nearest * neighbor. *

* *

* Reference:
* S. Ramaswamy, R. Rastogi, K. Shim: Efficient Algorithms for Mining Outliers * from Large Data Sets.
In: Proc. of the Int. Conf. on Management of Data, * Dallas, Texas, 2000. *

* * @author Lisa Reichert * * @apiviz.has KNNQuery * * @param the type of DatabaseObjects handled by this Algorithm * @param the type of Distance used by this Algorithm */ @Title("KNN outlier: Efficient Algorithms for Mining Outliers from Large Data Sets") @Description("Outlier Detection based on the distance of an object to its k nearest neighbor.") @Reference(authors = "S. Ramaswamy, R. Rastogi, K. Shim", title = "Efficient Algorithms for Mining Outliers from Large Data Sets", booktitle = "Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000", url = "http://dx.doi.org/10.1145/342009.335437") public class KNNOutlier> extends AbstractDistanceBasedAlgorithm implements OutlierAlgorithm { /** * The logger for this class. */ private static final Logging logger = Logging.getLogger(KNNOutlier.class); /** * Parameter to specify the k nearest neighbor */ public static final OptionID K_ID = OptionID.getOrCreateOptionID("knno.k", "k nearest neighbor"); /** * The parameter k */ private int k; /** * Constructor for a single kNN query. * * @param distanceFunction distance function to use * @param k Value of k */ public KNNOutlier(DistanceFunction distanceFunction, int k) { super(distanceFunction); this.k = k; } /** * Runs the algorithm in the timed evaluation part. */ public OutlierResult run(Database database, Relation relation) { final DistanceQuery distanceQuery = database.getDistanceQuery(relation, getDistanceFunction()); KNNQuery knnQuery = database.getKNNQuery(distanceQuery, k); if(logger.isVerbose()) { logger.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)"); } FiniteProgress progressKNNDistance = logger.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), logger) : null; DoubleMinMax minmax = new DoubleMinMax(); WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); // compute distance to the k nearest neighbor. for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { // distance to the kth nearest neighbor final KNNResult knns = knnQuery.getKNNForDBID(iditer, k); double dkn = knns.getKNNDistance().doubleValue(); knno_score.putDouble(iditer, dkn); minmax.put(dkn); if(progressKNNDistance != null) { progressKNNDistance.incrementProcessed(logger); } } if(progressKNNDistance != null) { progressKNNDistance.ensureCompleted(logger); } Relation scoreres = new MaterializedRelation("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs()); OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); return new OutlierResult(meta, scoreres); } @Override public TypeInformation[] getInputTypeRestriction() { return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); } @Override protected Logging getLogger() { return logger; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer> extends AbstractDistanceBasedAlgorithm.Parameterizer { protected int k = 0; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); final IntParameter kP = new IntParameter(K_ID); if(config.grab(kP)) { k = kP.getValue(); } } @Override protected KNNOutlier makeInstance() { return new KNNOutlier(distanceFunction, k); } } }