diff options
Diffstat (limited to 'src/tutorial/outlier/ODIN.java')
-rw-r--r-- | src/tutorial/outlier/ODIN.java | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/src/tutorial/outlier/ODIN.java b/src/tutorial/outlier/ODIN.java new file mode 100644 index 00000000..a8996305 --- /dev/null +++ b/src/tutorial/outlier/ODIN.java @@ -0,0 +1,196 @@ +package tutorial.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Outlier detection based on the in-degree of the kNN graph. + * + * This is a curried version: instead of using a threshold T to obtain a binary + * decision, we use the computed value as outlier score. + * + * Reference: + * <p> + * V. Hautamäki and I. Kärkkäinen and P Fränti<br /> + * Outlier detection using k-nearest neighbour graph<br /> + * Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004 <br /> + * </p> + * + * @author Erich Schubert + * + * @param <O> Object type + * @param <D> Distance type + */ +@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", title = "Outlier detection using k-nearest neighbour graph", booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", url = "http://dx.doi.org/10.1109/ICPR.2004.1334558") +public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(ODIN.class); + + /** + * Number of neighbors for kNN graph. + */ + int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k k parameter + */ + public ODIN(DistanceFunction<? super O, D> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Run the ODIN algorithm + * + * Tutorial note: the <em>signature</em> of this method depends on the types + * that we requested in the {@link #getInputTypeRestriction} method. Here we + * requested a single relation of type {@code O} , the data type of our + * distance function. + * + * @param database Database to run on. + * @param relation Relation to process. + * @return ODIN outlier result. + */ + public OutlierResult run(Database database, Relation<O> relation) { + // Get the query functions: + DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O, D> knnq = database.getKNNQuery(dq, k); + + // Get the objects to process, and a data storage for counting and output: + DBIDs ids = relation.getDBIDs(); + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.); + + // Process all objects + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + // Find the nearest neighbors (using an index, if available!) + KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + // For each neighbor, except ourselves, increase the in-degree: + for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) { + if (DBIDUtil.equal(iter, nei)) { + continue; + } + scores.put(nei, scores.doubleValue(nei) + 1); + } + } + + // Compute maximum + double min = Double.POSITIVE_INFINITY, max = 0.0; + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + min = Math.min(min, scores.doubleValue(iter)); + max = Math.max(max, scores.doubleValue(iter)); + } + // Wrap the result and add metadata. + // By actually specifying theoretical min, max and baseline, we get a better + // visualization (try it out - or see the screenshots in the tutorial)! + OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., ids.size() - 1, k); + Relation<Double> rel = new MaterializedRelation<>("ODIN In-Degree", "odin", TypeUtil.DOUBLE, scores, ids); + return new OutlierResult(meta, rel); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + * @param <D> Distance type + */ + public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + /** + * Parameter for the number of nearest neighbors: + * + * <pre> + * -odin.k <int> + * </pre> + */ + public static final OptionID K_ID = new OptionID("odin.k", "Number of neighbors to use for kNN graph."); + + /** + * Number of nearest neighbors to use. + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter param = new IntParameter(K_ID); + // Since in a database context, the 1 nearest neighbor + // will usually be the query object itself, we require + // this value to be at least 2. + param.addConstraint(new GreaterConstraint(1)); + if (config.grab(param)) { + k = param.intValue(); + } + } + + @Override + protected ODIN<O, D> makeInstance() { + return new ODIN<>(distanceFunction, k); + } + } +} |