summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java737
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java612
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java261
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java356
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java213
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java340
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java293
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java442
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java418
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java287
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java264
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java27
12 files changed, 4250 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
new file mode 100644
index 00000000..d48679a9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
@@ -0,0 +1,737 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Fast Outlier Detection Using the "approximate Local Correlation Integral".
+ *
+ * Outlier detection using multiple epsilon neighborhoods.
+ *
+ * Reference:
+ * <p>
+ * S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos:<br />
+ * LOCI: Fast Outlier Detection Using the Local Correlation Integral.<br />
+ * In: Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore,
+ * India, 2003.
+ * </p>
+ *
+ * @author Jonathan von Brünken
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf ALOCIQuadTree
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
+@Description("Algorithm to compute outliers based on the Local Correlation Integral")
+@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
+public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(ALOCI.class);
+
+ /**
+ * Minimum size for a leaf.
+ */
+ private int nmin;
+
+ /**
+ * Alpha (level difference of sampling and counting neighborhoods)
+ */
+ private int alpha;
+
+ /**
+ * Number of trees to generate (forest size)
+ */
+ private int g;
+
+ /**
+ * Random generator
+ */
+ private RandomFactory rnd;
+
+ /**
+ * Distance function
+ */
+ private NumberVectorDistanceFunction<D> distFunc;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param nmin Minimum neighborhood size
+ * @param alpha Alpha value
+ * @param g Number of grids to use
+ * @param rnd Random generator.
+ */
+ public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
+ super();
+ this.distFunc = distanceFunction;
+ this.nmin = nmin;
+ this.alpha = alpha;
+ this.g = g;
+ this.rnd = rnd;
+ }
+
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
+ final Random random = rnd.getRandom();
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, LOG) : null;
+
+ // Compute extend of dataset.
+ double[] min, max;
+ {
+ Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
+ double maxd = 0;
+ min = new double[dim];
+ max = new double[dim];
+ for(int i = 0; i < dim; i++) {
+ min[i] = hbbs.first.doubleValue(i);
+ max[i] = hbbs.second.doubleValue(i);
+ maxd = Math.max(maxd, max[i] - min[i]);
+ }
+ // Enlarge bounding box to have equal lengths.
+ for(int i = 0; i < dim; i++) {
+ double diff = (maxd - (max[i] - min[i])) * .5;
+ min[i] -= diff;
+ max[i] += diff;
+ }
+ }
+
+ List<ALOCIQuadTree> qts = new ArrayList<>(g);
+
+ double[] nshift = new double[dim];
+ ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
+ qts.add(qt);
+ if(progressPreproc != null) {
+ progressPreproc.incrementProcessed(LOG);
+ }
+ /*
+ * create the remaining g-1 shifted QuadTrees. This not clearly described in
+ * the paper and therefore implemented in a way that achieves good results
+ * with the test data.
+ */
+ for(int shift = 1; shift < g; shift++) {
+ double[] svec = new double[dim];
+ for(int i = 0; i < dim; i++) {
+ svec[i] = random.nextDouble() * (max[i] - min[i]);
+ }
+ qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
+ qts.add(qt);
+ if(progressPreproc != null) {
+ progressPreproc.incrementProcessed(LOG);
+ }
+ }
+ if(progressPreproc != null) {
+ progressPreproc.ensureCompleted(LOG);
+ }
+
+ // aLOCI main loop: evaluate
+ FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
+ WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmax = new DoubleMinMax();
+
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final O obj = relation.get(iditer);
+
+ double maxmdefnorm = 0;
+ // For each level
+ for(int l = 0;; l++) {
+ // Find the closest C_i
+ Node ci = null;
+ for(int i = 0; i < g; i++) {
+ Node ci2 = qts.get(i).findClosestNode(obj, l);
+ if(ci2.getLevel() != l) {
+ continue;
+ }
+ // TODO: always use manhattan?
+ if(ci == null || distFunc.distance(ci.getCenter(), obj).compareTo(distFunc.distance(ci2.getCenter(), obj)) > 0) {
+ ci = ci2;
+ }
+ }
+ // logger.debug("level:" + (ci != null ? ci.getLevel() : -1) +" l:"+l);
+ if(ci == null) {
+ break; // no matching tree for this level.
+ }
+
+ // Find the closest C_j
+ Node cj = null;
+ for(int i = 0; i < g; i++) {
+ Node cj2 = qts.get(i).findClosestNode(ci.getCenter(), l - alpha);
+ // TODO: allow higher levels or not?
+ if(cj != null && cj2.getLevel() < cj.getLevel()) {
+ continue;
+ }
+ // TODO: always use manhattan?
+ if(cj == null || distFunc.distance(cj.getCenter(), ci.getCenter()).compareTo(distFunc.distance(cj2.getCenter(), ci.getCenter())) > 0) {
+ cj = cj2;
+ }
+ }
+ // logger.debug("level:" + (cj != null ? cj.getLevel() : -1) +" l:"+l);
+ if(cj == null) {
+ continue; // no matching tree for this level.
+ }
+ double mdefnorm = calculate_MDEF_norm(cj, ci);
+ // logger.warning("level:" + ci.getLevel() + "/" + cj.getLevel() +
+ // " mdef: " + mdefnorm);
+ maxmdefnorm = Math.max(maxmdefnorm, mdefnorm);
+ }
+ // Store results
+ mdef_norm.putDouble(iditer, maxmdefnorm);
+ minmax.put(maxmdefnorm);
+ if(progressLOCI != null) {
+ progressLOCI.incrementProcessed(LOG);
+ }
+ }
+ if(progressLOCI != null) {
+ progressLOCI.ensureCompleted(LOG);
+ }
+ Relation<Double> scoreResult = new MaterializedRelation<>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ return result;
+ }
+
+ /**
+ * Method for the MDEF calculation
+ *
+ * @param sn Sampling Neighborhood
+ * @param cg Counting Neighborhood
+ *
+ * @return MDEF norm
+ */
+ private static double calculate_MDEF_norm(Node sn, Node cg) {
+ // get the square sum of the counting neighborhoods box counts
+ long sq = sn.getSquareSum(cg.getLevel() - sn.getLevel());
+ /*
+ * if the square sum is equal to box count of the sampling Neighborhood then
+ * n_hat is equal one, and as cg needs to have at least one Element mdef
+ * would get zero or lower than zero. This is the case when all of the
+ * counting Neighborhoods contain one or zero Objects. Additionally, the
+ * cubic sum, square sum and sampling Neighborhood box count are all equal,
+ * which leads to sig_n_hat being zero and thus mdef_norm is either negative
+ * infinite or undefined. As the distribution of the Objects seem quite
+ * uniform, a mdef_norm value of zero ( = no outlier) is appropriate and
+ * circumvents the problem of undefined values.
+ */
+ if(sq == sn.getCount()) {
+ return 0.0;
+ }
+ // calculation of mdef according to the paper and standardization as done in
+ // LOCI
+ long cb = sn.getCubicSum(cg.getLevel() - sn.getLevel());
+ double n_hat = (double) sq / sn.getCount();
+ double sig_n_hat = java.lang.Math.sqrt(cb * sn.getCount() - (sq * sq)) / sn.getCount();
+ // Avoid NaN - correct result 0.0?
+ if(sig_n_hat < Double.MIN_NORMAL) {
+ return 0.0;
+ }
+ double mdef = n_hat - cg.getCount();
+ return mdef / sig_n_hat;
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(distFunc.getInputTypeRestriction());
+ }
+
+ /**
+ * Simple quadtree for ALOCI. Not storing the actual objects, just the counts.
+ *
+ * Furthermore, the quadtree can be shifted by a specified vector, wrapping
+ * around min/max
+ *
+ * @author Jonathan von Brünken
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf Node
+ */
+ static class ALOCIQuadTree {
+ /**
+ * Tree parameters
+ */
+ private double[] shift, min, width;
+
+ /**
+ * Maximum fill for a page before splitting
+ */
+ private int nmin;
+
+ /**
+ * Tree root
+ */
+ Node root;
+
+ /**
+ * Relation indexed.
+ */
+ private Relation<? extends NumberVector<?>> relation;
+
+ /**
+ * Constructor.
+ *
+ * @param min Minimum coordinates
+ * @param max Maximum coordinates
+ * @param shift Tree shift offset
+ * @param nmin Maximum size for a page to split
+ * @param relation Relation to index
+ */
+ public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?>> relation) {
+ super();
+ assert (min.length <= 32) : "Quadtrees are only supported for up to 32 dimensions";
+ this.shift = shift;
+ this.nmin = nmin;
+ this.min = min;
+ this.width = new double[min.length];
+ for(int d = 0; d < min.length; d++) {
+ width[d] = max[d] - min[d];
+ if(width[d] <= 0) {
+ width[d] = 1;
+ }
+ }
+ double[] center = new double[min.length];
+ for(int d = 0; d < min.length; d++) {
+ if(shift[d] < width[d] * .5) {
+ center[d] = min[d] + shift[d] + width[d] * .5;
+ }
+ else {
+ center[d] = min[d] + shift[d] - width[d] * .5;
+ }
+ }
+ this.relation = relation;
+ ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
+ List<Node> children = new ArrayList<>();
+ bulkLoad(min.clone(), max.clone(), children, ids, 0, ids.size(), 0, 0, 0);
+ this.root = new Node(0, new Vector(center), ids.size(), -1, children);
+ }
+
+ /**
+ * Bulk load the tree
+ *
+ * @param lmin Subtree minimum (unshifted, will be modified)
+ * @param lmax Subtree maximum (unshifted, will be modified)
+ * @param children List of children for current parent
+ * @param ids IDs to process
+ * @param start Start of ids subinterval
+ * @param end End of ids subinterval
+ * @param dim Current dimension
+ * @param level Current tree level
+ * @param code Bit code of node position
+ */
+ private void bulkLoad(double[] lmin, double[] lmax, List<Node> children, ArrayModifiableDBIDs ids, int start, int end, int dim, int level, int code) {
+ // logger.warning(FormatUtil.format(lmin)+" "+FormatUtil.format(lmax)+" "+start+"->"+end+" "+(end-start));
+ // Hack: Check degenerate cases that won't split
+ if(dim == 0) {
+ DBIDArrayIter iter = ids.iter();
+ iter.seek(start);
+ NumberVector<?> first = relation.get(iter);
+ iter.advance();
+ boolean degenerate = true;
+ loop: for(; iter.getOffset() < end; iter.advance()) {
+ NumberVector<?> other = relation.get(iter);
+ for(int d = 0; d < lmin.length; d++) {
+ if(Math.abs(first.doubleValue(d) - other.doubleValue(d)) > 1E-15) {
+ degenerate = false;
+ break loop;
+ }
+ }
+ }
+ if(degenerate) {
+ double[] center = new double[lmin.length];
+ for(int d = 0; d < lmin.length; d++) {
+ center[d] = lmin[d] * .5 + lmax[d] * .5 + shift[d];
+ if(center[d] > min[d] + width[d]) {
+ center[d] -= width[d];
+ }
+ }
+ children.add(new Node(code, new Vector(center), end - start, level, null));
+ return;
+ }
+ }
+ // Complete level
+ if(dim == lmin.length) {
+ double[] center = new double[lmin.length];
+ for(int d = 0; d < lmin.length; d++) {
+ center[d] = lmin[d] * .5 + lmax[d] * .5 + shift[d];
+ if(center[d] > min[d] + width[d]) {
+ center[d] -= width[d];
+ }
+ }
+ if(end - start < nmin) {
+ children.add(new Node(code, new Vector(center), end - start, level, null));
+ return;
+ }
+ else {
+ List<Node> newchildren = new ArrayList<>();
+ bulkLoad(lmin, lmax, newchildren, ids, start, end, 0, level + 1, 0);
+ children.add(new Node(code, new Vector(center), end - start, level, newchildren));
+ return;
+ }
+ }
+ else {
+ // Partially sort data, by dimension dim < mid
+ DBIDArrayIter siter = ids.iter(), eiter = ids.iter();
+ siter.seek(start);
+ eiter.seek(end - 1);
+ while(siter.getOffset() < eiter.getOffset()) {
+ if(getShiftedDim(relation.get(siter), dim, level) <= .5) {
+ siter.advance();
+ continue;
+ }
+ if(getShiftedDim(relation.get(eiter), dim, level) > 0.5) {
+ eiter.retract();
+ continue;
+ }
+ ids.swap(siter.getOffset(), eiter.getOffset() - 1);
+ siter.advance();
+ eiter.retract();
+ }
+ final int spos = siter.getOffset();
+ if(start < spos) {
+ final double tmp = lmax[dim];
+ lmax[dim] = lmax[dim] * .5 + lmin[dim] * .5;
+ bulkLoad(lmin, lmax, children, ids, start, spos, dim + 1, level, code);
+ lmax[dim] = tmp; // Restore
+ }
+ if(spos < end) {
+ final double tmp = lmin[dim];
+ lmin[dim] = lmax[dim] * .5 + lmin[dim] * .5;
+ bulkLoad(lmin, lmax, children, ids, spos, end, dim + 1, level, code | (1 << dim));
+ lmin[dim] = tmp; // Restore
+ }
+ }
+ }
+
+ /**
+ * Shift and wrap a single dimension.
+ *
+ * @param obj Object
+ * @param dim Dimension
+ * @param level Level (controls scaling/wraping!)
+ * @return Shifted position
+ */
+ private double getShiftedDim(NumberVector<?> obj, int dim, int level) {
+ double pos = obj.doubleValue(dim) + shift[dim];
+ pos = (pos - min[dim]) / width[dim] * (1 + level);
+ return pos - Math.floor(pos);
+ }
+
+ /**
+ * Find the closest node (of depth tlevel or above, if there is no node at
+ * this depth) for the given vector.
+ *
+ * @param vec Query vector
+ * @param tlevel Target level
+ * @return Node
+ */
+ public Node findClosestNode(NumberVector<?> vec, int tlevel) {
+ Node cur = root;
+ for(int level = 0; level <= tlevel; level++) {
+ if(cur.children == null) {
+ break;
+ }
+ int code = 0;
+ for(int d = 0; d < min.length; d++) {
+ if(getShiftedDim(vec, d, level) > .5) {
+ code |= 1 << d;
+ }
+ }
+ boolean found = false;
+ for(Node child : cur.children) {
+ if(child.code == code) {
+ cur = child;
+ found = true;
+ break;
+ }
+ }
+ if(!found) {
+ break; // Do not descend
+ }
+ }
+ return cur;
+ }
+ }
+
+ /**
+ * Node of the ALOCI Quadtree
+ *
+ * @author Erich Schubert
+ */
+ static class Node {
+ /**
+ * Position code
+ */
+ final int code;
+
+ /**
+ * Number of elements
+ */
+ final int count;
+
+ /**
+ * Level of node
+ */
+ final int level;
+
+ /**
+ * Child nodes, may be null
+ */
+ List<Node> children;
+
+ /**
+ * Parent node
+ */
+ Node parent = null;
+
+ /**
+ * Center vector
+ */
+ Vector center;
+
+ /**
+ * Constructor.
+ *
+ * @param code Node code
+ * @param center Center vector
+ * @param count Element count
+ * @param level Node level
+ * @param children Children list
+ */
+ protected Node(int code, Vector center, int count, int level, List<Node> children) {
+ this.code = code;
+ this.center = center;
+ this.count = count;
+ this.level = level;
+ this.children = children;
+ if(children != null) {
+ for(Node child : children) {
+ child.parent = this;
+ }
+ }
+ }
+
+ /**
+ * Get level of node.
+ *
+ * @return Level of node
+ */
+ public int getLevel() {
+ return level;
+ }
+
+ /**
+ * Get count of subtree
+ *
+ * @return subtree count
+ */
+ public int getCount() {
+ return count;
+ }
+
+ /**
+ * Return center vector
+ *
+ * @return center vector
+ */
+ public Vector getCenter() {
+ return center;
+ }
+
+ /**
+ * Get sum of squares, recursively
+ *
+ * @param levels Depth to collect
+ * @return Sum of squares
+ */
+ public long getSquareSum(int levels) {
+ if(levels <= 0 || children == null) {
+ return ((long) count) * ((long) count);
+ }
+ long agg = 0;
+ for(Node child : children) {
+ agg += child.getSquareSum(levels - 1);
+ }
+ return agg;
+ }
+
+ /**
+ * Get cubic sum.
+ *
+ * @param levels Level to collect
+ * @return sum of cubes
+ */
+ public long getCubicSum(int levels) {
+ if(levels <= 0 || children == null) {
+ return ((long) count) * ((long) count) * ((long) count);
+ }
+ long agg = 0;
+ for(Node child : children) {
+ agg += child.getCubicSum(levels - 1);
+ }
+ return agg;
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ /**
+ * Parameter to specify the minimum neighborhood size
+ */
+ public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+
+ /**
+ * Parameter to specify the averaging neighborhood scaling.
+ */
+ public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+
+ /**
+ * Parameter to specify the number of Grids to use.
+ */
+ public static final OptionID GRIDS_ID = new OptionID("loci.g", "The number of Grids to use.");
+
+ /**
+ * Parameter to specify the seed to initialize Random.
+ */
+ public static final OptionID SEED_ID = new OptionID("loci.seed", "The seed to use for initializing Random.");
+
+ /**
+ * Neighborhood minimum size
+ */
+ protected int nmin = 0;
+
+ /**
+ * Alpha: number of levels difference to use in comparison
+ */
+ protected int alpha = 4;
+
+ /**
+ * G: number of shifted trees to create.
+ */
+ protected int g = 1;
+
+ /**
+ * Random generator
+ */
+ protected RandomFactory rnd;
+
+ /**
+ * The distance function
+ */
+ private NumberVectorDistanceFunction<D> distanceFunction;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ ObjectParameter<NumberVectorDistanceFunction<D>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, NumberVectorDistanceFunction.class);
+ if(config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ }
+
+ final IntParameter nminP = new IntParameter(NMIN_ID, 20);
+ if(config.grab(nminP)) {
+ nmin = nminP.getValue();
+ }
+
+ final IntParameter g = new IntParameter(GRIDS_ID, 1);
+ if(config.grab(g)) {
+ this.g = g.getValue();
+ }
+
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ this.rnd = rndP.getValue();
+ }
+
+ final IntParameter alphaP = new IntParameter(ALPHA_ID, 4);
+ if(config.grab(alphaP)) {
+ this.alpha = alphaP.getValue();
+ if(this.alpha < 1) {
+ this.alpha = 1;
+ }
+ }
+ }
+
+ @Override
+ protected ALOCI<O, D> makeInstance() {
+ return new ALOCI<>(distanceFunction, nmin, alpha, g, rnd);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
new file mode 100644
index 00000000..80f60e8b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
@@ -0,0 +1,612 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * <p>
+ * Flexible variant of the "Local Outlier Factor" algorithm.
+ * </p>
+ *
+ * <p>
+ * This implementation diverts from the original LOF publication in that it
+ * allows the user to use a different distance function for the reachability
+ * distance and neighborhood determination (although the default is to use the
+ * same value.)
+ * </p>
+ *
+ * <p>
+ * The k nearest neighbors are determined using the parameter
+ * {@link de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm#DISTANCE_FUNCTION_ID}
+ * , while the reference set used in reachability distance computation is
+ * configured using {@link Parameterizer#REACHABILITY_DISTANCE_FUNCTION_ID}.
+ * </p>
+ *
+ * <p>
+ * The original LOF parameter was called &quot;minPts&quot;. For consistency
+ * with the name "kNN query", we chose to rename the parameter to {@code k}.
+ * Flexible LOF allows you to set the two values different, which yields the
+ * parameters {@link Parameterizer#KREF_ID} ({@code -lof.krefer}) and
+ * {@link Parameterizer#KREACH_ID} ({@code -lof.kreach})
+ * </p>
+ *
+ * <p>
+ * Reference: <br>
+ * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying
+ * Density-Based Local Outliers. <br>
+ * In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00),
+ * Dallas, TX, 2000.
+ * </p>
+ *
+ * @author Peer Kröger
+ * @author Erich Schubert
+ * @author Elke Achtert
+ *
+ * @apiviz.has LOFResult oneway - - computes
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Title("LOF: Local Outlier Factor")
+@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'")
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388")
+public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(FlexibleLOF.class);
+
+ /**
+ * Number of neighbors in comparison set.
+ */
+ protected int krefer = 2;
+
+ /**
+ * Number of neighbors used for reachability distance.
+ */
+ protected int kreach = 2;
+
+ /**
+ * Neighborhood distance function.
+ */
+ protected DistanceFunction<? super O, D> referenceDistanceFunction;
+
+ /**
+ * Reachability distance function.
+ */
+ protected DistanceFunction<? super O, D> reachabilityDistanceFunction;
+
+ /**
+ * Include object itself in kNN neighborhood.
+ *
+ * In the official LOF publication, the point itself is not considered to be
+ * part of its k nearest neighbors.
+ */
+ private static boolean objectIsInKNN = false;
+
+ /**
+ * Constructor.
+ *
+ * @param krefer The number of neighbors for reference
+ * @param kreach The number of neighbors for reachability distance
+ * @param neighborhoodDistanceFunction the neighborhood distance function
+ * @param reachabilityDistanceFunction the reachability distance function
+ */
+ public FlexibleLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ super();
+ this.krefer = krefer + (objectIsInKNN ? 0 : 1);
+ this.kreach = kreach + (objectIsInKNN ? 0 : 1);
+ this.referenceDistanceFunction = neighborhoodDistanceFunction;
+ this.reachabilityDistanceFunction = reachabilityDistanceFunction;
+ }
+
+ /**
+ * Performs the Generalized LOF algorithm on the given database by calling
+ * {@link #doRunInTime}.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
+ Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
+ KNNQuery<O, D> kNNRefer = pair.getFirst();
+ KNNQuery<O, D> kNNReach = pair.getSecond();
+ return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult();
+ }
+
+ /**
+ * Get the kNN queries for the algorithm.
+ *
+ * @param relation the data
+ * @param stepprog the progress logger
+ * @return the kNN queries for the algorithm
+ */
+ private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
+ // "HEAVY" flag for knnReach since it is used more than once
+ KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnReach instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ if (referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG);
+ } else {
+ stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG);
+ }
+ }
+ int kpreproc = (referenceDistanceFunction.equals(reachabilityDistanceFunction)) ? Math.max(kreach, krefer) : kreach;
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, reachabilityDistanceFunction, kpreproc);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
+ knnReach = preproc.getKNNQuery(rdq, kreach);
+ }
+
+ // knnReach is only used once
+ KNNQuery<O, D> knnRefer;
+ if (referenceDistanceFunction == reachabilityDistanceFunction || referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
+ knnRefer = knnReach;
+ } else {
+ // do not materialize the first neighborhood, since it is used only once
+ knnRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer);
+ }
+
+ return new Pair<>(knnRefer, knnReach);
+ }
+
+ /**
+ * Performs the Generalized LOF_SCORE algorithm on the given database and
+ * returns a {@link FlexibleLOF.LOFResult} encapsulating information that may
+ * be needed by an OnlineLOF algorithm.
+ *
+ * @param ids Object ids
+ * @param kNNRefer the kNN query w.r.t. reference neighborhood distance
+ * function
+ * @param kNNReach the kNN query w.r.t. reachability distance function
+ * @param stepprog Progress logger
+ * @return LOF result
+ */
+ protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) {
+ // Assert we got something
+ if (kNNRefer == null) {
+ throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
+ }
+ if (kNNReach == null) {
+ throw new AbortException("No kNN queries supported by database for reachability distance function.");
+ }
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LRDs.", LOG);
+ }
+ WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach);
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LOFs.", LOG);
+ }
+ Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer);
+ WritableDoubleDataStore lofs = lofsAndMax.getFirst();
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = lofsAndMax.getSecond();
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return new LOFResult<>(result, kNNRefer, kNNReach, lrds, lofs);
+ }
+
+ /**
+ * Computes the local reachability density (LRD) of the specified objects.
+ *
+ * @param ids the ids of the objects
+ * @param knnReach the precomputed neighborhood of the objects w.r.t. the
+ * reachability distance
+ * @return the LRDs of the objects
+ */
+ protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
+ WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList<D> neighbors = knnReach.getKNNForDBID(iter, kreach);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach);
+ final double nkdist;
+ if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
+ nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
+ } else {
+ nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
+ }
+ sum += Math.max(neighbor.doubleDistance(), nkdist);
+ count++;
+ }
+ }
+ } else {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ count++;
+ }
+ }
+ }
+ // Avoid division by 0
+ final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
+ lrds.putDouble(iter, lrd);
+ if (lrdsProgress != null) {
+ lrdsProgress.incrementProcessed(LOG);
+ }
+ }
+ if (lrdsProgress != null) {
+ lrdsProgress.ensureCompleted(LOG);
+ }
+ return lrds;
+ }
+
+ /**
+ * Computes the Local outlier factor (LOF) of the specified objects.
+ *
+ * @param ids the ids of the objects
+ * @param lrds the LRDs of the objects
+ * @param knnRefer the precomputed neighborhood of the objects w.r.t. the
+ * reference distance
+ * @return the LOFs of the objects and the maximum LOF
+ */
+ protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) {
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final double lrdp = lrds.doubleValue(iter);
+ final double lof;
+ if (lrdp > 0 && !Double.isInfinite(lrdp)) {
+ final KNNList<D> neighbors = knnRefer.getKNNForDBID(iter, krefer);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ sum += lrds.doubleValue(neighbor);
+ count++;
+ }
+ }
+ lof = sum / (count * lrdp);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(iter, lof);
+ // update minimum and maximum
+ if (!Double.isInfinite(lof)) {
+ lofminmax.put(lof);
+ }
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+ return new Pair<>(lofs, lofminmax);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ final TypeInformation type;
+ if (reachabilityDistanceFunction.equals(referenceDistanceFunction)) {
+ type = reachabilityDistanceFunction.getInputTypeRestriction();
+ } else {
+ type = new CombinedTypeInformation(referenceDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction());
+ }
+ return TypeUtil.array(type);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Encapsulates information like the neighborhood, the LRD and LOF values of
+ * the objects during a run of the {@link FlexibleLOF} algorithm.
+ *
+ * @author Elke Achtert
+ */
+ public static class LOFResult<O, D extends NumberDistance<D, ?>> {
+ /**
+ * The result of the run of the {@link FlexibleLOF} algorithm.
+ */
+ private OutlierResult result;
+
+ /**
+ * The kNN query w.r.t. the reference neighborhood distance.
+ */
+ private final KNNQuery<O, D> kNNRefer;
+
+ /**
+ * The kNN query w.r.t. the reachability distance.
+ */
+ private final KNNQuery<O, D> kNNReach;
+
+ /**
+ * The RkNN query w.r.t. the reference neighborhood distance.
+ */
+ private RKNNQuery<O, D> rkNNRefer;
+
+ /**
+ * The rkNN query w.r.t. the reachability distance.
+ */
+ private RKNNQuery<O, D> rkNNReach;
+
+ /**
+ * The LRD values of the objects.
+ */
+ private final WritableDoubleDataStore lrds;
+
+ /**
+ * The LOF values of the objects.
+ */
+ private final WritableDoubleDataStore lofs;
+
+ /**
+ * Encapsulates information generated during a run of the
+ * {@link FlexibleLOF} algorithm.
+ *
+ * @param result the result of the run of the {@link FlexibleLOF} algorithm
+ * @param kNNRefer the kNN query w.r.t. the reference neighborhood distance
+ * @param kNNReach the kNN query w.r.t. the reachability distance
+ * @param lrds the LRD values of the objects
+ * @param lofs the LOF values of the objects
+ */
+ public LOFResult(OutlierResult result, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, WritableDoubleDataStore lrds, WritableDoubleDataStore lofs) {
+ this.result = result;
+ this.kNNRefer = kNNRefer;
+ this.kNNReach = kNNReach;
+ this.lrds = lrds;
+ this.lofs = lofs;
+ }
+
+ /**
+ * Get the knn query for the reference set.
+ *
+ * @return the kNN query w.r.t. the reference neighborhood distance
+ */
+ public KNNQuery<O, D> getKNNRefer() {
+ return kNNRefer;
+ }
+
+ /**
+ * Get the knn query for the reachability set.
+ *
+ * @return the kNN query w.r.t. the reachability distance
+ */
+ public KNNQuery<O, D> getKNNReach() {
+ return kNNReach;
+ }
+
+ /**
+ * Get the LRD data store.
+ *
+ * @return the LRD values of the objects
+ */
+ public WritableDoubleDataStore getLrds() {
+ return lrds;
+ }
+
+ /**
+ * Get the LOF data store.
+ *
+ * @return the LOF values of the objects
+ */
+ public WritableDoubleDataStore getLofs() {
+ return lofs;
+ }
+
+ /**
+ * Get the outlier result.
+ *
+ * @return the result of the run of the {@link FlexibleLOF} algorithm
+ */
+ public OutlierResult getResult() {
+ return result;
+ }
+
+ /**
+ * Sets the RkNN query w.r.t. the reference neighborhood distance.
+ *
+ * @param rkNNRefer the query to set
+ */
+ public void setRkNNRefer(RKNNQuery<O, D> rkNNRefer) {
+ this.rkNNRefer = rkNNRefer;
+ }
+
+ /**
+ * Get the RkNN query for the reference set.
+ *
+ * @return the RkNN query w.r.t. the reference neighborhood distance
+ */
+ public RKNNQuery<O, D> getRkNNRefer() {
+ return rkNNRefer;
+ }
+
+ /**
+ * Get the RkNN query for the reachability set.
+ *
+ * @return the RkNN query w.r.t. the reachability distance
+ */
+ public RKNNQuery<O, D> getRkNNReach() {
+ return rkNNReach;
+ }
+
+ /**
+ * Sets the RkNN query w.r.t. the reachability distance.
+ *
+ * @param rkNNReach the query to set
+ */
+ public void setRkNNReach(RKNNQuery<O, D> rkNNReach) {
+ this.rkNNReach = rkNNReach;
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * The distance function to determine the reachability distance between
+ * database objects.
+ */
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOF_SCORE, must be an integer greater than
+ * 1.
+ */
+ public static final OptionID KREF_ID = new OptionID("lof.krefer", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its reachability distance.
+ */
+ public static final OptionID KREACH_ID = new OptionID("lof.kreach", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+
+ /**
+ * The reference set size to use.
+ */
+ protected int krefer = 2;
+
+ /**
+ * The set size to use for reachability distance.
+ */
+ protected int kreach = 2;
+
+ /**
+ * Neighborhood distance function.
+ */
+ protected DistanceFunction<O, D> neighborhoodDistanceFunction = null;
+
+ /**
+ * Reachability distance function.
+ */
+ protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(KREF_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ krefer = pK.intValue();
+ }
+
+ final IntParameter pK2 = new IntParameter(KREACH_ID);
+ pK2.setOptional(true);
+ pK2.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK2)) {
+ kreach = pK2.intValue();
+ } else {
+ kreach = krefer;
+ }
+
+ final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class);
+ reachDistP.setOptional(true);
+ if (config.grab(reachDistP)) {
+ reachabilityDistanceFunction = reachDistP.instantiateClass(config);
+ } else {
+ reachabilityDistanceFunction = distanceFunction;
+ }
+ }
+
+ @Override
+ protected FlexibleLOF<O, D> makeInstance() {
+ return new FlexibleLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
new file mode 100644
index 00000000..ae297a3c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
@@ -0,0 +1,261 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * INFLO provides the Mining Algorithms (Two-way Search Method) for Influence
+ * Outliers using Symmetric Relationship
+ * <p>
+ * Reference: <br>
+ * <p>
+ * Jin, W., Tung, A., Han, J., and Wang, W. 2006<br/>
+ * Ranking outliers using symmetric neighborhood relationship<br/>
+ * In Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD),
+ * Singapore
+ * </p>
+ *
+ * @author Ahmed Hettab
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObject the algorithm is applied on
+ */
+@Title("INFLO: Influenced Outlierness Factor")
+@Description("Ranking Outliers Using Symmetric Neigborhood Relationship")
+@Reference(authors = "Jin, W., Tung, A., Han, J., and Wang, W", title = "Ranking outliers using symmetric neighborhood relationship", booktitle = "Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD), Singapore, 2006", url = "http://dx.doi.org/10.1007/11731139_68")
+public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(INFLO.class);
+
+ /**
+ * Parameter to specify if any object is a Core Object must be a double
+ * greater than 0.0
+ * <p>
+ * see paper "Two-way search method" 3.2
+ */
+ public static final OptionID M_ID = new OptionID("inflo.m", "The threshold");
+
+ /**
+ * Holds the value of {@link #M_ID}.
+ */
+ private double m;
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its INFLO_SCORE. must be an integer greater than
+ * 1.
+ */
+ public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
+
+ /**
+ * Holds the value of {@link #K_ID}.
+ */
+ private int k;
+
+ /**
+ * Constructor with parameters.
+ *
+ * @param distanceFunction Distance function in use
+ * @param m m Parameter
+ * @param k k Parameter
+ */
+ public INFLO(DistanceFunction<? super O, D> distanceFunction, double m, int k) {
+ super(distanceFunction);
+ this.m = m;
+ this.k = k;
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+
+ ModifiableDBIDs processedIDs = DBIDUtil.newHashSet(relation.size());
+ ModifiableDBIDs pruned = DBIDUtil.newHashSet();
+ // KNNS
+ WritableDataStore<ModifiableDBIDs> knns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
+ // RNNS
+ WritableDataStore<ModifiableDBIDs> rnns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
+ // density
+ WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
+ // init knns and rnns
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ knns.put(iditer, DBIDUtil.newArray());
+ rnns.put(iditer, DBIDUtil.newArray());
+ }
+
+ // TODO: use kNN preprocessor?
+ KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
+
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ // if not visited count=0
+ int count = rnns.get(id).size();
+ if (!processedIDs.contains(id)) {
+ // TODO: use exactly k neighbors?
+ KNNList<D> list = knnQuery.getKNNForDBID(id, k);
+ knns.get(id).addDBIDs(list);
+ processedIDs.add(id);
+ density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
+
+ }
+ ModifiableDBIDs s = knns.get(id);
+ for (DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
+ if (!processedIDs.contains(q)) {
+ // TODO: use exactly k neighbors?
+ KNNList<D> listQ = knnQuery.getKNNForDBID(q, k);
+ knns.get(q).addDBIDs(listQ);
+ density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
+ processedIDs.add(q);
+ }
+
+ if (knns.get(q).contains(id)) {
+ rnns.get(q).add(id);
+ rnns.get(id).add(q);
+ count++;
+ }
+ }
+ if (count >= s.size() * m) {
+ pruned.add(id);
+ }
+ }
+
+ // Calculate INFLO for any Object
+ // IF Object is pruned INFLO=1.0
+ DoubleMinMax inflominmax = new DoubleMinMax();
+ WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ if (!pruned.contains(id)) {
+ ModifiableDBIDs knn = knns.get(id);
+ ModifiableDBIDs rnn = rnns.get(id);
+
+ double denP = density.doubleValue(id);
+ knn.addDBIDs(rnn);
+ Mean mean = new Mean();
+ for (DBIDIter iter = knn.iter(); iter.valid(); iter.advance()) {
+ mean.put(density.doubleValue(iter));
+ }
+ double den = mean.getMean() / denP;
+ inflos.putDouble(id, den);
+ // update minimum and maximum
+ inflominmax.put(den);
+
+ }
+ if (pruned.contains(id)) {
+ inflos.putDouble(id, 1.0);
+ inflominmax.put(1.0);
+ }
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ protected double m = 1.0;
+
+ protected int k = 0;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
+ mP.addConstraint(new GreaterConstraint(0.0));
+ if (config.grab(mP)) {
+ m = mP.doubleValue();
+ }
+
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ }
+
+ @Override
+ protected INFLO<O, D> makeInstance() {
+ return new INFLO<>(distanceFunction, m, k);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
new file mode 100644
index 00000000..4a86e93d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
@@ -0,0 +1,356 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Outlier Detection with Kernel Density Functions.
+ *
+ * A variation of LOF which uses kernel density estimation, but in contrast to
+ * {@link SimpleKernelDensityLOF} also uses the reachability concept of LOF.
+ *
+ * Reference:
+ * <p>
+ * Outlier Detection with Kernel Density Functions.<br/>
+ * L. J. Latecki, A. Lazarevic, D. Pokrajac<br />
+ * Machine Learning and Data Mining in Pattern Recognition 2007
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ * @apiviz.has KernelDensityFunction
+ *
+ * @param <O> the type of objects handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", title = "Outlier Detection with Kernel Density Functions", booktitle = "Machine Learning and Data Mining in Pattern Recognition", url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6")
+public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LDF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Bandwidth scaling factor.
+ */
+ protected double h = 1;
+
+ /**
+ * Scaling constant, to limit value range to 1/c
+ */
+ protected double c = 0.1;
+
+ /**
+ * Kernel density function
+ */
+ private KernelDensityFunction kernel;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param kernel Kernel function
+ * @param h Kernel bandwidth scaling
+ * @param c Score scaling parameter
+ */
+ public LDF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel, double h, double c) {
+ super(distance);
+ this.k = k + 1;
+ this.kernel = kernel;
+ this.h = h;
+ this.c = c;
+ }
+
+ /**
+ * Run the naive kernel density LOF algorithm.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
+
+ final int dim = RelationUtil.dimensionality(relation);
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LDEs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LDEs.", LOG);
+ }
+ WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ final double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
+ if (nkdist > 0.) {
+ final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ } else {
+ sum = Double.POSITIVE_INFINITY;
+ count++;
+ break;
+ }
+ }
+ } else {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
+ if (nkdist > 0.) {
+ final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ } else {
+ sum = Double.POSITIVE_INFINITY;
+ count++;
+ break;
+ }
+ }
+ }
+ ldes.putDouble(it, sum / count);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // Compute local density factors.
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LDFs.", LOG);
+ }
+ WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = ldes.doubleValue(it);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += ldes.doubleValue(neighbor);
+ count++;
+ }
+ sum /= count;
+ final double div = lrdp + c * sum;
+ double ldf = (div > 0) ? sum / div : 0;
+ ldfs.putDouble(it, ldf);
+ // update minimum and maximum
+ lofminmax.put(ldf);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids);
+ OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD));
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for kernel.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("ldf.kernel", "Kernel to use for LDF.");
+
+ /**
+ * Option ID for k
+ */
+ public static final OptionID K_ID = new OptionID("ldf.k", "Number of neighbors to use for LDF.");
+
+ /**
+ * Option ID for h - kernel bandwidth scaling
+ */
+ public static final OptionID H_ID = new OptionID("ldf.h", "Kernel bandwidth multiplier for LDF.");
+
+ /**
+ * Option ID for c
+ */
+ public static final OptionID C_ID = new OptionID("ldf.c", "Score scaling parameter for LDF.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ /**
+ * Kernel density function parameter
+ */
+ KernelDensityFunction kernel;
+
+ /**
+ * Bandwidth scaling factor.
+ */
+ protected double h = 1;
+
+ /**
+ * Scaling constant, to limit value range to 1/c
+ */
+ protected double c = 0.1;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class);
+ if (config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+
+ DoubleParameter hP = new DoubleParameter(H_ID);
+ if (config.grab(hP)) {
+ h = hP.doubleValue();
+ }
+
+ DoubleParameter cP = new DoubleParameter(C_ID, 0.1);
+ if (config.grab(cP)) {
+ c = cP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LDF<O, D> makeInstance() {
+ return new LDF<>(k, distanceFunction, kernel, h, c);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
new file mode 100644
index 00000000..80ed3f68
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
@@ -0,0 +1,213 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2011
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * <p>
+ * Computes the LDOF (Local Distance-Based Outlier Factor) for all objects of a
+ * Database.
+ * </p>
+ *
+ * <p>
+ * Reference:<br>
+ * K. Zhang, M. Hutter, H. Jin: A New Local Distance-Based Outlier Detection
+ * Approach for Scattered Real-World Data.<br>
+ * In: Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and
+ * Data Mining (PAKDD 2009), Bangkok, Thailand, 2009.
+ * </p>
+ *
+ * @author Arthur Zimek
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ */
+@Title("LDOF: Local Distance-Based Outlier Factor")
+@Description("Local outlier detection appraoch suitable for scattered data by averaging the kNN distance over all k nearest neighbors")
+@Reference(authors = "K. Zhang, M. Hutter, H. Jin", title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
+@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF"})
+public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LDOF.class);
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LDOF_SCORE, must be an integer greater than 1.
+ */
+ public static final OptionID K_ID = new OptionID("ldof.k", "The number of nearest neighbors of an object to be considered for computing its LDOF_SCORE.");
+
+ /**
+ * The baseline for LDOF values. The paper gives 0.5 for uniform
+ * distributions, although one might also discuss using 1.0 as baseline.
+ */
+ private static final double LDOF_BASELINE = 0.5;
+
+ /**
+ * Holds the value of {@link #K_ID}.
+ */
+ int k;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction distance function
+ * @param k k Parameter
+ */
+ public LDOF(DistanceFunction<? super O, D> distanceFunction, int k) {
+ super(distanceFunction);
+ this.k = k;
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k);
+
+ // track the maximum value for normalization
+ DoubleMinMax ldofminmax = new DoubleMinMax();
+ // compute the ldof values
+ WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+
+ // compute LOF_SCORE of each db object
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing LDOFs");
+ }
+ FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), LOG) : null;
+
+ Mean dxp = new Mean(), Dxp = new Mean();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ // skip the point itself
+ dxp.reset(); Dxp.reset();
+ // TODO: optimize for double distances
+ for (DistanceDBIDListIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ if(!DBIDUtil.equal(neighbor1, iditer)) {
+ dxp.put(neighbor1.getDistance().doubleValue());
+ for (DistanceDBIDListIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
+ Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
+ }
+ }
+ }
+ }
+ double ldof = dxp.getMean() / Dxp.getMean();
+ if(Double.isNaN(ldof) || Double.isInfinite(ldof)) {
+ ldof = 1.0;
+ }
+ ldofs.putDouble(iditer, ldof);
+ // update maximum
+ ldofminmax.put(ldof);
+
+ if(progressLDOFs != null) {
+ progressLDOFs.incrementProcessed(LOG);
+ }
+ }
+ if(progressLDOFs != null) {
+ progressLDOFs.ensureCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("LDOF Outlier Score", "ldof-outlier", TypeUtil.DOUBLE, ldofs, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ protected int k = 0;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ }
+
+ @Override
+ protected LDOF<O, D> makeInstance() {
+ return new LDOF<>(distanceFunction, k);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java
new file mode 100644
index 00000000..e76c6034
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java
@@ -0,0 +1,340 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
+
+/**
+ * Fast Outlier Detection Using the "Local Correlation Integral".
+ *
+ * Exact implementation only, not aLOCI. See {@link ALOCI}
+ *
+ * Outlier detection using multiple epsilon neighborhoods.
+ *
+ * Based on: S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos:
+ * LOCI: Fast Outlier Detection Using the Local Correlation Integral. In: Proc.
+ * 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has RangeQuery
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
+@Description("Algorithm to compute outliers based on the Local Correlation Integral")
+@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
+@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI"})
+public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LOCI.class);
+
+ /**
+ * Parameter to specify the maximum radius of the neighborhood to be
+ * considered, must be suitable to the distance function specified.
+ */
+ public static final OptionID RMAX_ID = new OptionID("loci.rmax", "The maximum radius of the neighborhood to be considered.");
+
+ /**
+ * Parameter to specify the minimum neighborhood size
+ */
+ public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+
+ /**
+ * Parameter to specify the averaging neighborhood scaling.
+ */
+ public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+
+ /**
+ * Holds the value of {@link #RMAX_ID}.
+ */
+ private D rmax;
+
+ /**
+ * Holds the value of {@link #NMIN_ID}.
+ */
+ private int nmin;
+
+ /**
+ * Holds the value of {@link #ALPHA_ID}.
+ */
+ private double alpha;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param rmax Maximum radius
+ * @param nmin Minimum neighborhood size
+ * @param alpha Alpha value
+ */
+ public LOCI(DistanceFunction<? super O, D> distanceFunction, D rmax, int nmin, double alpha) {
+ super(distanceFunction);
+ this.rmax = rmax;
+ this.nmin = nmin;
+ this.alpha = alpha;
+ }
+
+ /**
+ * Run the algorithm
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
+
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), LOG) : null;
+ // LOCI preprocessing step
+ WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
+ // build list of critical distances
+ ArrayList<DoubleIntPair> cdist = new ArrayList<>(neighbors.size() << 1);
+ {
+ for(int i = 0; i < neighbors.size(); i++) {
+ DistanceDBIDPair<D> r = neighbors.get(i);
+ if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) {
+ continue;
+ }
+ cdist.add(new DoubleIntPair(r.getDistance().doubleValue(), i));
+ final double ri = r.getDistance().doubleValue() / alpha;
+ if(ri <= rmax.doubleValue()) {
+ cdist.add(new DoubleIntPair(ri, Integer.MIN_VALUE));
+ }
+ }
+ }
+ Collections.sort(cdist);
+ // fill the gaps to have fast lookups of number of neighbors at a given
+ // distance.
+ int lastk = 0;
+ for(DoubleIntPair c : cdist) {
+ if(c.second == Integer.MIN_VALUE) {
+ c.second = lastk;
+ }
+ else {
+ lastk = c.second;
+ }
+ }
+
+ interestingDistances.put(iditer, cdist);
+ if(progressPreproc != null) {
+ progressPreproc.incrementProcessed(LOG);
+ }
+ }
+ if(progressPreproc != null) {
+ progressPreproc.ensureCompleted(LOG);
+ }
+ // LOCI main step
+ FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
+ WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmax = new DoubleMinMax();
+
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final List<DoubleIntPair> cdist = interestingDistances.get(iditer);
+ final double maxdist = cdist.get(cdist.size() - 1).first;
+ final int maxneig = cdist.get(cdist.size() - 1).second;
+
+ double maxmdefnorm = 0.0;
+ double maxnormr = 0;
+ if(maxneig >= nmin) {
+ D range = distFunc.getDistanceFactory().fromDouble(maxdist);
+ // Compute the largest neighborhood we will need.
+ DistanceDBIDList<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
+ // TODO: Ensure the set is sorted. Should be a no-op with most indexes.
+ // For any critical distance, compute the normalized MDEF score.
+ for(DoubleIntPair c : cdist) {
+ // Only start when minimum size is fulfilled
+ if (c.second < nmin) {
+ continue;
+ }
+ final double r = c.first;
+ final double alpha_r = alpha * r;
+ // compute n(p_i, \alpha * r) from list (note: alpha_r is different from c!)
+ final int n_alphar = elementsAtRadius(cdist, alpha_r);
+ // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF}
+ MeanVariance mv_n_r_alpha = new MeanVariance();
+ // TODO: optimize for double distances
+ for (DistanceDBIDListIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // Stop at radius r
+ if(neighbor.getDistance().doubleValue() > r) {
+ break;
+ }
+ int rn_alphar = elementsAtRadius(interestingDistances.get(neighbor), alpha_r);
+ mv_n_r_alpha.put(rn_alphar);
+ }
+ // We only use the average and standard deviation
+ final double nhat_r_alpha = mv_n_r_alpha.getMean();
+ final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();
+
+ // Redundant divisions removed.
+ final double mdef = (nhat_r_alpha - n_alphar); // / nhat_r_alpha;
+ final double sigmamdef = sigma_nhat_r_alpha; // / nhat_r_alpha;
+ final double mdefnorm = mdef / sigmamdef;
+
+ if(mdefnorm > maxmdefnorm) {
+ maxmdefnorm = mdefnorm;
+ maxnormr = r;
+ }
+ }
+ }
+ else {
+ // FIXME: when nmin was not fulfilled - what is the proper value then?
+ maxmdefnorm = 1.0;
+ maxnormr = maxdist;
+ }
+ mdef_norm.putDouble(iditer, maxmdefnorm);
+ mdef_radius.putDouble(iditer, maxnormr);
+ minmax.put(maxmdefnorm);
+ if(progressLOCI != null) {
+ progressLOCI.incrementProcessed(LOG);
+ }
+ }
+ if(progressLOCI != null) {
+ progressLOCI.ensureCompleted(LOG);
+ }
+ Relation<Double> scoreResult = new MaterializedRelation<>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ result.addChildResult(new MaterializedRelation<>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));
+ return result;
+ }
+
+ /**
+ * Get the number of objects for a given radius, from the list of critical
+ * distances, storing (radius, count) pairs.
+ *
+ * @param criticalDistances
+ * @param radius
+ * @return Number of elements at the given radius
+ */
+ protected int elementsAtRadius(List<DoubleIntPair> criticalDistances, final double radius) {
+ int n_r = 0;
+ for(DoubleIntPair c2 : criticalDistances) {
+ if(c2.first > radius) {
+ break;
+ }
+ if(c2.second != Integer.MIN_VALUE) {
+ // Update
+ n_r = c2.second;
+ }
+ }
+ return n_r;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ protected D rmax = null;
+
+ protected int nmin = 0;
+
+ protected double alpha = 0.5;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
+ final DistanceParameter<D> rmaxP = new DistanceParameter<>(RMAX_ID, distanceFactory);
+ if(config.grab(rmaxP)) {
+ rmax = rmaxP.getValue();
+ }
+
+ final IntParameter nminP = new IntParameter(NMIN_ID, 20);
+ if(config.grab(nminP)) {
+ nmin = nminP.getValue();
+ }
+
+ final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.5);
+ if(config.grab(alphaP)) {
+ alpha = alphaP.getValue();
+ }
+ }
+
+ @Override
+ protected LOCI<O, D> makeInstance() {
+ return new LOCI<>(distanceFunction, rmax, nmin, alpha);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
new file mode 100644
index 00000000..302dafe6
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
@@ -0,0 +1,293 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * <p>
+ * Algorithm to compute density-based local outlier factors in a database based
+ * on a specified parameter {@link Parameterizer#K_ID} ({@code -lof.k}).
+ * </p>
+ *
+ * <p>
+ * The original LOF parameter was called &quot;minPts&quot;, but for consistency
+ * within ELKI we have renamed this parameter to &quot;k&quot;.
+ * </p>
+ *
+ * <p>
+ * Reference: <br>
+ * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying
+ * Density-Based Local Outliers. <br>
+ * In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00),
+ * Dallas, TX, 2000.
+ * </p>
+ *
+ * @author Erich Schubert
+ * @author Elke Achtert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Title("LOF: Local Outlier Factor")
+@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'")
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "outlier.LOF", "LOF" })
+public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LOF.class);
+
+ /**
+ * Holds the value of {@link Parameterizer#K_ID}.
+ */
+ protected int k = 2;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param distanceFunction the neighborhood distance function
+ */
+ public LOF(int k, DistanceFunction<? super O, D> distanceFunction) {
+ super(distanceFunction);
+ this.k = k + 1;
+ }
+
+ /**
+ * Performs the Generalized LOF_SCORE algorithm on the given database.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
+ DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ // "HEAVY" flag for knn query since it is used more than once
+ KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ knnq = preproc.getKNNQuery(dq, k);
+ }
+ DBIDs ids = relation.getDBIDs();
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LRDs.", LOG);
+ }
+ WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ {
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ final double nkdist;
+ if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
+ nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
+ } else {
+ nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
+ }
+ sum += Math.max(neighbor.doubleDistance(), nkdist);
+ count++;
+ }
+ } else {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ count++;
+ }
+ }
+ // Avoid division by 0
+ final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
+ lrds.putDouble(iter, lrd);
+ if (lrdsProgress != null) {
+ lrdsProgress.incrementProcessed(LOG);
+ }
+ }
+ if (lrdsProgress != null) {
+ lrdsProgress.ensureCompleted(LOG);
+ }
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+ {
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final double lof;
+ final double lrdp = lrds.doubleValue(iter);
+ final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ if (!Double.isInfinite(lrdp)) {
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ final double val = lrds.doubleValue(neighbor);
+ sum += val;
+ count++;
+ if (Double.isInfinite(val)) {
+ break;
+ }
+ }
+ lof = sum / (lrdp * count);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(iter, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOF_SCORE, must be an integer greater than
+ * 1.
+ */
+ public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+ }
+
+ @Override
+ protected LOF<O, D> makeInstance() {
+ return new LOF<>(k, distanceFunction);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
new file mode 100644
index 00000000..15ff690a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
@@ -0,0 +1,442 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * LoOP: Local Outlier Probabilities
+ *
+ * Distance/density based algorithm similar to LOF to detect outliers, but with
+ * statistical methods to achieve better result stability.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> type of objects handled by this algorithm
+ * @param <D> type of distances used
+ */
+@Title("LoOP: Local Outlier Probabilities")
+@Description("Variant of the LOF algorithm normalized using statistical values.")
+@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "LoOP: Local Outlier Probabilities", booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", url = "http://dx.doi.org/10.1145/1645953.1646195")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP", "outlier.LoOP" })
+public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LoOP.class);
+
+ /**
+ * The distance function to determine the reachability distance between
+ * database objects.
+ */
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");
+
+ /**
+ * The distance function to determine the reachability distance between
+ * database objects.
+ */
+ public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOOP_SCORE, must be an integer greater than 1.
+ */
+ public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOOP_SCORE, must be an integer greater than 1.
+ */
+ public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOOP_SCORE, must be an integer greater than 1.
+ */
+ public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation.");
+
+ /**
+ * Holds the value of {@link #KREACH_ID}.
+ */
+ int kreach;
+
+ /**
+ * Holds the value of {@link #KCOMP_ID}.
+ */
+ int kcomp;
+
+ /**
+ * Hold the value of {@link #LAMBDA_ID}.
+ */
+ double lambda;
+
+ /**
+ * Preprocessor Step 1.
+ */
+ protected DistanceFunction<? super O, D> reachabilityDistanceFunction;
+
+ /**
+ * Preprocessor Step 2.
+ */
+ protected DistanceFunction<? super O, D> comparisonDistanceFunction;
+
+ /**
+ * Include object itself in kNN neighborhood.
+ */
+ static boolean objectIsInKNN = false;
+
+ /**
+ * Constructor with parameters.
+ *
+ * @param kreach k for reachability
+ * @param kcomp k for comparison
+ * @param reachabilityDistanceFunction distance function for reachability
+ * @param comparisonDistanceFunction distance function for comparison
+ * @param lambda Lambda parameter
+ */
+ public LoOP(int kreach, int kcomp, DistanceFunction<? super O, D> reachabilityDistanceFunction, DistanceFunction<? super O, D> comparisonDistanceFunction, double lambda) {
+ super();
+ this.kreach = kreach;
+ this.kcomp = kcomp;
+ this.reachabilityDistanceFunction = reachabilityDistanceFunction;
+ this.comparisonDistanceFunction = comparisonDistanceFunction;
+ this.lambda = lambda;
+ }
+
+ /**
+ * Get the kNN queries for the algorithm.
+ *
+ * @param database Database to analyze
+ * @param relation Relation to analyze
+ * @param stepprog Progress logger, may be {@code null}
+ * @return result
+ */
+ protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
+ KNNQuery<O, D> knnComp;
+ KNNQuery<O, D> knnReach;
+ if (comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
+ // We need each neighborhood twice - use "HEAVY" flag.
+ knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (knnComp == null) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, comparisonDistanceFunction, kcomp);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction);
+ knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
+ } else {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Optimized neighborhoods provided by database.", LOG);
+ }
+ }
+ knnReach = knnComp;
+ } else {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", LOG);
+ }
+ knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach);
+ knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp);
+ }
+ return new Pair<>(knnComp, knnReach);
+ }
+
+ /**
+ * Performs the LoOP algorithm on the given database.
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final double sqrt2 = Math.sqrt(2.0);
+
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(5) : null;
+
+ Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
+ KNNQuery<O, D> knnComp = pair.getFirst();
+ KNNQuery<O, D> knnReach = pair.getSecond();
+
+ // Assert we got something
+ if (knnComp == null) {
+ throw new AbortException("No kNN queries supported by database for comparison distance function.");
+ }
+ if (knnReach == null) {
+ throw new AbortException("No kNN queries supported by database for density estimation distance function.");
+ }
+
+ // Probabilistic distances
+ WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ Mean mean = new Mean();
+ {// computing PRDs
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing pdists", LOG);
+ }
+ FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null;
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNList<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
+ mean.reset();
+ // use first kref neighbors as reference set
+ int ks = 0;
+ // TODO: optimize for double distances
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ final double d = neighbor.doubleDistance();
+ mean.put(d * d);
+ ks++;
+ if (ks >= kreach) {
+ break;
+ }
+ }
+ }
+ } else {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ double d = neighbor.getDistance().doubleValue();
+ mean.put(d * d);
+ ks++;
+ if (ks >= kreach) {
+ break;
+ }
+ }
+ }
+ }
+ double pdist = lambda * Math.sqrt(mean.getMean());
+ pdists.putDouble(iditer, pdist);
+ if (prdsProgress != null) {
+ prdsProgress.incrementProcessed(LOG);
+ }
+ }
+ }
+ // Compute PLOF values.
+ WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ MeanVariance mvplof = new MeanVariance();
+ {// compute LOOP_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(4, "Computing PLOF", LOG);
+ }
+
+ FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
+ MeanVariance mv = new MeanVariance();
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNList<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
+ mv.reset();
+ // use first kref neighbors as comparison set.
+ int ks = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ mv.put(pdists.doubleValue(neighbor));
+ ks++;
+ if (ks >= kcomp) {
+ break;
+ }
+ }
+ }
+ double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0);
+ if (Double.isNaN(plof) || Double.isInfinite(plof)) {
+ plof = 1.0;
+ }
+ plofs.putDouble(iditer, plof);
+ mvplof.put((plof - 1.0) * (plof - 1.0));
+
+ if (progressPLOFs != null) {
+ progressPLOFs.incrementProcessed(LOG);
+ }
+ }
+ }
+
+ double nplof = lambda * Math.sqrt(mvplof.getMean());
+ if (LOG.isDebugging()) {
+ LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
+ }
+
+ // Compute final LoOP values.
+ WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ {// compute LOOP_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(5, "Computing LoOP scores", LOG);
+ }
+
+ FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2)));
+
+ if (progressLOOPs != null) {
+ progressLOOPs.incrementProcessed(LOG);
+ }
+ }
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ final TypeInformation type;
+ if (reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
+ type = reachabilityDistanceFunction.getInputTypeRestriction();
+ } else {
+ type = new CombinedTypeInformation(reachabilityDistanceFunction.getInputTypeRestriction(), comparisonDistanceFunction.getInputTypeRestriction());
+ }
+ return TypeUtil.array(type);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ /**
+ * Holds the value of {@link #KREACH_ID}.
+ */
+ int kreach = 0;
+
+ /**
+ * Holds the value of {@link #KCOMP_ID}.
+ */
+ int kcomp = 0;
+
+ /**
+ * Hold the value of {@link #LAMBDA_ID}.
+ */
+ double lambda = 2.0;
+
+ /**
+ * Preprocessor Step 1.
+ */
+ protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
+
+ /**
+ * Preprocessor Step 2.
+ */
+ protected DistanceFunction<O, D> comparisonDistanceFunction = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter kcompP = new IntParameter(KCOMP_ID);
+ kcompP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kcompP)) {
+ kcomp = kcompP.intValue();
+ }
+
+ final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ if (config.grab(compDistP)) {
+ comparisonDistanceFunction = compDistP.instantiateClass(config);
+ }
+
+ final IntParameter kreachP = new IntParameter(KREACH_ID);
+ kreachP.addConstraint(new GreaterConstraint(1));
+ kreachP.setOptional(true);
+ if (config.grab(kreachP)) {
+ kreach = kreachP.intValue();
+ } else {
+ kreach = kcomp;
+ }
+
+ final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
+ if (config.grab(reachDistP)) {
+ reachabilityDistanceFunction = reachDistP.instantiateClass(config);
+ }
+
+ // TODO: make default 1.0?
+ final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 2.0);
+ lambdaP.addConstraint(new GreaterConstraint(0.0));
+ if (config.grab(lambdaP)) {
+ lambda = lambdaP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LoOP<O, D> makeInstance() {
+ DistanceFunction<O, D> realreach = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : comparisonDistanceFunction;
+ return new LoOP<>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java
new file mode 100644
index 00000000..c01c914f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java
@@ -0,0 +1,418 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNListener;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNAndRKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Incremental version of the {@link LOF} Algorithm, supports insertions and
+ * removals.
+ *
+ * @author Elke Achtert
+ *
+ * @apiviz.has FlexibleLOF.LOFResult oneway - - updates
+ */
+// TODO: related to publication?
+public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, D> {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(OnlineLOF.class);
+
+ /**
+ * Constructor.
+ *
+ * @param krefer The number of neighbors for reference
+ * @param kreach The number of neighbors for reachability distance
+ * @param neighborhoodDistanceFunction the neighborhood distance function
+ * @param reachabilityDistanceFunction the reachability distance function
+ */
+ public OnlineLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ super(krefer, kreach, neighborhoodDistanceFunction, reachabilityDistanceFunction);
+ }
+
+ /**
+ * Performs the Generalized LOF_SCORE algorithm on the given database by
+ * calling {@code #doRunInTime(Database)} and adds a {@link LOFKNNListener} to
+ * the preprocessors.
+ */
+ @Override
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
+
+ Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
+ KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
+ KNNQuery<O, D> kNNReach = queries.getFirst().getSecond();
+ RKNNQuery<O, D> rkNNRefer = queries.getSecond().getFirst();
+ RKNNQuery<O, D> rkNNReach = queries.getSecond().getSecond();
+
+ LOFResult<O, D> lofResult = super.doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog);
+ lofResult.setRkNNRefer(rkNNRefer);
+ lofResult.setRkNNReach(rkNNReach);
+
+ // add listener
+ KNNListener l = new LOFKNNListener(lofResult);
+ ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
+
+ return lofResult.getResult();
+ }
+
+ /**
+ * Get the kNN and rkNN queries for the algorithm.
+ *
+ * @param relation Data
+ * @param stepprog Progress logger
+ * @return the kNN and rkNN queries
+ */
+ private Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
+ // Use "HEAVY" flag, since this is an online algorithm
+ KNNQuery<O, D> kNNRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O, D> rkNNRefer = QueryUtil.getRKNNQuery(relation, referenceDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+
+ // No optimized kNN query or RkNN query - use a preprocessor!
+ if (kNNRefer == null || rkNNRefer == null) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
+ }
+ MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
+ DistanceQuery<O, D> ndq = database.getDistanceQuery(relation, referenceDistanceFunction);
+ kNNRefer = preproc.getKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
+ rkNNRefer = preproc.getRKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
+ // add as index
+ relation.getDatabase().addIndex(preproc);
+ } else {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
+ }
+ }
+
+ KNNQuery<O, D> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ if (kNNReach == null || rkNNReach == null) {
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
+ }
+ ListParameterization config = new ListParameterization();
+ config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
+ config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, kreach);
+ MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
+ kNNReach = preproc.getKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
+ rkNNReach = preproc.getRKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
+ // add as index
+ relation.getDatabase().addIndex(preproc);
+ }
+
+ Pair<KNNQuery<O, D>, KNNQuery<O, D>> kNNPair = new Pair<>(kNNRefer, kNNReach);
+ Pair<RKNNQuery<O, D>, RKNNQuery<O, D>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
+
+ return new Pair<>(kNNPair, rkNNPair);
+ }
+
+ /**
+ * Encapsulates a listener for changes of kNNs used in the online LOF
+ * algorithm.
+ *
+ * @author Elke Achtert
+ *
+ * @apiviz.exclude
+ */
+ private class LOFKNNListener implements KNNListener {
+ /**
+ * Holds the first event of one of the both preprocessors. The online
+ * algorithm is waiting until both events have been received, i.e. until
+ * both preprocessors are up to date.
+ */
+ private KNNChangeEvent firstEventReceived;
+
+ /**
+ * Holds the result of a former run of the LOF algorithm.
+ */
+ private LOFResult<O, D> lofResult;
+
+ /**
+ * Constructs a listener for the LOF algorithm.
+ *
+ * @param lofResult the result of a former run of the LOF algorithm
+ */
+ public LOFKNNListener(LOFResult<O, D> lofResult) {
+ this.lofResult = lofResult;
+ }
+
+ @Override
+ public void kNNsChanged(KNNChangeEvent e) {
+ AbstractMaterializeKNNPreprocessor<O, D, ?> p1 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNRefer()).getPreprocessor();
+ AbstractMaterializeKNNPreprocessor<O, D, ?> p2 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNReach()).getPreprocessor();
+
+ if (firstEventReceived == null) {
+ if (e.getSource().equals(p1) && e.getSource().equals(p2)) {
+ kNNsChanged(e, e);
+ } else {
+ firstEventReceived = e;
+ }
+ } else {
+ if (e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
+ kNNsChanged(e, firstEventReceived);
+ firstEventReceived = null;
+ } else if (e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
+ kNNsChanged(firstEventReceived, e);
+ firstEventReceived = null;
+ } else {
+ throw new UnsupportedOperationException("Event sources do not fit!");
+ }
+ }
+ }
+
+ /**
+ * Invoked after the events of both preprocessors have been received, i.e.
+ * until both preprocessors are up to date.
+ *
+ * @param e1 the change event of the first preprocessor
+ * @param e2 the change event of the second preprocessor
+ */
+ private void kNNsChanged(KNNChangeEvent e1, KNNChangeEvent e2) {
+ if (!e1.getType().equals(e2.getType())) {
+ throw new UnsupportedOperationException("Event types do not fit: " + e1.getType() + " != " + e2.getType());
+ }
+ if (!e1.getObjects().equals(e2.getObjects())) {
+ throw new UnsupportedOperationException("Objects do not fit: " + e1.getObjects() + " != " + e2.getObjects());
+ }
+
+ if (e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
+ kNNsRemoved(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
+ } else if (e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
+ kNNsInserted(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
+ } else {
+ throw new UnsupportedOperationException("Unsupported event type: " + e1.getType());
+ }
+ }
+
+ /**
+ * Invoked after kNNs have been inserted and updated, updates the result.
+ *
+ * @param insertions the ids of the newly inserted neighborhoods
+ * @param updates1 the ids of the updated neighborhood w.r.t. the
+ * neighborhood distance function
+ * @param updates2 the ids of the updated neighborhood w.r.t. the
+ * reachability distance function
+ * @param lofResult the result of the former LOF run
+ */
+ private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
+
+ // recompute lrds
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Recompute LRDs.", LOG);
+ }
+ ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
+ List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
+ ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
+ ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
+ WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
+ for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ double new_lrd = new_lrds.doubleValue(iter);
+ double old_lrd = lofResult.getLrds().doubleValue(iter);
+ if (Double.isNaN(old_lrd) || old_lrd != new_lrd) {
+ lofResult.getLrds().putDouble(iter, new_lrd);
+ affected_lrd_ids.add(iter);
+ }
+ }
+
+ // recompute lofs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Recompute LOFS.", LOG);
+ }
+ List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
+ ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
+ recomputeLOFs(affected_lof_ids, lofResult);
+
+ // fire result changed
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Inform listeners.", LOG);
+ }
+ lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+ }
+
+ /**
+ * Invoked after kNNs have been removed and updated, updates the result.
+ *
+ * @param deletions the ids of the removed neighborhoods
+ * @param updates1 the ids of the updated neighborhood w.r.t. the
+ * neighborhood distance function
+ * @param updates2 the ids of the updated neighborhood w.r.t. the
+ * reachability distance function
+ * @param lofResult the result of the former LOF run
+ */
+ private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
+
+ // delete lrds and lofs
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
+ }
+ for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
+ lofResult.getLrds().delete(iter);
+ lofResult.getLofs().delete(iter);
+ }
+
+ // recompute lrds
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Recompute LRDs.", LOG);
+ }
+ ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
+ List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
+ ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
+ ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
+ WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
+ for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ double new_lrd = new_lrds.doubleValue(iter);
+ double old_lrd = lofResult.getLrds().doubleValue(iter);
+ if (old_lrd != new_lrd) {
+ lofResult.getLrds().putDouble(iter, new_lrd);
+ affected_lrd_ids.add(iter);
+ }
+ }
+
+ // recompute lofs
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Recompute LOFS.", LOG);
+ }
+ List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
+ ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
+ recomputeLOFs(affected_lof_ids, lofResult);
+
+ // fire result changed
+ if (stepprog != null) {
+ stepprog.beginStep(4, "Inform listeners.", LOG);
+ }
+ lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+ }
+
+ /**
+ * Merges the ids of the query result with the specified ids.
+ *
+ * @param queryResults the list of query results
+ * @param ids the list of ids
+ * @return a set containing the ids of the query result and the specified
+ * ids
+ */
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDList<D>> queryResults, DBIDs... ids) {
+ ModifiableDBIDs result = DBIDUtil.newHashSet();
+ for (DBIDs dbids : ids) {
+ result.addDBIDs(dbids);
+ }
+ for (DistanceDBIDList<D> queryResult : queryResults) {
+ result.addDBIDs(queryResult);
+ }
+ return DBIDUtil.newArray(result);
+ }
+
+ /**
+ * Recomputes the lofs of the specified ids.
+ *
+ * @param ids the ids of the lofs to be recomputed
+ * @param lofResult the result of the former LOF run
+ */
+ private void recomputeLOFs(DBIDs ids, LOFResult<O, D> lofResult) {
+ Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lofResult.getLrds(), lofResult.getKNNRefer());
+ WritableDoubleDataStore new_lofs = lofsAndMax.getFirst();
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ lofResult.getLofs().putDouble(iter, new_lofs.doubleValue(iter));
+ }
+ // track the maximum value for normalization.
+ DoubleMinMax new_lofminmax = lofsAndMax.getSecond();
+
+ // Actualize meta info
+ if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
+ BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
+ scoreMeta.setActualMaximum(new_lofminmax.getMax());
+ }
+
+ if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
+ BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
+ scoreMeta.setActualMinimum(new_lofminmax.getMin());
+ }
+ }
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends FlexibleLOF.Parameterizer<O, D> {
+ @Override
+ protected OnlineLOF<O, D> makeInstance() {
+ return new OnlineLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
new file mode 100644
index 00000000..2ff7534a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
@@ -0,0 +1,287 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * A simple variant of the LOF algorithm, which uses a simple kernel density
+ * estimation instead of the local reachability density.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ * @apiviz.has KernelDensityFunction
+ *
+ * @param <O> the type of objects handled by this Algorithm
+ * @param <D> Distance type
+ */
+public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleKernelDensityLOF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Kernel density function
+ */
+ private KernelDensityFunction kernel;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param kernel Kernel function
+ */
+ public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) {
+ super(distance);
+ this.k = k + 1;
+ this.kernel = kernel;
+ }
+
+ /**
+ * Run the naive kernel density LOF algorithm.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
+
+ final int dim = RelationUtil.dimensionality(relation);
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing densities.", LOG);
+ }
+ WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ int count = 0;
+ double sum = 0.0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
+ final double v = neighbor.doubleDistance() / max;
+ sum += kernel.density(v) / Math.pow(max, dim);
+ count++;
+ }
+ } else {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
+ final double v = neighbor.getDistance().doubleValue() / max;
+ sum += kernel.density(v) / Math.pow(max, dim);
+ count++;
+ }
+ }
+ final double density = sum / count;
+ dens.putDouble(it, density);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing KLOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = dens.doubleValue(it);
+ final double lof;
+ if (lrdp > 0) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += dens.doubleValue(neighbor);
+ count++;
+ }
+ lof = sum / (count * lrdp);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(it, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD));
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for kernel density LOF kernel.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("kernellof.kernel", "Kernel to use for kernel density LOF.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ /**
+ * Kernel density function parameter
+ */
+ KernelDensityFunction kernel;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ if (config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected SimpleKernelDensityLOF<O, D> makeInstance() {
+ return new SimpleKernelDensityLOF<>(k, distanceFunction, kernel);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
new file mode 100644
index 00000000..413eaca1
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
@@ -0,0 +1,264 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * A simplified version of the original LOF algorithm, which does not use the
+ * reachability distance, yielding less stable results on inliers.
+ *
+ * Reference:
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local outlier detection reconsidered: a generalized view on locality with
+ * applications to spatial, video, and network outlier detection<br />
+ * In: Data Mining and Knowledge Discovery
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title = "Local outlier detection reconsidered: a generalized view on locality with applications to spatial, video, and network outlier detection", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+@Alias({ "SimpleLOF", "outlier.SimpleLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF" })
+public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimplifiedLOF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ */
+ public SimplifiedLOF(int k, DistanceFunction<? super O, D> distance) {
+ super(distance);
+ this.k = k + 1;
+ }
+
+ /**
+ * Run the Simple LOF algorithm.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null;
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
+ database.addIndex(preproc);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing densities.", LOG);
+ }
+ WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += neighbor.doubleDistance();
+ count++;
+ }
+ } else {
+ for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += neighbor.getDistance().doubleValue();
+ count++;
+ }
+ }
+ // Avoid division by 0
+ final double lrd = (sum > 0) ? (count / sum) : 0;
+ dens.putDouble(it, lrd);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing SLOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = dens.doubleValue(it);
+ final double lof;
+ if (lrdp > 0) {
+ final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += dens.doubleValue(neighbor);
+ count++;
+ }
+ lof = sum / (count * lrdp);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(it, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+ }
+
+ @Override
+ protected SimplifiedLOF<O, D> makeInstance() {
+ return new SimplifiedLOF<>(k, distanceFunction);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java
new file mode 100644
index 00000000..48d4b16a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * <p>LOF family of outlier detection algorithms.</p>
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+