summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java147
1 files changed, 52 insertions, 95 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
index 28166c75..ff5529f5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -56,6 +50,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -75,10 +70,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* within ELKI we have renamed this parameter to "k".
* </p>
*
+ * Reference:
* <p>
- * Reference: <br>
- * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying
- * Density-Based Local Outliers. <br>
+ * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander:<br />
+ * LOF: Identifying Density-Based Local Outliers.<br />
* In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00),
* Dallas, TX, 2000.
* </p>
@@ -88,37 +83,40 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* @apiviz.has KNNQuery
*
- * @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> Distance type
+ * @param <O> the type of data objects handled by this algorithm
*/
@Title("LOF: Local Outlier Factor")
@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'")
-@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388")
-@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "outlier.LOF", "LOF" })
-public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander",//
+title = "LOF: Identifying Density-Based Local Outliers", //
+booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", //
+url = "http://dx.doi.org/10.1145/342009.335388")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "LOF" })
+public class LOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(LOF.class);
/**
- * Holds the value of {@link Parameterizer#K_ID}.
+ * The number of neighbors to query (including the query point!)
*/
protected int k = 2;
/**
* Constructor.
*
- * @param k the value of k
+ * @param k the number of neighbors to use for comparison (excluding the query
+ * point)
* @param distanceFunction the neighborhood distance function
*/
- public LOF(int k, DistanceFunction<? super O, D> distanceFunction) {
+ public LOF(int k, DistanceFunction<? super O> distanceFunction) {
super(distanceFunction);
this.k = k + 1;
}
/**
- * Performs the Generalized LOF_SCORE algorithm on the given database.
+ * Runs the LOF algorithm on the given database.
*
* @param database Database to query
* @param relation Data to process
@@ -126,42 +124,27 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
- DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
- // "HEAVY" flag for knn query since it is used more than once
- KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if(!(knnq instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- knnq = preproc.getKNNQuery(dq, k);
- }
DBIDs ids = relation.getDBIDs();
+ LOG.beginStep(stepprog, 1, "Materializing LOF neighborhoods.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
+
// Compute LRDs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing LRDs.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing LRDs.");
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
computeLRDs(knnq, ids, lrds);
// compute LOF_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing LOFs.", LOG);
- }
- DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ LOG.beginStep(stepprog, 3, "Computing LOFs.");
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
computeLOFScores(knnq, ids, lrds, lofs, lofminmax);
- if(stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -173,50 +156,26 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
* @param ids IDs to process
* @param lrds Reachability storage
*/
- private void computeLRDs(KNNQuery<O, D> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
+ private void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
double sum = 0.0;
int count = 0;
- if(neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, iter)) {
- continue;
- }
- KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
- final double nkdist;
- if(neighborsNeighbors instanceof DoubleDistanceKNNList) {
- nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
- }
- else {
- nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
- }
- sum += Math.max(neighbor.doubleDistance(), nkdist);
- count++;
- }
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, iter)) {
- continue;
- }
- KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
- sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
- count++;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
}
+ KNNList neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.doubleValue(), neighborsNeighbors.getKNNDistance());
+ count++;
}
// Avoid division by 0
final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
lrds.putDouble(iter, lrd);
- if(lrdsProgress != null) {
- lrdsProgress.incrementProcessed(LOG);
- }
- }
- if(lrdsProgress != null) {
- lrdsProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(lrdsProgress);
}
+ LOG.ensureCompleted(lrdsProgress);
}
/**
@@ -228,14 +187,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
* @param lofs Local outlier factor storage
* @param lofminmax Score minimum/maximum tracker
*/
- private void computeLOFScores(KNNQuery<O, D> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
+ private void computeLOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final double lof;
final double lrdp = lrds.doubleValue(iter);
- final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
if(!Double.isInfinite(lrdp)) {
- double sum = 0.0;
+ double sum = 0.;
int count = 0;
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
@@ -258,13 +217,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
// update minimum and maximum
lofminmax.put(lof);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
+ LOG.ensureCompleted(progressLOFs);
}
@Override
@@ -283,14 +238,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
* @author Erich Schubert
*
* @apiviz.exclude
+ *
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its LOF_SCORE, must be an integer greater than
- * 1.
+ * considered for computing its LOF score, must be an integer greater than
+ * or equal to 1.
*/
- public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF score.");
/**
* The neighborhood size to use.
@@ -302,14 +259,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
super.makeOptions(config);
final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(pK)) {
- k = pK.getValue();
+ k = pK.intValue();
}
}
@Override
- protected LOF<O, D> makeInstance() {
+ protected LOF<O> makeInstance() {
return new LOF<>(k, distanceFunction);
}
}