summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java129
1 files changed, 38 insertions, 91 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
index e5049877..c2e29f54 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,27 +30,20 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -61,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
@@ -88,10 +82,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @apiviz.has KernelDensityFunction
*
* @param <O> the type of objects handled by this Algorithm
- * @param <D> Distance type
*/
-@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", title = "Outlier Detection with Kernel Density Functions", booktitle = "Machine Learning and Data Mining in Pattern Recognition", url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6")
-public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", //
+title = "Outlier Detection with Kernel Density Functions", //
+booktitle = "Machine Learning and Data Mining in Pattern Recognition", //
+url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6")
+public class LDF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -125,7 +121,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param h Kernel bandwidth scaling
* @param c Score scaling parameter
*/
- public LDF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel, double h, double c) {
+ public LDF(int k, DistanceFunction<? super O> distance, KernelDensityFunction kernel, double h, double c) {
super(distance);
this.k = k + 1;
this.kernel = kernel;
@@ -142,84 +138,42 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
-
final int dim = RelationUtil.dimensionality(relation);
-
DBIDs ids = relation.getDBIDs();
- // "HEAVY" flag for KNN Query since it is used more than once
- KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if(!(knnq instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- database.addIndex(preproc);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
- knnq = preproc.getKNNQuery(rdq, k);
- }
+ LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
// Compute LDEs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing LDEs.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing LDEs.");
WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- if(neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- final double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
- if(nkdist > 0.) {
- final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
- sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
- count++;
- }
- else {
- sum = Double.POSITIVE_INFINITY;
- count++;
- break;
- }
+ // Fast version for double distances
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
+ continue;
}
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
- if(nkdist > 0.) {
- final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
- sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
- count++;
- }
- else {
- sum = Double.POSITIVE_INFINITY;
- count++;
- break;
- }
+ final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
+ if(!(nkdist > 0.)) {
+ sum = Double.POSITIVE_INFINITY;
+ count++;
+ break;
}
+ final double v = Math.max(nkdist, neighbor.doubleValue()) / (h * nkdist);
+ sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
+ count++;
}
ldes.putDouble(it, sum / count);
- if(densProgress != null) {
- densProgress.incrementProcessed(LOG);
- }
- }
- if(densProgress != null) {
- densProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(densProgress);
}
+ LOG.ensureCompleted(densProgress);
// Compute local density factors.
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing LDFs.", LOG);
- }
+ LOG.beginStep(stepprog, 3, "Computing LDFs.");
WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
@@ -227,7 +181,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = ldes.doubleValue(it);
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
@@ -245,20 +199,14 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
// update minimum and maximum
lofminmax.put(ldf);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
+ LOG.ensureCompleted(progressLOFs);
- if(stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Density Factor", "ldf-outlier", ldfs, ids);
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -283,9 +231,8 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @apiviz.exclude
*
* @param <O> vector type
- * @param <D> distance type
*/
- public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for kernel.
*/
@@ -353,7 +300,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
@Override
- protected LDF<O, D> makeInstance() {
+ protected LDF<O> makeInstance() {
return new LDF<>(k, distanceFunction, kernel, h, c);
}
}