summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java123
1 files changed, 39 insertions, 84 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
index b990ef35..3ba56b16 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,27 +30,20 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -61,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -77,9 +71,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @apiviz.has KernelDensityFunction
*
* @param <O> the type of objects handled by this Algorithm
- * @param <D> Distance type
*/
-public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+public class SimpleKernelDensityLOF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -101,7 +94,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
* @param k the value of k
* @param kernel Kernel function
*/
- public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) {
+ public SimpleKernelDensityLOF(int k, DistanceFunction<? super O> distance, KernelDensityFunction kernel) {
super(distance);
this.k = k + 1;
this.kernel = kernel;
@@ -116,112 +109,75 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
-
final int dim = RelationUtil.dimensionality(relation);
-
DBIDs ids = relation.getDBIDs();
- // "HEAVY" flag for KNN Query since it is used more than once
- KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if (!(knnq instanceof PreprocessorKNNQuery)) {
- if (stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- database.addIndex(preproc);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
- knnq = preproc.getKNNQuery(rdq, k);
- }
+ LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
// Compute LRDs
- if (stepprog != null) {
- stepprog.beginStep(2, "Computing densities.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing densities.");
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
int count = 0;
double sum = 0.0;
- if (neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
- final double v = neighbor.doubleDistance() / max;
- sum += kernel.density(v) / MathUtil.powi(max, dim);
- count++;
- }
- } else {
- for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
- final double v = neighbor.getDistance().doubleValue() / max;
- sum += kernel.density(v) / MathUtil.powi(max, dim);
- count++;
+ // Fast version for double distances
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
+ continue;
}
+ double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
+ final double v = neighbor.doubleValue() / max;
+ sum += kernel.density(v) / MathUtil.powi(max, dim);
+ count++;
}
final double density = sum / count;
dens.putDouble(it, density);
- if (densProgress != null) {
- densProgress.incrementProcessed(LOG);
- }
- }
- if (densProgress != null) {
- densProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(densProgress);
}
+ LOG.ensureCompleted(densProgress);
// compute LOF_SCORE of each db object
- if (stepprog != null) {
- stepprog.beginStep(3, "Computing KLOFs.", LOG);
- }
+ LOG.beginStep(stepprog, 3, "Computing KLOFs.");
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = dens.doubleValue(it);
final double lof;
- if (lrdp > 0) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ if(lrdp > 0) {
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if (DBIDUtil.equal(neighbor, it)) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
sum += dens.doubleValue(neighbor);
count++;
}
lof = sum / (count * lrdp);
- } else {
+ }
+ else {
lof = 1.0;
}
lofs.putDouble(it, lof);
// update minimum and maximum
lofminmax.put(lof);
- if (progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if (progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
+ LOG.ensureCompleted(progressLOFs);
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -246,9 +202,8 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
* @apiviz.exclude
*
* @param <O> vector type
- * @param <D> distance type
*/
- public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for kernel density LOF kernel.
*/
@@ -270,18 +225,18 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
- if (config.grab(pK)) {
+ if(config.grab(pK)) {
k = pK.getValue();
}
ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
- if (config.grab(kernelP)) {
+ if(config.grab(kernelP)) {
kernel = kernelP.instantiateClass(config);
}
}
@Override
- protected SimpleKernelDensityLOF<O, D> makeInstance() {
+ protected SimpleKernelDensityLOF<O> makeInstance() {
return new SimpleKernelDensityLOF<>(k, distanceFunction, kernel);
}
}