diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java | 123 |
1 files changed, 39 insertions, 84 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java index b990ef35..3ba56b16 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,27 +30,20 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -61,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -77,9 +71,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @apiviz.has KernelDensityFunction * * @param <O> the type of objects handled by this Algorithm - * @param <D> Distance type */ -public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +public class SimpleKernelDensityLOF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -101,7 +94,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD * @param k the value of k * @param kernel Kernel function */ - public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) { + public SimpleKernelDensityLOF(int k, DistanceFunction<? super O> distance, KernelDensityFunction kernel) { super(distance); this.k = k + 1; this.kernel = kernel; @@ -116,112 +109,75 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD */ public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null; - final int dim = RelationUtil.dimensionality(relation); - DBIDs ids = relation.getDBIDs(); - // "HEAVY" flag for KNN Query since it is used more than once - KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! - if (!(knnq instanceof PreprocessorKNNQuery)) { - if (stepprog != null) { - stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); - database.addIndex(preproc); - DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); - knnq = preproc.getKNNQuery(rdq, k); - } + LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k); // Compute LRDs - if (stepprog != null) { - stepprog.beginStep(2, "Computing densities.", LOG); - } + LOG.beginStep(stepprog, 2, "Computing densities."); WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; - for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final KNNList neighbors = knnq.getKNNForDBID(it, k); int count = 0; double sum = 0.0; - if (neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if (DBIDUtil.equal(neighbor, it)) { - continue; - } - double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance(); - final double v = neighbor.doubleDistance() / max; - sum += kernel.density(v) / MathUtil.powi(max, dim); - count++; - } - } else { - for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if (DBIDUtil.equal(neighbor, it)) { - continue; - } - double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue(); - final double v = neighbor.getDistance().doubleValue() / max; - sum += kernel.density(v) / MathUtil.powi(max, dim); - count++; + // Fast version for double distances + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, it)) { + continue; } + double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance(); + final double v = neighbor.doubleValue() / max; + sum += kernel.density(v) / MathUtil.powi(max, dim); + count++; } final double density = sum / count; dens.putDouble(it, density); - if (densProgress != null) { - densProgress.incrementProcessed(LOG); - } - } - if (densProgress != null) { - densProgress.ensureCompleted(LOG); + LOG.incrementProcessed(densProgress); } + LOG.ensureCompleted(densProgress); // compute LOF_SCORE of each db object - if (stepprog != null) { - stepprog.beginStep(3, "Computing KLOFs.", LOG); - } + LOG.beginStep(stepprog, 3, "Computing KLOFs."); WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); // track the maximum value for normalization. DoubleMinMax lofminmax = new DoubleMinMax(); FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null; - for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { final double lrdp = dens.doubleValue(it); final double lof; - if (lrdp > 0) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + if(lrdp > 0) { + final KNNList neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; - for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself - if (DBIDUtil.equal(neighbor, it)) { + if(DBIDUtil.equal(neighbor, it)) { continue; } sum += dens.doubleValue(neighbor); count++; } lof = sum / (count * lrdp); - } else { + } + else { lof = 1.0; } lofs.putDouble(it, lof); // update minimum and maximum lofminmax.put(lof); - if (progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } - } - if (progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); + LOG.incrementProcessed(progressLOFs); } + LOG.ensureCompleted(progressLOFs); - if (stepprog != null) { - stepprog.setCompleted(LOG); - } + LOG.setCompleted(stepprog); // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); @@ -246,9 +202,8 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD * @apiviz.exclude * * @param <O> vector type - * @param <D> distance type */ - public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Option ID for kernel density LOF kernel. */ @@ -270,18 +225,18 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID); pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); - if (config.grab(pK)) { + if(config.grab(pK)) { k = pK.getValue(); } ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class); - if (config.grab(kernelP)) { + if(config.grab(kernelP)) { kernel = kernelP.instantiateClass(config); } } @Override - protected SimpleKernelDensityLOF<O, D> makeInstance() { + protected SimpleKernelDensityLOF<O> makeInstance() { return new SimpleKernelDensityLOF<>(k, distanceFunction, kernel); } } |