diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java | 198 |
1 files changed, 91 insertions, 107 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java index d54b053f..8fce6503 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,26 +28,19 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -56,6 +49,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -68,28 +62,30 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * Reference: * <p> * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> - * Local outlier detection reconsidered: a generalized view on locality with - * applications to spatial, video, and network outlier detection<br /> - * In: Data Mining and Knowledge Discovery + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. * </p> * * @author Erich Schubert * * @apiviz.has KNNQuery * - * @param <O> the type of DatabaseObjects handled by this Algorithm - * @param <D> Distance type + * @param <O> the type of data objects handled by this algorithm */ -@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title = "Local outlier detection reconsidered: a generalized view on locality with applications to spatial, video, and network outlier detection", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1007/s10618-012-0300-z") -@Alias({ "SimpleLOF", "outlier.SimpleLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF" }) -public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +@Alias({ "SimplifiedLOF", "outlier.SimplifiedLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimplifiedLOF" }) +public class SimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ private static final Logging LOG = Logging.getLogger(SimplifiedLOF.class); /** - * Parameter k. + * The number of neighbors to query, excluding the query point. */ protected int k; @@ -98,7 +94,7 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi * * @param k the value of k */ - public SimplifiedLOF(int k, DistanceFunction<? super O, D> distance) { + public SimplifiedLOF(int k, DistanceFunction<? super O> distance) { super(distance); this.k = k + 1; } @@ -111,114 +107,103 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi * @return LOF outlier result */ public OutlierResult run(Database database, Relation<O> relation) { - StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null; - + StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Simplified LOF", 3) : null; DBIDs ids = relation.getDBIDs(); - // "HEAVY" flag for KNN Query since it is used more than once - KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! - if(!(knnq instanceof PreprocessorKNNQuery)) { - if(stepprog != null) { - stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); - database.addIndex(preproc); - DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); - knnq = preproc.getKNNQuery(rdq, k); - } + LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k); // Compute LRDs - if(stepprog != null) { - stepprog.beginStep(2, "Computing densities.", LOG); - } + LOG.beginStep(stepprog, 2, "Computing densities."); WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; - for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + computeSimplifiedLRDs(ids, knnq, dens); + + // compute LOF_SCORE of each db object + LOG.beginStep(stepprog, 3, "Computing SLOFs."); + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + DoubleMinMax lofminmax = new DoubleMinMax(); + computeSimplifiedLOFs(ids, knnq, dens, lofs, lofminmax); + + LOG.setCompleted(stepprog); + + // Build result representation. + DoubleRelation scoreResult = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids); + OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + + return result; + } + + /** + * Compute the simplified reachability densities. + * + * @param ids IDs to process + * @param knnq kNN query class + * @param lrds Density output + */ + private void computeSimplifiedLRDs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore lrds) { + FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final KNNList neighbors = knnq.getKNNForDBID(iter, k); double sum = 0.0; int count = 0; - if(neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, it)) { - continue; - } - sum += neighbor.doubleDistance(); - count++; - } - } - else { - for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, it)) { - continue; - } - sum += neighbor.getDistance().doubleValue(); - count++; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, iter)) { + continue; } + sum += neighbor.doubleValue(); + count++; } // Avoid division by 0 - final double lrd = (sum > 0) ? (count / sum) : 0; - dens.putDouble(it, lrd); - if(densProgress != null) { - densProgress.incrementProcessed(LOG); - } - } - if(densProgress != null) { - densProgress.ensureCompleted(LOG); - } - - // compute LOF_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(3, "Computing SLOFs.", LOG); + final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY; + lrds.putDouble(iter, lrd); + LOG.incrementProcessed(lrdsProgress); } - WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); - // track the maximum value for normalization. - DoubleMinMax lofminmax = new DoubleMinMax(); + LOG.ensureCompleted(lrdsProgress); + } - FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null; - for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final double lrdp = dens.doubleValue(it); + /** + * Compute the simplified LOF factors. + * + * @param ids IDs to compute for + * @param knnq kNN query class + * @param slrds Object densities + * @param lofs SLOF output storage + * @param lofminmax Minimum and maximum scores + */ + private void computeSimplifiedLOFs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore slrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { + FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simplified LOF scores.", ids.size(), LOG) : null; + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double lof; - if(lrdp > 0) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); - double sum = 0.0; + final double lrdp = slrds.doubleValue(iter); + final KNNList neighbors = knnq.getKNNForDBID(iter, k); + if(!Double.isInfinite(lrdp)) { + double sum = 0.; int count = 0; for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself - if(DBIDUtil.equal(neighbor, it)) { + if(DBIDUtil.equal(neighbor, iter)) { continue; } - sum += dens.doubleValue(neighbor); + final double val = slrds.doubleValue(neighbor); + sum += val; count++; + if(Double.isInfinite(val)) { + break; + } } - lof = sum / (count * lrdp); + lof = sum / (lrdp * count); } else { lof = 1.0; } - lofs.putDouble(it, lof); + lofs.putDouble(iter, lof); // update minimum and maximum lofminmax.put(lof); - if(progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } - } - if(progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); - } - - if(stepprog != null) { - stepprog.setCompleted(LOG); + LOG.incrementProcessed(progressLOFs); } - - // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids); - OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); - OutlierResult result = new OutlierResult(scoreMeta, scoreResult); - - return result; + LOG.ensureCompleted(progressLOFs); } @Override @@ -238,10 +223,9 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi * * @apiviz.exclude * - * @param <O> vector type - * @param <D> distance type + * @param <O> Object type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * The neighborhood size to use. */ @@ -252,14 +236,14 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi super.makeOptions(config); final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID); - pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(pK)) { k = pK.getValue(); } } @Override - protected SimplifiedLOF<O, D> makeInstance() { + protected SimplifiedLOF<O> makeInstance() { return new SimplifiedLOF<>(k, distanceFunction); } } |