diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java | 147 |
1 files changed, 52 insertions, 95 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java index 28166c75..ff5529f5 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -56,6 +50,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -75,10 +70,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * within ELKI we have renamed this parameter to "k". * </p> * + * Reference: * <p> - * Reference: <br> - * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying - * Density-Based Local Outliers. <br> + * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander:<br /> + * LOF: Identifying Density-Based Local Outliers.<br /> * In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00), * Dallas, TX, 2000. * </p> @@ -88,37 +83,40 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @apiviz.has KNNQuery * - * @param <O> the type of DatabaseObjects handled by this Algorithm - * @param <D> Distance type + * @param <O> the type of data objects handled by this algorithm */ @Title("LOF: Local Outlier Factor") @Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'") -@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388") -@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "outlier.LOF", "LOF" }) -public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander",// +title = "LOF: Identifying Density-Based Local Outliers", // +booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", // +url = "http://dx.doi.org/10.1145/342009.335388") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "LOF" }) +public class LOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ private static final Logging LOG = Logging.getLogger(LOF.class); /** - * Holds the value of {@link Parameterizer#K_ID}. + * The number of neighbors to query (including the query point!) */ protected int k = 2; /** * Constructor. * - * @param k the value of k + * @param k the number of neighbors to use for comparison (excluding the query + * point) * @param distanceFunction the neighborhood distance function */ - public LOF(int k, DistanceFunction<? super O, D> distanceFunction) { + public LOF(int k, DistanceFunction<? super O> distanceFunction) { super(distanceFunction); this.k = k + 1; } /** - * Performs the Generalized LOF_SCORE algorithm on the given database. + * Runs the LOF algorithm on the given database. * * @param database Database to query * @param relation Data to process @@ -126,42 +124,27 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase */ public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null; - DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction()); - // "HEAVY" flag for knn query since it is used more than once - KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! - if(!(knnq instanceof PreprocessorKNNQuery)) { - if(stepprog != null) { - stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); - knnq = preproc.getKNNQuery(dq, k); - } DBIDs ids = relation.getDBIDs(); + LOG.beginStep(stepprog, 1, "Materializing LOF neighborhoods."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k); + // Compute LRDs - if(stepprog != null) { - stepprog.beginStep(2, "Computing LRDs.", LOG); - } + LOG.beginStep(stepprog, 2, "Computing LRDs."); WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); computeLRDs(knnq, ids, lrds); // compute LOF_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(3, "Computing LOFs.", LOG); - } - DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + LOG.beginStep(stepprog, 3, "Computing LOFs."); WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB); // track the maximum value for normalization. DoubleMinMax lofminmax = new DoubleMinMax(); computeLOFScores(knnq, ids, lrds, lofs, lofminmax); - if(stepprog != null) { - stepprog.setCompleted(LOG); - } + LOG.setCompleted(stepprog); // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); return new OutlierResult(scoreMeta, scoreResult); } @@ -173,50 +156,26 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase * @param ids IDs to process * @param lrds Reachability storage */ - private void computeLRDs(KNNQuery<O, D> knnq, DBIDs ids, WritableDoubleDataStore lrds) { + private void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) { FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + final KNNList neighbors = knnq.getKNNForDBID(iter, k); double sum = 0.0; int count = 0; - if(neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, iter)) { - continue; - } - KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); - final double nkdist; - if(neighborsNeighbors instanceof DoubleDistanceKNNList) { - nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance(); - } - else { - nkdist = neighborsNeighbors.getKNNDistance().doubleValue(); - } - sum += Math.max(neighbor.doubleDistance(), nkdist); - count++; - } - } - else { - for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, iter)) { - continue; - } - KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); - sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()); - count++; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, iter)) { + continue; } + KNNList neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); + sum += Math.max(neighbor.doubleValue(), neighborsNeighbors.getKNNDistance()); + count++; } // Avoid division by 0 final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY; lrds.putDouble(iter, lrd); - if(lrdsProgress != null) { - lrdsProgress.incrementProcessed(LOG); - } - } - if(lrdsProgress != null) { - lrdsProgress.ensureCompleted(LOG); + LOG.incrementProcessed(lrdsProgress); } + LOG.ensureCompleted(lrdsProgress); } /** @@ -228,14 +187,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase * @param lofs Local outlier factor storage * @param lofminmax Score minimum/maximum tracker */ - private void computeLOFScores(KNNQuery<O, D> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { + private void computeLOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double lof; final double lrdp = lrds.doubleValue(iter); - final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + final KNNList neighbors = knnq.getKNNForDBID(iter, k); if(!Double.isInfinite(lrdp)) { - double sum = 0.0; + double sum = 0.; int count = 0; for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself @@ -258,13 +217,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase // update minimum and maximum lofminmax.put(lof); - if(progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } - } - if(progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); + LOG.incrementProcessed(progressLOFs); } + LOG.ensureCompleted(progressLOFs); } @Override @@ -283,14 +238,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase * @author Erich Schubert * * @apiviz.exclude + * + * @param <O> Object type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Parameter to specify the number of nearest neighbors of an object to be - * considered for computing its LOF_SCORE, must be an integer greater than - * 1. + * considered for computing its LOF score, must be an integer greater than + * or equal to 1. */ - public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF score."); /** * The neighborhood size to use. @@ -302,14 +259,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase super.makeOptions(config); final IntParameter pK = new IntParameter(K_ID); - pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(pK)) { - k = pK.getValue(); + k = pK.intValue(); } } @Override - protected LOF<O, D> makeInstance() { + protected LOF<O> makeInstance() { return new LOF<>(k, distanceFunction); } } |