diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java | 165 |
1 files changed, 99 insertions, 66 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java index 5aba41ec..66bed47a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java @@ -29,29 +29,31 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.QueryUtil; -import de.lmu.ifi.dbs.elki.database.datastore.DataStore; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; -import de.lmu.ifi.dbs.elki.math.Mean; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; @@ -118,19 +120,19 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(LOF.class); + private static final Logging LOG = Logging.getLogger(LOF.class); /** * The distance function to determine the reachability distance between * database objects. */ - public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects."); + public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects."); /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its LOF_SCORE, must be an integer greater than 1. */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); /** * Holds the value of {@link #K_ID}. @@ -189,9 +191,10 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou * calling {@link #doRunInTime}. * * @param relation Data to process + * @return LOF outlier result */ public OutlierResult run(Relation<O> relation) { - StepProgress stepprog = logger.isVerbose() ? new StepProgress("LOF", 3) : null; + StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null; Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog); KNNQuery<O, D> kNNRefer = pair.getFirst(); KNNQuery<O, D> kNNReach = pair.getSecond(); @@ -209,13 +212,12 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou // "HEAVY" flag for knnReach since it is used more than once KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); // No optimized kNN query - use a preprocessor! - if(!(knnReach instanceof PreprocessorKNNQuery)) { - if(stepprog != null) { - if(neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { - stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", logger); - } - else { - stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", logger); + if (!(knnReach instanceof PreprocessorKNNQuery)) { + if (stepprog != null) { + if (neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { + stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG); + } else { + stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG); } } MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k); @@ -226,10 +228,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou // knnReach is only used once KNNQuery<O, D> knnRefer; - if(neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { + if (neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { knnRefer = knnReach; - } - else { + } else { // do not materialize the first neighborhood, since it is used only once knnRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k); } @@ -251,30 +252,30 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou */ protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) { // Assert we got something - if(kNNRefer == null) { + if (kNNRefer == null) { throw new AbortException("No kNN queries supported by database for reference neighborhood distance function."); } - if(kNNReach == null) { + if (kNNReach == null) { throw new AbortException("No kNN queries supported by database for reachability distance function."); } // Compute LRDs - if(stepprog != null) { - stepprog.beginStep(2, "Computing LRDs.", logger); + if (stepprog != null) { + stepprog.beginStep(2, "Computing LRDs.", LOG); } WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach); // compute LOF_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(3, "Computing LOFs.", logger); + if (stepprog != null) { + stepprog.beginStep(3, "Computing LOFs.", LOG); } Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer); WritableDoubleDataStore lofs = lofsAndMax.getFirst(); // track the maximum value for normalization. DoubleMinMax lofminmax = lofsAndMax.getSecond(); - if(stepprog != null) { - stepprog.setCompleted(logger); + if (stepprog != null) { + stepprog.setCompleted(LOG); } // Build result representation. @@ -295,26 +296,44 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou */ protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) { WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - FiniteProgress lrdsProgress = logger.isVerbose() ? new FiniteProgress("LRD", ids.size(), logger) : null; - Mean mean = new Mean(); - for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - mean.reset(); - KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k); - for(DistanceResultPair<D> neighbor : neighbors) { - if(objectIsInKNN || !neighbor.sameDBID(iter)) { - KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); - mean.put(Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue())); + FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null; + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k); + double sum = 0.0; + int count = 0; + if (neighbors instanceof DoubleDistanceKNNList) { + // Fast version for double distances + for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { + KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); + final double nkdist; + if (neighborsNeighbors instanceof DoubleDistanceKNNList) { + nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance(); + } else { + nkdist = neighborsNeighbors.getKNNDistance().doubleValue(); + } + sum += Math.max(neighbor.doubleDistance(), nkdist); + count++; + } + } + } else { + for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { + KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); + sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()); + count++; + } } } // Avoid division by 0 - final double lrd = (mean.getCount() > 0) ? 1 / mean.getMean() : 0.0; + final double lrd = (sum > 0) ? (count / sum) : 0; lrds.putDouble(iter, lrd); - if(lrdsProgress != null) { - lrdsProgress.incrementProcessed(logger); + if (lrdsProgress != null) { + lrdsProgress.incrementProcessed(LOG); } } - if(lrdsProgress != null) { - lrdsProgress.ensureCompleted(logger); + if (lrdsProgress != null) { + lrdsProgress.ensureCompleted(LOG); } return lrds; } @@ -328,40 +347,40 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou * reference distance * @return the LOFs of the objects and the maximum LOF */ - protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DataStore<Double> lrds, KNNQuery<O, D> knnRefer) { + protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) { WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); // track the maximum value for normalization. DoubleMinMax lofminmax = new DoubleMinMax(); - FiniteProgress progressLOFs = logger.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), logger) : null; - Mean mean = new Mean(); - for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - double lrdp = lrds.get(iter); + FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null; + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final double lrdp = lrds.doubleValue(iter); final double lof; - if(lrdp > 0) { + if (lrdp > 0) { final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k); - mean.reset(); - for(DistanceResultPair<D> neighbor : neighbors) { + double sum = 0.0; + int count = 0; + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself - if(objectIsInKNN || !neighbor.sameDBID(iter)) { - mean.put(lrds.get(neighbor)); + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { + sum += lrds.doubleValue(neighbor); + count++; } } - lof = mean.getMean() / lrdp; - } - else { + lof = sum / (count * lrdp); + } else { lof = 1.0; } lofs.putDouble(iter, lof); // update minimum and maximum lofminmax.put(lof); - if(progressLOFs != null) { - progressLOFs.incrementProcessed(logger); + if (progressLOFs != null) { + progressLOFs.incrementProcessed(LOG); } } - if(progressLOFs != null) { - progressLOFs.ensureCompleted(logger); + if (progressLOFs != null) { + progressLOFs.ensureCompleted(LOG); } return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax); } @@ -369,10 +388,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou @Override public TypeInformation[] getInputTypeRestriction() { final TypeInformation type; - if(reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) { + if (reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) { type = reachabilityDistanceFunction.getInputTypeRestriction(); - } - else { + } else { type = new CombinedTypeInformation(neighborhoodDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction()); } return TypeUtil.array(type); @@ -380,7 +398,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -442,6 +460,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the knn query for the reference set. + * * @return the kNN query w.r.t. the reference neighborhood distance */ public KNNQuery<O, D> getKNNRefer() { @@ -449,6 +469,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the knn query for the reachability set. + * * @return the kNN query w.r.t. the reachability distance */ public KNNQuery<O, D> getKNNReach() { @@ -456,6 +478,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the LRD data store. + * * @return the LRD values of the objects */ public WritableDoubleDataStore getLrds() { @@ -463,6 +487,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the LOF data store. + * * @return the LOF values of the objects */ public WritableDoubleDataStore getLofs() { @@ -470,6 +496,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the outlier result. + * * @return the result of the run of the {@link LOF} algorithm */ public OutlierResult getResult() { @@ -486,6 +514,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the RkNN query for the reference set. + * * @return the RkNN query w.r.t. the reference neighborhood distance */ public RKNNQuery<O, D> getRkNNRefer() { @@ -493,6 +523,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the RkNN query for the reachability set. + * * @return the RkNN query w.r.t. the reachability distance */ public RKNNQuery<O, D> getRkNNReach() { @@ -518,7 +550,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou */ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { /** - * The neighborhood size to use + * The neighborhood size to use. */ protected int k = 2; @@ -536,13 +568,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1)); - if(config.grab(pK)) { + final IntParameter pK = new IntParameter(K_ID); + pK.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK)) { k = pK.getValue(); } final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); - if(config.grab(reachDistP)) { + if (config.grab(reachDistP)) { reachabilityDistanceFunction = reachDistP.instantiateClass(config); } } @@ -554,4 +587,4 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou return new LOF<O, D>(k, distanceFunction, rdist); } } -}
\ No newline at end of file +} |