summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java165
1 files changed, 99 insertions, 66 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
index 5aba41ec..66bed47a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
@@ -29,29 +29,31 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -118,19 +120,19 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LOF.class);
+ private static final Logging LOG = Logging.getLogger(LOF.class);
/**
* The distance function to determine the reachability distance between
* database objects.
*/
- public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOF_SCORE, must be an integer greater than 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
/**
* Holds the value of {@link #K_ID}.
@@ -189,9 +191,10 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* calling {@link #doRunInTime}.
*
* @param relation Data to process
+ * @return LOF outlier result
*/
public OutlierResult run(Relation<O> relation) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress("LOF", 3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog);
KNNQuery<O, D> kNNRefer = pair.getFirst();
KNNQuery<O, D> kNNReach = pair.getSecond();
@@ -209,13 +212,12 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
// "HEAVY" flag for knnReach since it is used more than once
KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if(!(knnReach instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- if(neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", logger);
- }
- else {
- stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", logger);
+ if (!(knnReach instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ if (neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG);
+ } else {
+ stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG);
}
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k);
@@ -226,10 +228,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
// knnReach is only used once
KNNQuery<O, D> knnRefer;
- if(neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if (neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
knnRefer = knnReach;
- }
- else {
+ } else {
// do not materialize the first neighborhood, since it is used only once
knnRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k);
}
@@ -251,30 +252,30 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) {
// Assert we got something
- if(kNNRefer == null) {
+ if (kNNRefer == null) {
throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
}
- if(kNNReach == null) {
+ if (kNNReach == null) {
throw new AbortException("No kNN queries supported by database for reachability distance function.");
}
// Compute LRDs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing LRDs.", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LRDs.", LOG);
}
WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach);
// compute LOF_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing LOFs.", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LOFs.", LOG);
}
Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer);
WritableDoubleDataStore lofs = lofsAndMax.getFirst();
// track the maximum value for normalization.
DoubleMinMax lofminmax = lofsAndMax.getSecond();
- if(stepprog != null) {
- stepprog.setCompleted(logger);
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
}
// Build result representation.
@@ -295,26 +296,44 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- FiniteProgress lrdsProgress = logger.isVerbose() ? new FiniteProgress("LRD", ids.size(), logger) : null;
- Mean mean = new Mean();
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- mean.reset();
- KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
- for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.sameDBID(iter)) {
- KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
- mean.put(Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()));
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ final double nkdist;
+ if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
+ nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
+ } else {
+ nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
+ }
+ sum += Math.max(neighbor.doubleDistance(), nkdist);
+ count++;
+ }
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ count++;
+ }
}
}
// Avoid division by 0
- final double lrd = (mean.getCount() > 0) ? 1 / mean.getMean() : 0.0;
+ final double lrd = (sum > 0) ? (count / sum) : 0;
lrds.putDouble(iter, lrd);
- if(lrdsProgress != null) {
- lrdsProgress.incrementProcessed(logger);
+ if (lrdsProgress != null) {
+ lrdsProgress.incrementProcessed(LOG);
}
}
- if(lrdsProgress != null) {
- lrdsProgress.ensureCompleted(logger);
+ if (lrdsProgress != null) {
+ lrdsProgress.ensureCompleted(LOG);
}
return lrds;
}
@@ -328,40 +347,40 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* reference distance
* @return the LOFs of the objects and the maximum LOF
*/
- protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DataStore<Double> lrds, KNNQuery<O, D> knnRefer) {
+ protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) {
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
- FiniteProgress progressLOFs = logger.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), logger) : null;
- Mean mean = new Mean();
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- double lrdp = lrds.get(iter);
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final double lrdp = lrds.doubleValue(iter);
final double lof;
- if(lrdp > 0) {
+ if (lrdp > 0) {
final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k);
- mean.reset();
- for(DistanceResultPair<D> neighbor : neighbors) {
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if(objectIsInKNN || !neighbor.sameDBID(iter)) {
- mean.put(lrds.get(neighbor));
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ sum += lrds.doubleValue(neighbor);
+ count++;
}
}
- lof = mean.getMean() / lrdp;
- }
- else {
+ lof = sum / (count * lrdp);
+ } else {
lof = 1.0;
}
lofs.putDouble(iter, lof);
// update minimum and maximum
lofminmax.put(lof);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(logger);
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
}
}
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(logger);
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
}
return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax);
}
@@ -369,10 +388,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if(reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) {
+ if (reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
- }
- else {
+ } else {
type = new CombinedTypeInformation(neighborhoodDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
@@ -380,7 +398,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -442,6 +460,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the knn query for the reference set.
+ *
* @return the kNN query w.r.t. the reference neighborhood distance
*/
public KNNQuery<O, D> getKNNRefer() {
@@ -449,6 +469,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the knn query for the reachability set.
+ *
* @return the kNN query w.r.t. the reachability distance
*/
public KNNQuery<O, D> getKNNReach() {
@@ -456,6 +478,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the LRD data store.
+ *
* @return the LRD values of the objects
*/
public WritableDoubleDataStore getLrds() {
@@ -463,6 +487,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the LOF data store.
+ *
* @return the LOF values of the objects
*/
public WritableDoubleDataStore getLofs() {
@@ -470,6 +496,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the outlier result.
+ *
* @return the result of the run of the {@link LOF} algorithm
*/
public OutlierResult getResult() {
@@ -486,6 +514,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the RkNN query for the reference set.
+ *
* @return the RkNN query w.r.t. the reference neighborhood distance
*/
public RKNNQuery<O, D> getRkNNRefer() {
@@ -493,6 +523,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the RkNN query for the reachability set.
+ *
* @return the RkNN query w.r.t. the reachability distance
*/
public RKNNQuery<O, D> getRkNNReach() {
@@ -518,7 +550,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
/**
- * The neighborhood size to use
+ * The neighborhood size to use.
*/
protected int k = 2;
@@ -536,13 +568,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
- if(config.grab(pK)) {
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
k = pK.getValue();
}
final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if(config.grab(reachDistP)) {
+ if (config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
}
@@ -554,4 +587,4 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
return new LOF<O, D>(k, distanceFunction, rdist);
}
}
-} \ No newline at end of file
+}