summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java36
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java82
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java189
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java51
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java337
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java35
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java)58
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java)60
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java)35
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java)40
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java178
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java253
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java)59
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java)72
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java)39
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java)285
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java)97
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java205
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java)37
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java325
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/package-info.java30
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java118
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java181
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java187
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java276
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java271
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java232
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java445
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java129
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java64
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java331
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java147
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java372
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java192
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java123
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java198
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java119
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java103
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java208
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java197
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java97
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/package-info.java44
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java9
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java60
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java19
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java30
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java36
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java12
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java80
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java)92
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java)247
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java)28
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java106
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java109
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/LibSVMOneClassOutlierDetection.java279
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/package-info.java29
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java2
96 files changed, 5483 insertions, 2741 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
index 190d14fe..1d723443 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -38,14 +38,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
@@ -84,11 +85,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @author Erich Schubert
*
* @param <V> the type of NumberVector handled by this Algorithm
- * @param <D> Distance type
*/
@Title("COP: Correlation Outlier Probability")
@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)")
-public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+public class COP<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -164,7 +164,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
/**
* Holds the PCA runner.
*/
- private PCARunner<V> pca;
+ private PCARunner pca;
/**
* Expected amount of outliers.
@@ -209,7 +209,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param dist Distance distribution model (ChiSquared, Gamma)
* @param models Report models
*/
- public COP(DistanceFunction<? super V, D> distanceFunction, int k, PCARunner<V> pca, double expect, DistanceDist dist, boolean models) {
+ public COP(DistanceFunction<? super V> distanceFunction, int k, PCARunner pca, double expect, DistanceDist dist, boolean models) {
super(distanceFunction);
this.k = k;
this.pca = pca;
@@ -226,7 +226,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
*/
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
- KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
+ KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
final int dim = RelationUtil.dimensionality(relation);
if(k <= dim + 1) {
@@ -244,7 +244,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
- KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
nids.remove(id); // Do not use query object
@@ -324,16 +324,12 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
cop_dim.putInt(id, dim + 1 - vdim);
}
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if(prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
// combine results.
- Relation<Double> scoreResult = new MaterializedRelation<>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
if(models) {
@@ -360,7 +356,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<V> {
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its COP_SCORE, must be an integer greater than
@@ -415,7 +411,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
/**
* Holds the object performing the dependency derivation.
*/
- PCARunner<V> pca;
+ PCARunner pca;
/**
* Distance distributution assumption.
@@ -450,7 +446,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
if(config.grab(expectP)) {
expect = expectP.doubleValue();
}
- ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCARunner.class, PCARunner.class);
+ ObjectParameter<PCARunner> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCARunner.class, PCARunner.class);
if(config.grab(pcaP)) {
pca = pcaP.instantiateClass(config);
}
@@ -461,7 +457,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
@Override
- protected COP<V, D> makeInstance() {
+ protected COP<V> makeInstance() {
return new COP<>(distanceFunction, k, pca, expect, dist, models);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java
index ef782390..3d484562 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,18 +35,18 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -92,13 +92,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @author Omar Yousry
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> Distance type
*/
-
@Title("DWOF: Dynamic Window Outlier Factor")
@Description("Algorithm to compute dynamic-window outlier factors in a database based on the neighborhood size parameter 'k'")
-@Reference(authors = "R. Momtaz, N. Mohssen, M. A. Gowayyed", title = "DWOF: A Robust Density-Based OutlierDetection Approach", booktitle = "Pattern Recognition and Image Analysis, Proc. 6th Iberian Conference, IbPRIA 2013, Funchal, Madeira, Portugal, 2013.", url = "http://dx.doi.org/10.1007%2F978-3-642-38628-2_61")
-public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "R. Momtaz, N. Mohssen, M. A. Gowayyed", //
+title = "DWOF: A Robust Density-Based Outlier Detection Approach", //
+booktitle = "Pattern Recognition and Image Analysis, Proc. 6th Iberian Conference, IbPRIA 2013, Funchal, Madeira, Portugal, 2013.", url = "http://dx.doi.org/10.1007%2F978-3-642-38628-2_61")
+public class DWOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -122,7 +122,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @param k the value of k
* @param delta Radius increase factor
*/
- public DWOF(DistanceFunction<? super O, D> distanceFunction, int k, double delta) {
+ public DWOF(DistanceFunction<? super O> distanceFunction, int k, double delta) {
super(distanceFunction);
this.k = k + 1;
this.delta = delta;
@@ -138,10 +138,10 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
*/
public OutlierResult run(Database database, Relation<O> relation) {
final DBIDs ids = relation.getDBIDs();
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
// Get k nearest neighbor and range query on the relation.
- KNNQuery<O, D> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
- RangeQuery<O, D> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
+ KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
+ RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
// DWOF output score storage.
@@ -160,9 +160,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
while(countUnmerged > 0) {
- if(clusEvalProgress != null) {
- clusEvalProgress.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(clusEvalProgress);
// Increase radii
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
radii.putDouble(iter, radii.doubleValue(iter) * delta);
@@ -185,19 +183,15 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
}
}
- if(clusEvalProgress != null) {
- clusEvalProgress.setCompleted(LOG);
- }
- if(stepProg != null) {
- stepProg.setCompleted(LOG);
- }
+ LOG.setCompleted(clusEvalProgress);
+ LOG.setCompleted(stepProg);
// Build result representation.
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
minmax.put(dwofs.doubleValue(iter));
}
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
- Relation<Double> rel = new MaterializedRelation<>("Dynamic-Window Outlier Factors", "dwof-outlier", TypeUtil.DOUBLE, dwofs, ids);
+ DoubleRelation rel = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
return new OutlierResult(meta, rel);
}
@@ -213,7 +207,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @param knnq kNN search function
* @param radii WritableDoubleDataStore to store radii
*/
- private void initializeRadii(DBIDs ids, KNNQuery<O, D> knnq, DistanceQuery<O, D> distFunc, WritableDoubleDataStore radii) {
+ private void initializeRadii(DBIDs ids, KNNQuery<O> knnq, DistanceQuery<O> distFunc, WritableDoubleDataStore radii) {
FiniteProgress avgDistProgress = LOG.isVerbose() ? new FiniteProgress("Calculating average kNN distances-", ids.size(), LOG) : null;
double absoluteMinDist = Double.POSITIVE_INFINITY;
double minAvgDist = Double.POSITIVE_INFINITY;
@@ -221,7 +215,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
Mean mean = new Mean();
// Iterate over all objects
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- KNNList<D> iterNeighbors = knnq.getKNNForDBID(iter, k);
+ KNNList iterNeighbors = knnq.getKNNForDBID(iter, k);
// skip the point itself
mean.reset();
for(DBIDIter neighbor1 = iterNeighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
@@ -232,7 +226,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(DBIDUtil.equal(neighbor1, neighbor2) || DBIDUtil.equal(neighbor2, iter)) {
continue;
}
- double distance = distFunc.distance(neighbor1, neighbor2).doubleValue();
+ double distance = distFunc.distance(neighbor1, neighbor2);
mean.put(distance);
if(distance > 0. && distance < absoluteMinDist) {
absoluteMinDist = distance;
@@ -244,13 +238,9 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(currentMean < minAvgDist) {
minAvgDist = currentMean;
}
- if(avgDistProgress != null) {
- avgDistProgress.incrementProcessed(LOG);
- }
- }
- if(avgDistProgress != null) {
- avgDistProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(avgDistProgress);
}
+ LOG.ensureCompleted(avgDistProgress);
// Initializing the radii of all objects.
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -272,7 +262,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @param radii Radii to cluster accordingly
* @param labels Label storage.
*/
- private void clusterData(DBIDs ids, RangeQuery<O, D> rnnQuery, WritableDoubleDataStore radii, WritableDataStore<ModifiableDBIDs> labels) {
+ private void clusterData(DBIDs ids, RangeQuery<O> rnnQuery, WritableDoubleDataStore radii, WritableDataStore<ModifiableDBIDs> labels) {
FiniteProgress clustProg = LOG.isVerbose() ? new FiniteProgress("Density-Based Clustering", ids.size(), LOG) : null;
// Iterate over all objects
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -282,18 +272,16 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
ModifiableDBIDs newCluster = DBIDUtil.newArray();
newCluster.add(iter);
labels.put(iter, newCluster);
- if(clustProg != null) {
- clustProg.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(clustProg);
// container of the points to be added and their radii neighbors to the
// cluster
ModifiableDBIDs nChain = DBIDUtil.newArray();
nChain.add(iter);
// iterate over nChain
for(DBIDIter toGetNeighbors = nChain.iter(); toGetNeighbors.valid(); toGetNeighbors.advance()) {
- D range = rnnQuery.getDistanceFactory().fromDouble(radii.doubleValue(toGetNeighbors));
- DistanceDBIDList<D> nNeighbors = rnnQuery.getRangeForDBID(toGetNeighbors, range);
- for(DistanceDBIDListIter<D> iter2 = nNeighbors.iter(); iter2.valid(); iter2.advance()) {
+ double range = radii.doubleValue(toGetNeighbors);
+ DoubleDBIDList nNeighbors = rnnQuery.getRangeForDBID(toGetNeighbors, range);
+ for(DoubleDBIDListIter iter2 = nNeighbors.iter(); iter2.valid(); iter2.advance()) {
if(DBIDUtil.equal(toGetNeighbors, iter2)) {
continue;
}
@@ -301,9 +289,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
newCluster.add(iter2);
labels.put(iter2, newCluster);
nChain.add(iter2);
- if(clustProg != null) {
- clustProg.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(clustProg);
}
else if(labels.get(iter2) != newCluster) {
ModifiableDBIDs toBeDeleted = labels.get(iter2);
@@ -316,9 +302,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
}
}
- if(clustProg != null) {
- clustProg.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(clustProg);
}
/**
@@ -360,8 +344,10 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @author Omar Yousry
*
* @apiviz.exclude
+ *
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for the number of neighbors.
*/
@@ -400,7 +386,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
@Override
- protected DWOF<O, D> makeInstance() {
+ protected DWOF<O> makeInstance() {
return new DWOF<>(distanceFunction, k, delta);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
index 3f8bb484..2383824e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,7 +30,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -60,7 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
*/
@Title("Gaussian Model Outlier Detection")
@Description("Fit a multivariate gaussian model onto the data, and use the PDF to compute an outlier score.")
-public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianModel<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -136,7 +137,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<
else {
meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), 0.0, Double.POSITIVE_INFINITY);
}
- Relation<Double> res = new MaterializedRelation<>("Gaussian Model Outlier Score", "gaussian-model-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ DoubleRelation res = new MaterializedDoubleRelation("Gaussian Model Outlier Score", "gaussian-model-outlier", oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -157,7 +158,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
protected boolean invert = false;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
index e6659a8f..53e573e3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,7 +36,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -78,7 +79,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@Title("Gaussian-Uniform Mixture Model Outlier Detection")
@Description("Fits a mixture model consisting of a Gaussian and a uniform distribution to the data.")
@Reference(prefix = "Generalization using the likelihood gain as outlier score of", authors = "Eskin, Eleazar", title = "Anomaly detection over noisy data using learned probability distributions", booktitle = "Proc. of the Seventeenth International Conference on Machine Learning (ICML-2000)")
-public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianUniformMixture<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -184,7 +185,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
- Relation<Double> res = new MaterializedRelation<>("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ DoubleRelation res = new MaterializedDoubleRelation("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -247,7 +248,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
protected double l = 1E-7;
protected double c = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
deleted file mode 100644
index 88603f09..00000000
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
+++ /dev/null
@@ -1,189 +0,0 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2011
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-
-/**
- * Outlier Detection based on the accumulated distances of a point to its k
- * nearest neighbors.
- *
- * Based on: F. Angiulli, C. Pizzuti: Fast Outlier Detection in High Dimensional
- * Spaces. In: Proc. European Conference on Principles of Knowledge Discovery
- * and Data Mining (PKDD'02), Helsinki, Finland, 2002.
- *
- * @author Lisa Reichert
- *
- * @apiviz.has KNNQuery
- *
- * @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
- */
-@Title("KNNWeight outlier detection")
-@Description("Outlier Detection based on the distances of an object to its k nearest neighbors.")
-@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02), Helsinki, Finland, 2002", url = "http://dx.doi.org/10.1007/3-540-45681-3_2")
-public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
- /**
- * The logger for this class.
- */
- private static final Logging LOG = Logging.getLogger(KNNWeightOutlier.class);
-
- /**
- * Parameter to specify the k nearest neighbor
- */
- public static final OptionID K_ID = new OptionID("knnwod.k", "k nearest neighbor");
-
- /**
- * The kNN query used.
- */
- public static final OptionID KNNQUERY_ID = new OptionID("knnwod.knnquery", "kNN query to use");
-
- /**
- * Holds the value of {@link #K_ID}.
- */
- private int k;
-
- /**
- * Constructor with parameters.
- *
- * @param distanceFunction Distance function
- * @param k k Parameter
- */
- public KNNWeightOutlier(DistanceFunction<? super O, D> distanceFunction, int k) {
- super(distanceFunction);
- this.k = k;
- }
-
- /**
- * Runs the algorithm in the timed evaluation part.
- */
- public OutlierResult run(Database database, Relation<O> relation) {
- final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
-
- if(LOG.isVerbose()) {
- LOG.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
- }
- FiniteProgress progressKNNWeight = LOG.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), LOG) : null;
-
- DoubleMinMax minmax = new DoubleMinMax();
-
- // compute distance to the k nearest neighbor. n objects with the highest
- // distance are flagged as outliers
- WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- // compute sum of the distances to the k nearest neighbors
-
- final KNNList<D> knn = knnQuery.getKNNForDBID(iditer, k);
- double skn = 0;
- if(knn instanceof DoubleDistanceKNNList) {
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
- skn += neighbor.doubleDistance();
- }
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
- skn += neighbor.getDistance().doubleValue();
- }
- }
- knnw_score.putDouble(iditer, skn);
- minmax.put(skn);
-
- if(progressKNNWeight != null) {
- progressKNNWeight.incrementProcessed(LOG);
- }
- }
- if(progressKNNWeight != null) {
- progressKNNWeight.ensureCompleted(LOG);
- }
-
- Relation<Double> res = new MaterializedRelation<>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
- OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
- return new OutlierResult(meta, res);
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
- }
-
- @Override
- protected Logging getLogger() {
- return LOG;
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- protected int k = 0;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID);
- if(config.grab(kP)) {
- k = kP.getValue();
- }
- }
-
- @Override
- protected KNNWeightOutlier<O, D> makeInstance() {
- return new KNNWeightOutlier<>(distanceFunction, k);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
index b1ffae63..61d11935 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,7 +27,7 @@ import java.util.ArrayList;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICS;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.optics.OPTICS;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -38,15 +38,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -60,8 +60,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
- * OPTICSOF provides the Optics-of algorithm, an algorithm to find Local
- * Outliers in a database.
+ * Optics-OF outlier detection algorithm, an algorithm to find Local Outliers in
+ * a database based on ideas from {@link OPTICS} clustering.
* <p>
* Reference:<br>
* Markus M. Breunig, Hans-Peter Kriegel, Raymond T. N, Jörg Sander:<br />
@@ -79,8 +79,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("OPTICS-OF: Identifying Local Outliers")
@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'minpts'")
-@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "OPTICS-OF: Identifying Local Outliers", booktitle = "Proc. of the 3rd European Conference on Principles of Knowledge Discovery and Data Mining (PKDD), Prague, Czech Republic", url = "http://springerlink.metapress.com/content/76bx6413gqb4tvta/")
-public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", //
+title = "OPTICS-OF: Identifying Local Outliers", //
+booktitle = "Proc. of the 3rd European Conference on Principles of Knowledge Discovery and Data Mining (PKDD), Prague, Czech Republic", //
+url = "http://springerlink.metapress.com/content/76bx6413gqb4tvta/")
+public class OPTICSOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -97,7 +100,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
* @param distanceFunction distance function
* @param minpts minPts parameter
*/
- public OPTICSOF(DistanceFunction<? super O, D> distanceFunction, int minpts) {
+ public OPTICSOF(DistanceFunction<? super O> distanceFunction, int minpts) {
super(distanceFunction);
this.minpts = minpts;
}
@@ -110,13 +113,13 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distQuery, minpts);
- RangeQuery<O, D> rangeQuery = database.getRangeQuery(distQuery);
+ DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
+ RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
DBIDs ids = relation.getDBIDs();
// FIXME: implicit preprocessor.
- WritableDataStore<KNNList<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
+ WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
@@ -124,10 +127,10 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
// N_minpts(id) and core-distance(id)
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNList<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
- D d = minptsNeighbours.getKNNDistance();
+ KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
+ double d = minptsNeighbours.getKNNDistance();
nMinPts.put(iditer, minptsNeighbours);
- coreDistance.putDouble(iditer, d.doubleValue());
+ coreDistance.putDouble(iditer, d);
minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
}
@@ -138,9 +141,9 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
List<Double> core = new ArrayList<>();
double lrd = 0;
// TODO: optimize for double distances
- for(DistanceDBIDListIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ for(DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double coreDist = coreDistance.doubleValue(neighbor);
- double dist = distQuery.distance(iditer, neighbor).doubleValue();
+ double dist = distQuery.distance(iditer, neighbor);
double rd = Math.max(coreDist, dist);
lrd = rd + lrd;
core.add(rd);
@@ -166,7 +169,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
ofminmax.put(of);
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("OPTICS Outlier Scores", "optics-outlier", TypeUtil.DOUBLE, ofs, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -188,13 +191,13 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
protected int minpts = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter param = new IntParameter(OPTICS.MINPTS_ID);
+ final IntParameter param = new IntParameter(OPTICS.Parameterizer.MINPTS_ID);
param.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(param)) {
minpts = param.getValue();
@@ -202,7 +205,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
}
@Override
- protected OPTICSOF<O, D> makeInstance() {
+ protected OPTICSOF<O> makeInstance() {
return new OPTICSOF<>(distanceFunction, minpts);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
index f3ef5ab5..4e0662a1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
deleted file mode 100644
index d254c9a1..00000000
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
+++ /dev/null
@@ -1,337 +0,0 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2011
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Collection;
-import java.util.Iterator;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.math.Mean;
-import de.lmu.ifi.dbs.elki.result.ReferencePointsResult;
-import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.referencepoints.GridBasedReferencePoints;
-import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic;
-
-/**
- * <p>
- * provides the Reference-Based Outlier Detection algorithm, an algorithm that
- * computes kNN distances approximately, using reference points.
- * </p>
- * <p>
- * Reference:<br>
- * Y. Pei, O. R. Zaiane, Y. Gao: An Efficient Reference-Based Approach to
- * Outlier Detection in Large Datasets.</br> In: Proc. IEEE Int. Conf. on Data
- * Mining (ICDM'06), Hong Kong, China, 2006.
- * </p>
- *
- * @author Lisa Reichert
- * @author Erich Schubert
- *
- * @apiviz.composedOf ReferencePointsHeuristic
- *
- * @param <V> a type of {@link NumberVector} as a suitable data object for this
- * algorithm
- * @param <D> the distance type processed
- */
-@Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
-@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
-@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06), Hong Kong, China, 2006", url = "http://dx.doi.org/10.1109/ICDM.2006.17")
-public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
- /**
- * The logger for this class.
- */
- private static final Logging LOG = Logging.getLogger(ReferenceBasedOutlierDetection.class);
-
- /**
- * Parameter for the reference points heuristic.
- */
- public static final OptionID REFP_ID = new OptionID("refod.refp", "The heuristic for finding reference points.");
-
- /**
- * Parameter to specify the number of nearest neighbors of an object, to be
- * considered for computing its REFOD_SCORE, must be an integer greater than
- * 1.
- */
- public static final OptionID K_ID = new OptionID("refod.k", "The number of nearest neighbors");
-
- /**
- * Holds the value of {@link #K_ID}.
- */
- private int k;
-
- /**
- * Stores the reference point strategy
- */
- private ReferencePointsHeuristic<V> refp;
-
- /**
- * Distance function to use.
- */
- private DistanceFunction<V, D> distanceFunction;
-
- /**
- * Constructor with parameters.
- *
- * @param k k Parameter
- * @param distanceFunction distance function
- * @param refp Reference points heuristic
- */
- public ReferenceBasedOutlierDetection(int k, DistanceFunction<V, D> distanceFunction, ReferencePointsHeuristic<V> refp) {
- super();
- this.k = k;
- this.distanceFunction = distanceFunction;
- this.refp = refp;
- }
-
- /**
- * Run the algorithm on the given relation.
- *
- * @param database Database
- * @param relation Relation to process
- * @return Outlier result
- */
- public OutlierResult run(Database database, Relation<V> relation) {
- DistanceQuery<V, D> distFunc = database.getDistanceQuery(relation, distanceFunction);
- Collection<V> refPoints = refp.getReferencePoints(relation);
-
- DBIDs ids = relation.getDBIDs();
- // storage of distance/score values.
- WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT);
-
- // Compute density estimation:
- {
- // compute density for one reference point, to initialize the first
- // density
- // value for each object, then update
- final Iterator<V> iter = refPoints.iterator();
- if(!iter.hasNext()) {
- throw new AbortException("Cannot compute ROS without reference points!");
- }
- V firstRef = iter.next();
- // compute distance vector for the first reference point
- DistanceDBIDList<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
- for(int l = 0; l < firstReferenceDists.size(); l++) {
- double density = computeDensity(firstReferenceDists, l);
- // Initial value
- rbod_score.putDouble(firstReferenceDists.get(l), density);
- }
- // compute density values for all remaining reference points
- while(iter.hasNext()) {
- V refPoint = iter.next();
- DistanceDBIDList<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
- // compute density value for each object
- for(int l = 0; l < referenceDists.size(); l++) {
- double density = computeDensity(referenceDists, l);
- // Update minimum
- if(density < rbod_score.doubleValue(referenceDists.get(l))) {
- rbod_score.putDouble(referenceDists.get(l), density);
- }
- }
- }
- }
- // compute maximum density
- double maxDensity = 0.0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double dens = rbod_score.doubleValue(iditer);
- if(dens > maxDensity) {
- maxDensity = dens;
- }
- }
- // compute ROS
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double score = 1 - (rbod_score.doubleValue(iditer) / maxDensity);
- rbod_score.putDouble(iditer, score);
- }
-
- // adds reference points to the result. header information for the
- // visualizer to find the reference points in the result
- ReferencePointsResult<V> refp = new ReferencePointsResult<>("Reference points", "reference-points", refPoints);
-
- Relation<Double> scoreResult = new MaterializedRelation<>("Reference-points Outlier Scores", "reference-outlier", TypeUtil.DOUBLE, rbod_score, relation.getDBIDs());
- OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(0.0, 1.0, 0.0, 1.0, 0.0);
- OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
- result.addChildResult(refp);
- return result;
- }
-
- /**
- * Computes for each object the distance to one reference point. (one
- * dimensional representation of the data set)
- *
- * @param refPoint Reference Point Feature Vector
- * @param database database to work on
- * @param distFunc Distance function to use
- * @return array containing the distance to one reference point for each
- * database object and the object id
- */
- protected DistanceDBIDList<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
- // TODO: optimize for double distances?
- GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<>(database.size());
- for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
- }
- referenceDists.sort();
- return referenceDists;
- }
-
- /**
- * Computes the density of an object. The density of an object is the
- * distances to the k nearest neighbors. Neighbors and distances are computed
- * approximately. (approximation for kNN distance: instead of a normal NN
- * search the NN of an object are those objects that have a similar distance
- * to a reference point. The k- nearest neighbors of an object are those
- * objects that lay close to the object in the reference distance vector)
- *
- * @param referenceDists vector of the reference distances,
- * @param index index of the current object
- * @return density for one object and reference point
- */
- protected double computeDensity(DistanceDBIDList<D> referenceDists, int index) {
- final DistanceDBIDPair<D> x = referenceDists.get(index);
- final double xDist = x.getDistance().doubleValue();
-
- int lef = index - 1;
- int rig = index + 1;
- Mean mean = new Mean();
- double lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- double rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- while(mean.getCount() < k) {
- if(lef >= 0 && rig < referenceDists.size()) {
- // Prefer n or m?
- if(Math.abs(lef_d - xDist) < Math.abs(rig_d - xDist)) {
- mean.put(Math.abs(lef_d - xDist));
- // Update n
- lef--;
- lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- else {
- mean.put(Math.abs(rig_d - xDist));
- // Update right
- rig++;
- rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- }
- else {
- if(lef >= 0) {
- // Choose left, since right is not available.
- mean.put(Math.abs(lef_d - xDist));
- // update left
- lef--;
- lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- else if(rig < referenceDists.size()) {
- // Choose right, since left is not available
- mean.put(Math.abs(rig_d - xDist));
- // Update right
- rig++;
- rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- else {
- // Not enough objects in database?
- throw new IndexOutOfBoundsException();
- }
- }
- }
-
- return 1.0 / mean.getMean();
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
- }
-
- @Override
- protected Logging getLogger() {
- return LOG;
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
- /**
- * Holds the value of {@link #K_ID}.
- */
- private int k;
-
- /**
- * Stores the reference point strategy
- */
- private ReferencePointsHeuristic<V> refp;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
- if(config.grab(pK)) {
- k = pK.getValue();
- }
- final ObjectParameter<ReferencePointsHeuristic<V>> refpP = new ObjectParameter<>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
- if(config.grab(refpP)) {
- refp = refpP.instantiateClass(config);
- }
- }
-
- @Override
- protected ReferenceBasedOutlierDetection<V, D> makeInstance() {
- return new ReferenceBasedOutlierDetection<>(k, distanceFunction, refp);
- }
- }
-} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
index 72a727a5..ef8f2192 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -40,13 +40,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -81,7 +82,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*/
@Title("Simple COP: Correlation Outlier Probability")
@Reference(authors = "Arthur Zimek", title = "Correlation Clustering. PhD thesis, Chapter 18", booktitle = "")
-public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+public class SimpleCOP<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -95,7 +96,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
/**
* Holds the object performing the dependency derivation
*/
- private DependencyDerivator<V, D> dependencyDerivator;
+ private DependencyDerivator<V> dependencyDerivator;
/**
* Constructor.
@@ -104,14 +105,14 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
* @param k Number of nearest neighbors to consider
* @param pca PCA runner
*/
- public SimpleCOP(DistanceFunction<? super V, D> distanceFunction, int k, PCAFilteredRunner<V> pca) {
+ public SimpleCOP(DistanceFunction<? super V> distanceFunction, int k, PCAFilteredRunner pca) {
super(distanceFunction);
this.k = k;
this.dependencyDerivator = new DependencyDerivator<>(null, FormatUtil.NF, pca, 0, false);
}
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
- KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
+ KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
DBIDs ids = data.getDBIDs();
@@ -124,7 +125,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
double sqrt2 = Math.sqrt(2.0);
for(DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
- KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
nids.remove(id);
@@ -147,16 +148,12 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
cop_sol.put(id, depsol);
- if(progressLocalPCA != null) {
- progressLocalPCA.incrementProcessed(LOG);
- }
- }
- if(progressLocalPCA != null) {
- progressLocalPCA.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLocalPCA);
}
+ LOG.ensureCompleted(progressLocalPCA);
}
// combine results.
- Relation<Double> scoreResult = new MaterializedRelation<>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", cop_score, ids);
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
// extra results
@@ -184,7 +181,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<V> {
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its COP_SCORE, must be an integer greater than
@@ -212,7 +209,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
/**
* Holds the object performing the dependency derivation
*/
- protected PCAFilteredRunner<V> pca;
+ protected PCAFilteredRunner pca;
@Override
protected void makeOptions(Parameterization config) {
@@ -222,14 +219,14 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>
if(config.grab(kP)) {
k = kP.intValue();
}
- ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class);
+ ObjectParameter<PCAFilteredRunner> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class);
if(config.grab(pcaP)) {
pca = pcaP.instantiateClass(config);
}
}
@Override
- protected SimpleCOP<V, D> makeInstance() {
+ protected SimpleCOP<V> makeInstance() {
return new SimpleCOP<>(distanceFunction, k, pca);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java
index 65447713..35dfb1ee 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -36,9 +37,9 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction;
@@ -48,6 +49,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -63,9 +65,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* dimensional data sets. Exact version, which has cubic runtime (see also
* {@link FastABOD} and {@link LBABOD} for faster versions).
*
- * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in
- * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge
- * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008.
+ * Reference:
+ * <p>
+ * H.-P. Kriegel, M. Schubert, and A. Zimek:<br />
+ * Angle-Based Outlier Detection in High-dimensional Data.<br />
+ * In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining
+ * (KDD '08), Las Vegas, NV, 2008.
+ * </p>
*
* @author Matthias Schubert (Original Code)
* @author Erich Schubert (ELKIfication)
@@ -74,8 +80,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*/
@Title("ABOD: Angle-Based Outlier Detection")
@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
-@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
-public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "H.-P. Kriegel, M. Schubert, A. Zimek", //
+title = "Angle-Based Outlier Detection in High-dimensional Data", //
+booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", //
+url = "http://dx.doi.org/10.1145/1401890.1401946")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.ABOD", "abod" })
+public class ABOD<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -84,14 +94,14 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
/**
* Store the configured Kernel version.
*/
- protected SimilarityFunction<? super V, DoubleDistance> kernelFunction;
+ protected SimilarityFunction<? super V> kernelFunction;
/**
* Constructor for Angle-Based Outlier Detection (ABOD).
*
* @param kernelFunction kernel function to use
*/
- public ABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction) {
+ public ABOD(SimilarityFunction<? super V> kernelFunction) {
super();
this.kernelFunction = kernelFunction;
}
@@ -105,21 +115,21 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
public OutlierResult run(Database db, Relation<V> relation) {
DBIDs ids = relation.getDBIDs();
// Build a kernel matrix, to make O(n^3) slightly less bad.
- SimilarityQuery<V, DoubleDistance> sq = db.getSimilarityQuery(relation, kernelFunction);
+ SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmaxabod = new DoubleMinMax();
MeanVariance s = new MeanVariance();
- for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
+ for(DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
final double abof = computeABOF(relation, kernelMatrix, pA, s);
minmaxabod.put(abof);
abodvalues.putDouble(pA, abof);
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Angle-Based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -137,24 +147,24 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
s.reset(); // Reused
double simAA = kernelMatrix.getSimilarity(pA, pA);
- for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
- if (DBIDUtil.equal(nB, pA)) {
+ for(DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
+ if(DBIDUtil.equal(nB, pA)) {
continue;
}
double simBB = kernelMatrix.getSimilarity(nB, nB);
double simAB = kernelMatrix.getSimilarity(pA, nB);
double sqdAB = simAA + simBB - simAB - simAB;
- if (!(sqdAB > 0.)) {
+ if(!(sqdAB > 0.)) {
continue;
}
- for (DBIDIter nC = relation.iterDBIDs(); nC.valid(); nC.advance()) {
- if (DBIDUtil.equal(nC, pA) || DBIDUtil.compare(nC, nB) < 0) {
+ for(DBIDIter nC = relation.iterDBIDs(); nC.valid(); nC.advance()) {
+ if(DBIDUtil.equal(nC, pA) || DBIDUtil.compare(nC, nB) < 0) {
continue;
}
double simCC = kernelMatrix.getSimilarity(nC, nC);
double simAC = kernelMatrix.getSimilarity(pA, nC);
double sqdAC = simAA + simCC - simAC;
- if (!(sqdAC > 0.)) {
+ if(!(sqdAC > 0.)) {
continue;
}
// Exploit bilinearity of scalar product:
@@ -190,7 +200,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* Parameter for the kernel function.
*/
@@ -199,13 +209,13 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
/**
* Kernel (similarity) function.
*/
- protected SimilarityFunction<V, DoubleDistance> kernelFunction = null;
+ protected SimilarityFunction<V> kernelFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final ObjectParameter<SimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, SimilarityFunction.class, PolynomialKernelFunction.class);
- if (config.grab(param)) {
+ final ObjectParameter<SimilarityFunction<V>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, SimilarityFunction.class, PolynomialKernelFunction.class);
+ if(config.grab(param)) {
kernelFunction = param.instantiateClass(config);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java
index ee6bd434..56bedaac 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,9 +35,9 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -46,6 +46,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -60,9 +61,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* Fast-ABOD (approximateABOF) version.
*
- * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in
- * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge
- * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008.
+ * Reference:
+ * <p>
+ * H.-P. Kriegel, M. Schubert, and A. Zimek:<br />
+ * Angle-Based Outlier Detection in High-dimensional Data.<br />
+ * In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining
+ * (KDD '08), Las Vegas, NV, 2008.
+ * </p>
*
* @author Matthias Schubert (Original Code)
* @author Erich Schubert (ELKIfication)
@@ -71,8 +76,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("Approximate ABOD: Angle-Based Outlier Detection")
@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
-@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
-public class FastABOD<V extends NumberVector<?>> extends ABOD<V> {
+@Reference(authors = "H.-P. Kriegel, M. Schubert, A. Zimek", //
+title = "Angle-Based Outlier Detection in High-dimensional Data", //
+booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", //
+url = "http://dx.doi.org/10.1145/1401890.1401946")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.FastABOD", "fastabod" })
+public class FastABOD<V extends NumberVector> extends ABOD<V> {
/**
* The logger for this class.
*/
@@ -89,7 +98,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> {
* @param kernelFunction kernel function to use
* @param k Number of nearest neighbors
*/
- public FastABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction, int k) {
+ public FastABOD(SimilarityFunction<? super V> kernelFunction, int k) {
super(kernelFunction);
this.k = k;
}
@@ -104,51 +113,52 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> {
public OutlierResult run(Database db, Relation<V> relation) {
DBIDs ids = relation.getDBIDs();
// Build a kernel matrix, to make O(n^3) slightly less bad.
- SimilarityQuery<V, DoubleDistance> sq = db.getSimilarityQuery(relation, kernelFunction);
+ SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmaxabod = new DoubleMinMax();
MeanVariance s = new MeanVariance();
- for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
+ for(DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) {
s.reset();
final double simAA = kernelMatrix.getSimilarity(pA, pA);
// Choose the k nearest neighbors (smallest sqdAB values)
ComparableMaxHeap<DoubleDBIDPair> nn = new ComparableMaxHeap<>(k);
- for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
- if (DBIDUtil.equal(nB, pA)) {
+ for(DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) {
+ if(DBIDUtil.equal(nB, pA)) {
continue;
}
double simBB = kernelMatrix.getSimilarity(nB, nB);
double simAB = kernelMatrix.getSimilarity(pA, nB);
double sqdAB = simAA + simBB - simAB - simAB;
- if (!(sqdAB > 0.)) {
+ if(!(sqdAB > 0.)) {
continue;
}
- if (nn.size() < k) {
+ if(nn.size() < k) {
nn.add(DBIDUtil.newPair(sqdAB, nB));
- } else if (sqdAB < nn.peek().doubleValue()) {
+ }
+ else if(sqdAB < nn.peek().doubleValue()) {
nn.replaceTopElement(DBIDUtil.newPair(sqdAB, nB));
}
}
- for (ObjectHeap.UnsortedIter<DoubleDBIDPair> iB = nn.unsortedIter(); iB.valid(); iB.advance()) {
+ for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iB = nn.unsortedIter(); iB.valid(); iB.advance()) {
DoubleDBIDPair nB = iB.get();
double sqdAB = nB.doubleValue();
double simAB = kernelMatrix.getSimilarity(pA, nB);
- if (!(sqdAB > 0.)) {
+ if(!(sqdAB > 0.)) {
continue;
}
- for (ObjectHeap.UnsortedIter<DoubleDBIDPair> iC = nn.unsortedIter(); iC.valid(); iC.advance()) {
+ for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iC = nn.unsortedIter(); iC.valid(); iC.advance()) {
DoubleDBIDPair nC = iC.get();
- if (DBIDUtil.compare(nC, nB) < 0) {
+ if(DBIDUtil.compare(nC, nB) < 0) {
continue;
}
double sqdAC = nC.doubleValue();
double simAC = kernelMatrix.getSimilarity(pA, nC);
- if (!(sqdAC > 0.)) {
+ if(!(sqdAC > 0.)) {
continue;
}
// Exploit bilinearity of scalar product:
@@ -169,7 +179,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> {
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Angle-Based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -191,7 +201,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> {
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends ABOD.Parameterizer<V> {
+ public static class Parameterizer<V extends NumberVector> extends ABOD.Parameterizer<V> {
/**
* Parameter for the nearest neighbors.
*/
@@ -206,7 +216,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- if (config.grab(kP)) {
+ if(config.grab(kP)) {
k = kP.intValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java
index 37b4d050..0ef19a50 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,9 +35,9 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap;
@@ -70,9 +71,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* Outlier detection using variance analysis on angles, especially for high
* dimensional data sets.
*
- * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in
- * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge
- * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008.
+ * Reference:
+ * <p>
+ * H.-P. Kriegel, M. Schubert, and A. Zimek:<br />
+ * Angle-Based Outlier Detection in High-dimensional Data.<br />
+ * In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining
+ * (KDD '08), Las Vegas, NV, 2008.
+ * </p>
*
* @author Matthias Schubert (Original Code)
* @author Erich Schubert (ELKIfication)
@@ -81,8 +86,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("LB-ABOD: Lower Bounded Angle-Based Outlier Detection")
@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
-@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
-public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> {
+@Reference(authors = "H.-P. Kriegel, M. Schubert, A. Zimek", //
+title = "Angle-Based Outlier Detection in High-dimensional Data", //
+booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", //
+url = "http://dx.doi.org/10.1145/1401890.1401946")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LBABOD", "lb-abod" })
+public class LBABOD<V extends NumberVector> extends FastABOD<V> {
/**
* The logger for this class.
*/
@@ -100,7 +109,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> {
* @param k k parameter
* @param l Number of outliers to compute exactly
*/
- public LBABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction, int k, int l) {
+ public LBABOD(SimilarityFunction<? super V> kernelFunction, int k, int l) {
super(kernelFunction, k);
this.l = l;
}
@@ -114,7 +123,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> {
@Override
public OutlierResult run(Database db, Relation<V> relation) {
DBIDs ids = relation.getDBIDs();
- SimilarityQuery<V, DoubleDistance> sq = relation.getDatabase().getSimilarityQuery(relation, kernelFunction);
+ SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
// Output storage.
@@ -237,7 +246,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> {
LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids);
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -259,7 +268,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> {
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends FastABOD.Parameterizer<V> {
+ public static class Parameterizer<V extends NumberVector> extends FastABOD.Parameterizer<V> {
/**
* Parameter to specify the number of outliers to compute exactly.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/package-info.java
new file mode 100644
index 00000000..f729559f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Angle-based outlier detection algorithms.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java
index 76191cf2..5a02fb56 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,10 +24,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.EM;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.em.EM;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.EMModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -35,7 +35,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -64,7 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
// TODO: re-use an existing EM when present?
@Title("EM Outlier: Outlier Detection based on the generic EM clustering")
@Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.")
-public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class EMOutlier<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -73,14 +74,14 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
/**
* Inner algorithm.
*/
- private EM<V> emClustering;
+ private EM<V, ?> emClustering;
/**
* Constructor with an existing em clustering algorithm.
*
* @param emClustering EM clustering algorithm to use.
*/
- public EMOutlier(EM<V> emClustering) {
+ public EMOutlier(EM<V, ?> emClustering) {
super();
this.emClustering = emClustering;
}
@@ -94,13 +95,13 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
*/
public OutlierResult run(Database database, Relation<V> relation) {
emClustering.setSoft(true);
- Clustering<EMModel<V>> emresult = emClustering.run(database, relation);
+ Clustering<?> emresult = emClustering.run(database, relation);
Relation<double[]> soft = null;
- for (Iter<Result> iter = emresult.getHierarchy().iterChildren(emresult); iter.valid(); iter.advance()) {
- if (!(iter.get() instanceof Relation)) {
+ for(Iter<Result> iter = emresult.getHierarchy().iterChildren(emresult); iter.valid(); iter.advance()) {
+ if(!(iter.get() instanceof Relation)) {
continue;
}
- if (((Relation<?>) iter.get()).getDataTypeInformation() == EM.SOFT_TYPE) {
+ if(((Relation<?>) iter.get()).getDataTypeInformation() == EM.SOFT_TYPE) {
@SuppressWarnings("unchecked")
Relation<double[]> rel = (Relation<double[]>) iter.get();
soft = rel;
@@ -109,16 +110,16 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
double globmax = 0.0;
WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double maxProb = Double.POSITIVE_INFINITY;
double[] probs = soft.get(iditer);
- for (double prob : probs) {
+ for(double prob : probs) {
maxProb = Math.min(1. - prob, maxProb);
}
emo_score.putDouble(iditer, maxProb);
globmax = Math.max(maxProb, globmax);
}
- Relation<Double> scoreres = new MaterializedRelation<>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("EM outlier scores", "em-outlier", emo_score, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
// combine results.
OutlierResult result = new OutlierResult(meta, scoreres);
@@ -144,13 +145,16 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
- protected EM<V> em = null;
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ /**
+ * EM clustering algorithm to run.
+ */
+ protected EM<V, ?> em;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- Class<EM<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(EM.class);
+ Class<EM<V, ?>> cls = ClassGenericsUtil.uglyCastIntoSubclass(EM.class);
em = config.tryInstantiate(cls);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java
new file mode 100644
index 00000000..c6155527
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java
@@ -0,0 +1,178 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.ModelUtil;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Outlier detection based on k-means clustering.
+ *
+ * The score of an object is its distance to the center of its assigned cluster.
+ *
+ * There is no clear reference for this approach, but in some areas it appears
+ * to be a best practice to remove the objects farthest from their center. If
+ * you need to cite this approach, please cite the ELKI version you used (use
+ * the <a href="http://elki.dbs.ifi.lmu.de/wiki/Publications">ELKI publication
+ * list</a> for citation information and BibTeX templates).
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KMeans
+ *
+ * @param <O> Object type
+ */
+public class KMeansOutlierDetection<O extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+  /**
+   * Logger of this class.
+   */
+  private static final Logging LOG = Logging.getLogger(KMeansOutlierDetection.class);
+
+  /**
+   * The k-means variant that produces the clustering to score.
+   */
+  KMeans<O, ?> clusterer;
+
+  /**
+   * Constructor.
+   *
+   * @param clusterer k-means clustering algorithm to run
+   */
+  public KMeansOutlierDetection(KMeans<O, ?> clusterer) {
+    super();
+    this.clusterer = clusterer;
+  }
+
+  /**
+   * Cluster the relation and score each object by the distance to its center.
+   *
+   * @param database Database to process
+   * @param relation Relation holding the vectors
+   * @return Outlier detection result
+   */
+  public OutlierResult run(Database database, Relation<O> relation) {
+    final DistanceFunction<? super O> distance = clusterer.getDistanceFunction();
+    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, distance);
+
+    // TODO: improve ELKI api to ensure we're using the same DBIDs!
+    final Clustering<?> clustering = clusterer.run(database, relation);
+
+    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
+    final DoubleMinMax minmax = new DoubleMinMax();
+
+    @SuppressWarnings("unchecked")
+    final NumberVector.Factory<O> factory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
+    final List<? extends Cluster<?>> clusters = clustering.getAllClusters();
+    for(Cluster<?> cluster : clusters) {
+      // FIXME: use a primitive distance function on number vectors instead.
+      final O center = factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation));
+      for(DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
+        final double score = distQuery.distance(center, it);
+        scores.put(it, score);
+        minmax.put(score);
+      }
+    }
+
+    // Package the scores together with their score metadata.
+    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs());
+    final OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
+    return new OutlierResult(meta, scoreRelation);
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    final TypeInformation restriction = clusterer.getDistanceFunction().getInputTypeRestriction();
+    return TypeUtil.array(restriction);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return LOG;
+  }
+
+  /**
+   * Parameterization class.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <O> Object type
+   */
+  public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
+    /**
+     * Option ID for selecting the k-means clustering algorithm.
+     */
+    public static final OptionID CLUSTERING_ID = new OptionID("kmeans.algorithm", //
+    "Clustering algorithm to use for detecting outliers.");
+
+    /**
+     * Clustering algorithm chosen via {@link #CLUSTERING_ID}.
+     */
+    KMeans<O, ?> clusterer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      final ObjectParameter<KMeans<O, ?>> algP = new ObjectParameter<>(CLUSTERING_ID, KMeans.class, KMeansLloyd.class);
+      if(config.grab(algP)) {
+        clusterer = algP.instantiateClass(config);
+      }
+    }
+
+    @Override
+    protected KMeansOutlierDetection<O> makeInstance() {
+      return new KMeansOutlierDetection<>(clusterer);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java
new file mode 100644
index 00000000..3bd9cf8b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java
@@ -0,0 +1,253 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.evaluation.clustering.internal.EvaluateSilhouette;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Outlier detection by using the Silhouette Coefficients.
+ *
+ * Silhouette values are computed as in:
+ * <p>
+ * P. J. Rousseeuw<br />
+ * Silhouettes: A graphical aid to the interpretation and validation of cluster
+ * analysis<br />
+ * In: Journal of Computational and Applied Mathematics Volume 20, November 1987
+ * </p>
+ *
+ * but then used as outlier scores. To cite this outlier detection approach,
+ * please cite the ELKI version you used (use the <a
+ * href="http://elki.dbs.ifi.lmu.de/wiki/Publications">ELKI publication list</a>
+ * for citation information and BibTeX templates).
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has ClusteringAlgorithm
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "P. J. Rousseeuw", //
+title = "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis", //
+booktitle = "Journal of Computational and Applied Mathematics, Volume 20", //
+url = "http://dx.doi.org/10.1016%2F0377-0427%2887%2990125-7")
+public class SilhouetteOutlierDetection<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+  /** Class logger. */
+  private static final Logging LOG = Logging.getLogger(SilhouetteOutlierDetection.class);
+
+  /** Clustering algorithm whose result is scored. */
+  ClusteringAlgorithm<?> clusterer;
+
+  /** Keep noise "clusters" merged, instead of breaking them into singletons. */
+  private boolean mergenoise = false;
+
+  /**
+   * Constructor.
+   *
+   * @param distanceFunction Distance function
+   * @param clusterer Clustering algorithm
+   * @param mergenoise Flag to keep "noise" clusters merged, instead of breaking
+   *        them into singletons.
+   */
+  public SilhouetteOutlierDetection(DistanceFunction<? super O> distanceFunction, ClusteringAlgorithm<?> clusterer, boolean mergenoise) {
+    super(distanceFunction);
+    this.clusterer = clusterer;
+    this.mergenoise = mergenoise;
+  }
+
+  /**
+   * Run the clustering algorithm, then score every clustered object by its
+   * silhouette coefficient (returned with inverted score metadata).
+   *
+   * @param database Database to process
+   * @return Outlier detection result
+   */
+  @Override
+  public OutlierResult run(Database database) {
+    Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction());
+    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
+
+    // TODO: improve ELKI api to ensure we're using the same DBIDs!
+    Clustering<?> c = clusterer.run(database);
+
+    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
+    DoubleMinMax mm = new DoubleMinMax();
+
+    List<? extends Cluster<?>> clusters = c.getAllClusters();
+    for(Cluster<?> cluster : clusters) {
+      if(cluster.size() <= 1 || (!mergenoise && cluster.isNoise())) {
+        // As suggested in Rousseeuw, we use 0 for singletons.
+        for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
+          scores.put(iter, 0.);
+        }
+        mm.put(0.);
+        continue;
+      }
+      ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
+      double[] as = new double[ids.size()]; // temporary storage.
+      DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
+      for(it1.seek(0); it1.valid(); it1.advance()) {
+        // a: In-cluster distances
+        double a = as[it1.getOffset()]; // Already computed distances
+        for(it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
+          final double dist = dq.distance(it1, it2);
+          a += dist;
+          as[it2.getOffset()] += dist;
+        }
+        a /= (ids.size() - 1);
+        // b: other clusters:
+        double min = Double.POSITIVE_INFINITY;
+        for(Cluster<?> ocluster : clusters) {
+          if(ocluster == /* yes, reference identity */cluster) {
+            continue;
+          }
+          if(!mergenoise && ocluster.isNoise()) {
+            // Treat noise cluster as singletons:
+            for(DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
+              double dist = dq.distance(it1, it3);
+              if(dist < min) {
+                min = dist;
+              }
+            }
+            continue;
+          }
+          final DBIDs oids = ocluster.getIDs();
+          double b = 0.;
+          for(DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
+            b += dq.distance(it1, it3);
+          }
+          b /= oids.size();
+          if(b < min) {
+            min = b;
+          }
+        }
+        // If there is no other cluster, min stays infinite; if all involved
+        // distances are 0, min = a = 0. Both would give NaN: use 0 instead.
+        final double denom = Math.max(min, a);
+        final double score = (denom > 0. && denom < Double.POSITIVE_INFINITY) ? (min - a) / denom : 0.;
+        scores.put(it1, score);
+        mm.put(score);
+      }
+    }
+
+    // Build result representation.
+    DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs());
+    OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5);
+    return new OutlierResult(scoreMeta, scoreResult);
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    final TypeInformation dt = getDistanceFunction().getInputTypeRestriction();
+    TypeInformation[] t = clusterer.getInputTypeRestriction();
+    for(TypeInformation i : t) {
+      if(dt.isAssignableFromType(i)) {
+        return t;
+      }
+    }
+    // Prepend distance type:
+    TypeInformation[] t2 = new TypeInformation[t.length + 1];
+    t2[0] = dt;
+    System.arraycopy(t, 0, t2, 1, t.length);
+    return t2;
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return LOG;
+  }
+
+  /**
+   * Parameterizer.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <O> Object type
+   */
+  public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+    /**
+     * Parameter for choosing the clustering algorithm
+     */
+    public static final OptionID CLUSTERING_ID = new OptionID("silhouette.clustering", //
+    "Clustering algorithm to use for the silhouette coefficients.");
+
+    /** Clustering algorithm chosen via {@link #CLUSTERING_ID}. */
+    ClusteringAlgorithm<?> clusterer;
+
+    /** Keep noise "clusters" merged, instead of breaking them into singletons. */
+    private boolean mergenoise = false;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+
+      ObjectParameter<ClusteringAlgorithm<?>> clusterP = new ObjectParameter<>(CLUSTERING_ID, ClusteringAlgorithm.class);
+      if(config.grab(clusterP)) {
+        clusterer = clusterP.instantiateClass(config);
+      }
+
+      Flag noiseP = new Flag(EvaluateSilhouette.Parameterizer.MERGENOISE_ID);
+      if(config.grab(noiseP)) {
+        mergenoise = noiseP.isTrue();
+      }
+    }
+
+    @Override
+    protected SilhouetteOutlierDetection<O> makeInstance() {
+      return new SilhouetteOutlierDetection<>(distanceFunction, clusterer, mergenoise);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/package-info.java
new file mode 100644
index 00000000..15ee771e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Clustering-based outlier detection.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java
index 5cafe04d..1fa43ff6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,52 +24,54 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Simple distance based outlier detection algorithms.
*
+ * Reference:
* <p>
- * Reference: E.M. Knorr, R. T. Ng: Algorithms for Mining Distance-Based
- * Outliers in Large Datasets, In: Procs Int. Conf. on Very Large Databases
- * (VLDB'98), New York, USA, 1998.
+ * E.M. Knorr, R. T. Ng:<br />
+ * Algorithms for Mining Distance-Based Outliers in Large Datasets,<br />
+ * In: Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998.
+ * </p>
*
* @author Lisa Reichert
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
*/
-public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "E.M. Knorr, R. T. Ng", //
+title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", //
+booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
+public abstract class AbstractDBOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
- * Parameter to specify the size of the D-neighborhood
+ * Radius parameter d.
*/
- public static final OptionID D_ID = new OptionID("dbod.d", "size of the D-neighborhood");
-
- /**
- * Holds the value of {@link #D_ID}.
- */
- private D d;
+ private double d;
/**
* Constructor with actual parameters.
*
* @param distanceFunction distance function to use
- * @param d d value
+ * @param d radius d value
*/
- public AbstractDBOutlier(DistanceFunction<? super O, D> distanceFunction, D d) {
+ public AbstractDBOutlier(DistanceFunction<? super O> distanceFunction, double d) {
super(distanceFunction);
this.d = d;
}
@@ -86,7 +88,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
DoubleDataStore dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Density-Based Outlier Detection", "db-outlier", dbodscore, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -99,7 +101,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
* @param d distance
* @return computed scores
*/
- protected abstract DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d);
+ protected abstract DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d);
@Override
public TypeInformation[] getInputTypeRestriction() {
@@ -113,11 +115,16 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*
* @apiviz.exclude
*/
- public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public abstract static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the size of the D-neighborhood
+ */
+ public static final OptionID D_ID = new OptionID("dbod.d", "size of the D-neighborhood");
+
/**
* Query radius
*/
- protected D d = null;
+ protected double d;
@Override
protected void makeOptions(Parameterization config) {
@@ -130,9 +137,9 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*
* @param config Parameterization
*/
- protected void configD(Parameterization config, DistanceFunction<?, D> distanceFunction) {
- final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
- final DistanceParameter<D> param = new DistanceParameter<>(D_ID, distanceFactory);
+ protected void configD(Parameterization config, DistanceFunction<?> distanceFunction) {
+ final DoubleParameter param = new DoubleParameter(D_ID) //
+ .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if(config.grab(param)) {
d = param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java
index 4f4d12bf..62e26830 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,15 +29,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -49,10 +49,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
* Simple distanced based outlier detection algorithm. User has to specify two
* parameters An object is flagged as an outlier if at least a fraction p of all
* data objects has a distance above d from c
+ *
+ * Reference:
* <p>
- * Reference: E.M. Knorr, R. T. Ng: Algorithms for Mining Distance-Based
- * Outliers in Large Datasets, In: Procs Int. Conf. on Very Large Databases
- * (VLDB'98), New York, USA, 1998.
+ * E.M. Knorr, R. T. Ng:<br />
+ * Algorithms for Mining Distance-Based Outliers in Large Datasets,<br />
+ * In: Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998.
+ * </p>
*
* This paper presents several Distance Based Outlier Detection algorithms.
* Implemented here is a simple index based algorithm as presented in section
@@ -63,25 +66,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
* @apiviz.has KNNQuery
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
*/
@Title("DBOD: Distance Based Outlier Detection")
@Description("If the D-neighborhood of an object contains only very few objects (less than (1-p) percent of the data) this object is flagged as an outlier")
-@Reference(authors = "E.M. Knorr, R. T. Ng", title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
-public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutlier<O, D> {
+@Reference(authors = "E.M. Knorr, R. T. Ng", //
+title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", //
+booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierDetection" })
+public class DBOutlierDetection<O> extends AbstractDBOutlier<O> {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(DBOutlierDetection.class);
/**
- * Parameter to specify the minimum fraction of objects that must be outside
- * the D- neighborhood of an outlier
- */
- public static final OptionID P_ID = new OptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
-
- /**
- * Holds the value of {@link #P_ID}.
+ * Density threshold percentage p.
*/
private double p;
@@ -92,15 +91,15 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
* @param d distance query radius
* @param p percentage parameter
*/
- public DBOutlierDetection(DistanceFunction<O, D> distanceFunction, D d, double p) {
+ public DBOutlierDetection(DistanceFunction<O> distanceFunction, double d, double p) {
super(distanceFunction, d);
this.p = p;
}
@Override
- protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double neighborhoodSize) {
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
// maximum number of objects in the D-neighborhood of an outlier
int m = (int) ((distFunc.getRelation().size()) * (1 - p));
@@ -115,13 +114,13 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
// if index exists, kNN query. if the distance to the mth nearest neighbor
// is more than d -> object is outlier
if(knnQuery != null) {
- for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
- final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, m);
+ final KNNList knns = knnQuery.getKNNForDBID(iditer, m);
if(LOG.isDebugging()) {
LOG.debugFine("distance to mth nearest neighbour" + knns.toString());
}
- if(knns.get(Math.min(m, knns.size()) - 1).getDistance().compareTo(neighborhoodSize) <= 0) {
+ if(knns.get(Math.min(m, knns.size()) - 1).doubleValue() <= neighborhoodSize) {
// flag as outlier
scores.putDouble(iditer, 1.0);
}
@@ -136,12 +135,12 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
else {
// range query for each object. stop if m objects are found
- for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
int count = 0;
- for (DBIDIter iterator = distFunc.getRelation().iterDBIDs(); iterator.valid() && count < m; iterator.advance()) {
- D currentDistance = distFunc.distance(iditer, iterator);
- if(currentDistance.compareTo(neighborhoodSize) <= 0) {
+ for(DBIDIter iterator = distFunc.getRelation().iterDBIDs(); iterator.valid() && count < m; iterator.advance()) {
+ double currentDistance = distFunc.distance(iditer, iterator);
+ if(currentDistance <= neighborhoodSize) {
count++;
}
}
@@ -152,9 +151,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
progressOFlags.setProcessed(counter, LOG);
}
}
- if(progressOFlags != null) {
- progressOFlags.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(progressOFlags);
return scores;
}
@@ -170,7 +167,16 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDBOutlier.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDBOutlier.Parameterizer<O> {
+ /**
+ * Parameter to specify the minimum fraction of objects that must be outside
+ * the D-neighborhood of an outlier.
+ */
+ public static final OptionID P_ID = new OptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
+
+ /**
+ * Density threshold p.
+ */
protected double p = 0.0;
@Override
@@ -183,7 +189,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
@Override
- protected DBOutlierDetection<O, D> makeInstance() {
+ protected DBOutlierDetection<O> makeInstance() {
return new DBOutlierDetection<>(distanceFunction, d, p);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java
index d6528682..ac097d75 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,8 +33,8 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -46,17 +46,28 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
* score thus eliminating this parameter and turning the method into a ranking
* method instead of a labelling one.
*
+ * Reference:
+ * <p>
+ * E.M. Knorr, R. T. Ng:<br />
+ * Algorithms for Mining Distance-Based Outliers in Large Datasets,<br />
+ * In: Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998.
+ * </p>
+ *
* @author Lisa Reichert
*
* @apiviz.has RangeQuery
*
* @param <O> Database object type
- * @param <D> Distance type
*/
@Title("Distance based outlier score")
-@Description("Generalization of the original DB-Outlier approach to a ranking method, by turning the fraction parameter into the output value.")
-@Reference(prefix = "Generalization of a method proposed in", authors = "E.M. Knorr, R. T. Ng", title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
-public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<O, D> {
+@Description("Generalization of the original DB-Outlier approach to a ranking method, "//
+ + "by turning the fraction parameter into the output value.")
+@Reference(prefix = "Generalization of a method proposed in", //
+authors = "E.M. Knorr, R. T. Ng", //
+title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", //
+booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierScore" })
+public class DBOutlierScore<O> extends AbstractDBOutlier<O> {
/**
* The logger for this class.
*/
@@ -68,19 +79,19 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
* @param distanceFunction Distance function
* @param d distance radius parameter
*/
- public DBOutlierScore(DistanceFunction<O, D> distanceFunction, D d) {
+ public DBOutlierScore(DistanceFunction<O> distanceFunction, double d) {
super(distanceFunction, d);
}
@Override
- protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
final double size = distFunc.getRelation().size();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
// TODO: use bulk when implemented.
- for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute percentage of neighbors in the given neighborhood with size d
double n = (rangeQuery.getRangeForDBID(iditer, d).size()) / size;
scores.putDouble(iditer, 1.0 - n);
@@ -100,9 +111,9 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDBOutlier.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDBOutlier.Parameterizer<O> {
@Override
- protected DBOutlierScore<O, D> makeInstance() {
+ protected DBOutlierScore<O> makeInstance() {
return new DBOutlierScore<>(distanceFunction, d);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java
index e0cdd0c5..6eac9e95 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,6 +28,7 @@ import java.util.HashSet;
import java.util.Set;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -36,20 +37,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -57,9 +55,9 @@ import de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMaxHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMinHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -71,7 +69,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* Fast Outlier Detection in High Dimensional Spaces
@@ -96,8 +93,12 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*/
@Title("Fast Outlier Detection in High Dimensional Spaces")
@Description("Algorithm to compute outliers using Hilbert space filling curves")
-@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", url = "http://dx.doi.org/10.1145/375663.375668")
-public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "F. Angiulli, C. Pizzuti", //
+title = "Fast Outlier Detection in High Dimensional Spaces", //
+booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", //
+url = "http://dx.doi.org/10.1145/375663.375668")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.HilOut" })
+public class HilOut<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -131,7 +132,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
/**
* Distance query
*/
- private DistanceQuery<O, DoubleDistance> distq;
+ private DistanceQuery<O> distq;
/**
* Set sizes, total and current iteration
@@ -143,13 +144,6 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
*/
private double omega_star;
- // public int distcomp = 1;
-
- /**
- * Comparator for sorting the heaps.
- */
- private static final Comparator<? super DistanceDBIDPair<?>> COMPARATOR = DistanceDBIDResultUtil.distanceComparator();
-
/**
* Type of output: all scores (upper bounds) or top n only
*
@@ -190,21 +184,19 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
double[] min;
double diameter = 0; // Actually "length of edge"
{
- Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
- min = new double[d];
- double[] max = new double[d];
- for (int i = 0; i < d; i++) {
- min[i] = hbbs.first.doubleValue(i);
- max[i] = hbbs.second.doubleValue(i);
+ double[][] hbbs = RelationUtil.computeMinMax(relation);
+ min = hbbs[0];
+ double[] max = hbbs[1];
+ for(int i = 0; i < d; i++) {
diameter = Math.max(diameter, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
- for (int i = 0; i < d; i++) {
+ for(int i = 0; i < d; i++) {
double diff = (diameter - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
}
}
@@ -216,7 +208,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
// Main part: 1. Phase max. d+1 loops
- for (int j = 0; j <= d && n_star < n; j++) {
+ for(int j = 0; j <= d && n_star < n; j++) {
// initialize (clear) out and wlb - not 100% clear in the paper
h.out.clear();
h.wlb.clear();
@@ -226,51 +218,49 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
scan(h, (int) (k * capital_n / (double) capital_n_star));
// determine the true outliers (n_star)
trueOutliers(h);
- if (progressTrueOut != null) {
+ if(progressTrueOut != null) {
progressTrueOut.setProcessed(n_star, LOG);
}
// Build the top Set as out + wlb
h.top.clear();
HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature entry = iter.get();
top_keys.add(entry.id);
h.top.add(entry);
}
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature entry = iter.get();
- if (!top_keys.contains(entry.id)) {
+ if(!top_keys.contains(entry.id)) {
// No need to update top_keys - discarded
h.top.add(entry);
}
}
- if (progressHilOut != null) {
- progressHilOut.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(progressHilOut);
}
// 2. Phase: Additional Scan if less than n true outliers determined
- if (n_star < n) {
+ if(n_star < n) {
h.out.clear();
h.wlb.clear();
// TODO: reinitialize shift to 0?
scan(h, capital_n);
}
- if (progressHilOut != null) {
+ if(progressHilOut != null) {
progressHilOut.setProcessed(d, LOG);
progressHilOut.ensureCompleted(LOG);
}
- if (progressTrueOut != null) {
+ if(progressTrueOut != null) {
progressTrueOut.setProcessed(n, LOG);
progressTrueOut.ensureCompleted(LOG);
}
DoubleMinMax minmax = new DoubleMinMax();
// Return weights in out
- if (tn == ScoreType.TopN) {
+ if(tn == ScoreType.TopN) {
minmax.put(0.0);
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
hilout_weight.putDouble(iditer, 0.0);
}
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature ent = iter.get();
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
@@ -278,12 +268,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
}
// Return all weights in pf
else {
- for (HilFeature ent : h.pf) {
+ for(HilFeature ent : h.pf) {
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
}
}
- Relation<Double> scoreResult = new MaterializedRelation<>("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("HilOut weight", "hilout-weight", hilout_weight, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -297,35 +287,37 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
*/
private void scan(HilbertFeatures hf, int k0) {
final int mink0 = Math.min(2 * k0, capital_n - 1);
- if (LOG.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
}
- for (int i = 0; i < hf.pf.length; i++) {
- if (hf.pf[i].ubound < omega_star) {
+ for(int i = 0; i < hf.pf.length; i++) {
+ if(hf.pf[i].ubound < omega_star) {
continue;
}
- if (hf.pf[i].lbound < hf.pf[i].ubound) {
+ if(hf.pf[i].lbound < hf.pf[i].ubound) {
double omega = hf.fastUpperBound(i);
- if (omega < omega_star) {
+ if(omega < omega_star) {
hf.pf[i].ubound = omega;
- } else {
+ }
+ else {
int maxcount;
// capital_n-1 instead of capital_n: all, except self
- if (hf.top.contains(hf.pf[i])) {
+ if(hf.top.contains(hf.pf[i])) {
maxcount = capital_n - 1;
- } else {
+ }
+ else {
maxcount = mink0;
}
innerScan(hf, i, maxcount);
}
}
- if (hf.pf[i].ubound > 0) {
+ if(hf.pf[i].ubound > 0) {
hf.updateOUT(i);
}
- if (hf.pf[i].lbound > 0) {
+ if(hf.pf[i].lbound > 0) {
hf.updateWLB(i);
}
- if (hf.wlb.size() >= n) {
+ if(hf.wlb.size() >= n) {
omega_star = Math.max(omega_star, hf.wlb.peek().lbound);
}
}
@@ -344,40 +336,43 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
int a = i, b = i;
int level = h, levela = h, levelb = h;
// Explore up to "maxcount" neighbors in this pass
- for (int count = 0; count < maxcount; count++) {
+ for(int count = 0; count < maxcount; count++) {
final int c; // Neighbor to explore
- if (a == 0) { // At left end, explore right
+ if(a == 0) { // At left end, explore right
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
- } else if (b >= capital_n - 1) { // At right end, explore left
+ }
+ else if(b >= capital_n - 1) { // At right end, explore left
// assert (a > 0);
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- } else if (hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
+ }
+ else if(hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- } else {
+ }
+ else {
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
}
- if (!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
+ if(!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
// hf.distcomp ++;
- hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id).doubleValue(), k);
- if (hf.pf[i].nn.size() == k) {
- if (hf.pf[i].sum_nn < omega_star) {
+ hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id), k);
+ if(hf.pf[i].nn.size() == k) {
+ if(hf.pf[i].sum_nn < omega_star) {
break; // stop = true
}
final int mlevel = Math.max(levela, levelb);
- if (mlevel < level) {
+ if(mlevel < level) {
level = mlevel;
final double delta = hf.minDistLevel(hf.pf[i].id, level);
- if (delta >= hf.pf[i].nn.peek().doubleDistance()) {
+ if(delta >= hf.pf[i].nn.peek().doubleValue()) {
break; // stop = true
}
}
@@ -387,17 +382,17 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
double br = hf.boxRadius(i, a - 1, b + 1);
double newlb = 0.0;
double newub = 0.0;
- for (ObjectHeap.UnsortedIter<DoubleDistanceDBIDPair> iter = hf.pf[i].nn.unsortedIter(); iter.valid(); iter.advance()) {
- DoubleDistanceDBIDPair entry = iter.get();
- newub += entry.doubleDistance();
- if (entry.doubleDistance() <= br) {
- newlb += entry.doubleDistance();
+ for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iter = hf.pf[i].nn.unsortedIter(); iter.valid(); iter.advance()) {
+ DoubleDBIDPair entry = iter.get();
+ newub += entry.doubleValue();
+ if(entry.doubleValue() <= br) {
+ newlb += entry.doubleValue();
}
}
- if (newlb > hf.pf[i].lbound) {
+ if(newlb > hf.pf[i].lbound) {
hf.pf[i].lbound = newlb;
}
- if (newub < hf.pf[i].ubound) {
+ if(newub < hf.pf[i].ubound) {
hf.pf[i].ubound = newub;
}
}
@@ -411,9 +406,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
private void trueOutliers(HilbertFeatures h) {
n_star = 0;
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature entry = iter.get();
- if (entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
+ if(entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
n_star++;
}
}
@@ -494,8 +489,8 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
this.pf = new HilFeature[relation.size()];
int pos = 0;
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new ComparatorMaxHeap<DoubleDistanceDBIDPair>(k, COMPARATOR));
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new ComparableMaxHeap<DoubleDBIDPair>(k));
}
this.out = new ComparatorMinHeap<>(n, new Comparator<HilFeature>() {
@Override
@@ -523,42 +518,45 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
// FIXME: 64 bit mode untested - sign bit is tricky to handle correctly
// with the rescaling. 63 bit should be fine. The sign bit probably needs
// to be handled differently, or at least needs careful testing of the API
- if (h >= 32) { // 32 to 63 bit
+ if(h >= 32) { // 32 to 63 bit
final long scale = Long.MAX_VALUE; // = 63 bits
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
long[] coord = new long[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- } else if (h >= 16) { // 16-31 bit
+ }
+ else if(h >= 16) { // 16-31 bit
final int scale = ~1 >>> 1;
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
int[] coord = new int[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- } else if (h >= 8) { // 8-15 bit
+ }
+ else if(h >= 8) { // 8-15 bit
final int scale = ~1 >>> 16;
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
short[] coord = new short[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 16);
}
- } else { // 1-7 bit
+ }
+ else { // 1-7 bit
final int scale = ~1 >>> 8;
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
byte[] coord = new byte[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 24);
@@ -566,13 +564,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
}
java.util.Arrays.sort(pf);
// Update levels
- for (int i = 0; i < pf.length - 1; i++) {
+ for(int i = 0; i < pf.length - 1; i++) {
pf[i].level = minRegLevel(i, i + 1);
}
// Count candidates
capital_n_star = 0;
- for (int i = 0; i < pf.length; i++) {
- if (pf[i].ubound >= omega_star) {
+ for(int i = 0; i < pf.length; i++) {
+ if(pf[i].ubound >= omega_star) {
capital_n_star++;
}
}
@@ -584,11 +582,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param i position in pf of the feature to be inserted
*/
private void updateOUT(int i) {
- if (out.size() < n) {
+ if(out.size() < n) {
out.add(pf[i]);
- } else {
+ }
+ else {
HilFeature head = out.peek();
- if (pf[i].ubound > head.ubound) {
+ if(pf[i].ubound > head.ubound) {
// replace smallest
out.replaceTopElement(pf[i]);
}
@@ -601,11 +600,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param i position in pf of the feature to be inserted
*/
private void updateWLB(int i) {
- if (wlb.size() < n) {
+ if(wlb.size() < n) {
wlb.add(pf[i]);
- } else {
+ }
+ else {
HilFeature head = wlb.peek();
- if (pf[i].lbound > head.lbound) {
+ if(pf[i].lbound > head.lbound) {
// replace smallest
wlb.replaceTopElement(pf[i]);
}
@@ -622,12 +622,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
private double fastUpperBound(int i) {
int pre = i;
int post = i;
- while (post - pre < k) {
+ while(post - pre < k) {
int pre_level = (pre - 1 >= 0) ? pf[pre - 1].level : -2;
int post_level = (post < capital_n - 1) ? pf[post].level : -2;
- if (post_level >= pre_level) {
+ if(post_level >= pre_level) {
post++;
- } else {
+ }
+ else {
pre--;
}
}
@@ -642,12 +643,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param level Level of the corresponding r-region
*/
private double minDistLevel(DBID id, int level) {
- final NumberVector<?> obj = relation.get(id);
+ final NumberVector obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
// 2 ^ - (level - 1)
final double r = 1.0 / (1 << (level - 1));
double dist = Double.POSITIVE_INFINITY;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.min(dist, Math.min(p_m_r, r - p_m_r));
}
@@ -662,36 +663,39 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param level Level of the corresponding r-region
*/
private double maxDistLevel(DBID id, int level) {
- final NumberVector<?> obj = relation.get(id);
+ final NumberVector obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
final double r = 1.0 / (1 << (level - 1));
double dist;
- if (t == 1.0) {
+ if(t == 1.0) {
dist = 0.0;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
dist += Math.max(p_m_r, r - p_m_r);
}
- } else if (t == 2.0) {
+ }
+ else if(t == 2.0) {
dist = 0.0;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
double a = Math.max(p_m_r, r - p_m_r);
dist += a * a;
}
dist = Math.sqrt(dist);
- } else if (!Double.isInfinite(t)) {
+ }
+ else if(!Double.isInfinite(t)) {
dist = 0.0;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist += Math.pow(Math.max(p_m_r, r - p_m_r), t);
}
dist = Math.pow(dist, 1.0 / t);
- } else {
+ }
+ else {
dist = Double.NEGATIVE_INFINITY;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.max(dist, Math.max(p_m_r, r - p_m_r));
}
@@ -707,9 +711,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @return Number of level shared
*/
private int numberSharedLevels(long[] a, long[] b) {
- for (int i = 0, j = a.length - 1; i < a.length; i++, j--) {
+ for(int i = 0, j = a.length - 1; i < a.length; i++, j--) {
final long diff = a[j] ^ b[j];
- if (diff != 0) {
+ if(diff != 0) {
// expected unused = available - used
final int expected = (a.length * Long.SIZE) - (d * h);
return ((BitsUtil.numberOfLeadingZeros(diff) + i * Long.SIZE) - expected) / d;
@@ -758,14 +762,16 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
private double boxRadius(int i, int a, int b) {
// level are inversely ordered to box sizes. min -> max
final int level;
- if (a < 0) {
- if (b >= pf.length) {
+ if(a < 0) {
+ if(b >= pf.length) {
return Double.POSITIVE_INFINITY;
}
level = maxRegLevel(i, b);
- } else if (b >= pf.length) {
+ }
+ else if(b >= pf.length) {
level = maxRegLevel(i, a);
- } else {
+ }
+ else {
level = Math.max(maxRegLevel(i, a), maxRegLevel(i, b));
}
return minDistLevel(pf[i].id, level);
@@ -778,7 +784,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param dim Dimension
* @return Projected and shifted position
*/
- private double getDimForObject(NumberVector<?> obj, int dim) {
+ private double getDimForObject(NumberVector obj, int dim) {
return (obj.doubleValue(dim) - min[dim]) / diameter + shift;
}
}
@@ -822,7 +828,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
/**
* Heap with the nearest known neighbors
*/
- public ObjectHeap<DoubleDistanceDBIDPair> nn;
+ public ObjectHeap<DoubleDBIDPair> nn;
/**
* Set representation of the nearest neighbors for faster lookups
@@ -840,7 +846,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
* @param id Object ID
* @param nn Heap for neighbors
*/
- public HilFeature(DBID id, ObjectHeap<DoubleDistanceDBIDPair> nn) {
+ public HilFeature(DBID id, ObjectHeap<DoubleDBIDPair> nn) {
super();
this.id = id;
this.nn = nn;
@@ -861,21 +867,22 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
*/
protected void insert(DBID id, double dt, int k) {
// assert (!nn_keys.contains(id));
- if (nn.size() < k) {
- DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ if(nn.size() < k) {
+ DoubleDBIDPair entry = DBIDUtil.newPair(dt, id);
nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
- } else {
- DoubleDistanceDBIDPair head = nn.peek();
- if (dt < head.doubleDistance()) {
+ }
+ else {
+ DoubleDBIDPair head = nn.peek();
+ if(dt < head.doubleValue()) {
head = nn.poll(); // Remove worst
- sum_nn -= head.doubleDistance();
+ sum_nn -= head.doubleValue();
nn_keys.remove(head);
// assert (nn.peek().doubleDistance() <= head.doubleDistance());
- DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ DoubleDBIDPair entry = DBIDUtil.newPair(dt, id);
nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
@@ -893,7 +900,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
*
* @param <O> Vector type
*/
- public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* Parameter to specify how many next neighbors should be used in the
* computation
@@ -951,27 +958,27 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID, 5);
- if (config.grab(kP)) {
+ if(config.grab(kP)) {
k = kP.getValue();
}
final IntParameter nP = new IntParameter(N_ID, 10);
- if (config.grab(nP)) {
+ if(config.grab(nP)) {
n = nP.getValue();
}
final IntParameter hP = new IntParameter(H_ID, 32);
- if (config.grab(hP)) {
+ if(config.grab(hP)) {
h = hP.getValue();
}
ObjectParameter<LPNormDistanceFunction> distP = AbstractDistanceBasedAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, LPNormDistanceFunction.class);
- if (config.grab(distP)) {
+ if(config.grab(distP)) {
distfunc = distP.instantiateClass(config);
}
final EnumParameter<ScoreType> tnP = new EnumParameter<>(TN_ID, ScoreType.class, ScoreType.TopN);
- if (config.grab(tnP)) {
+ if(config.grab(tnP)) {
tn = tnP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java
index 503487c8..97970f0a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -31,24 +32,25 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -56,11 +58,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* Outlier Detection based on the distance of an object to its k nearest
* neighbor.
*
+ * This implementation differs from the original pseudocode, where the k nearest
+ * neighbors do not exclude the point that is currently evaluated. I.e. for k=1
+ * the resulting score is the distance to the 1-nearest neighbor that is not the
+ * query point and therefore should match k=2 in the exact pseudocode - a value
+ * of k=1 in the original code does not make sense, as the 1NN distance will be
+ * 0 for every point in the database. If you for any reason want to use the
+ * original algorithm, subtract 1 from the k parameter.
+ *
+ * Reference:
* <p>
- * Reference:<br>
- * S. Ramaswamy, R. Rastogi, K. Shim: Efficient Algorithms for Mining Outliers
- * from Large Data Sets.</br> In: Proc. of the Int. Conf. on Management of Data,
- * Dallas, Texas, 2000.
+ * S. Ramaswamy, R. Rastogi, K. Shim:<br />
+ * Efficient Algorithms for Mining Outliers from Large Data Sets.<br />
+ * In: Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000.
* </p>
*
* @author Lisa Reichert
@@ -68,24 +78,22 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @apiviz.has KNNQuery
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
*/
@Title("KNN outlier: Efficient Algorithms for Mining Outliers from Large Data Sets")
@Description("Outlier Detection based on the distance of an object to its k nearest neighbor.")
-@Reference(authors = "S. Ramaswamy, R. Rastogi, K. Shim", title = "Efficient Algorithms for Mining Outliers from Large Data Sets", booktitle = "Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000", url = "http://dx.doi.org/10.1145/342009.335437")
-public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "S. Ramaswamy, R. Rastogi, K. Shim", //
+title = "Efficient Algorithms for Mining Outliers from Large Data Sets", //
+booktitle = "Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000", //
+url = "http://dx.doi.org/10.1145/342009.335437")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.KNNOutlier", "knno" })
+public class KNNOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(KNNOutlier.class);
/**
- * Parameter to specify the k nearest neighbor
- */
- public static final OptionID K_ID = new OptionID("knno.k", "k nearest neighbor");
-
- /**
- * The parameter k
+ * The parameter k (not including the query point; k + 1 neighbors are queried)
*/
private int k;
@@ -93,9 +101,9 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
* Constructor for a single kNN query.
*
* @param distanceFunction distance function to use
- * @param k Value of k
+ * @param k Value of k (not including query point!)
*/
- public KNNOutlier(DistanceFunction<? super O, D> distanceFunction, int k) {
+ public KNNOutlier(DistanceFunction<? super O> distanceFunction, int k) {
super(distanceFunction);
this.k = k;
}
@@ -104,39 +112,27 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
* Runs the algorithm in the timed evaluation part.
*/
public OutlierResult run(Database database, Relation<O> relation) {
- final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
+ final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
- if(LOG.isVerbose()) {
- LOG.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
- }
- FiniteProgress progressKNNDistance = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
- final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, k);
- final double dkn;
- if(knns instanceof DoubleDistanceKNNList) {
- dkn = ((DoubleDistanceKNNList) knns).doubleKNNDistance();
- }
- else {
- dkn = knns.getKNNDistance().doubleValue();
- }
+ // (assuming the query point is always included, with distance 0)
+ final KNNList knns = knnQuery.getKNNForDBID(iditer, k + 1);
+ final double dkn = knns.getKNNDistance();
knno_score.putDouble(iditer, dkn);
minmax.put(dkn);
- if(progressKNNDistance != null) {
- progressKNNDistance.incrementProcessed(LOG);
- }
- }
- if(progressKNNDistance != null) {
- progressKNNDistance.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
- Relation<Double> scoreres = new MaterializedRelation<>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
+ LOG.ensureCompleted(prog);
+ DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreres);
}
@@ -158,20 +154,31 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the k nearest neighbor
+ */
+ public static final OptionID K_ID = new OptionID("knno.k", //
+ "The k nearest neighbor, excluding the query point "//
+ + "(i.e. query point is the 0-nearest-neighbor)");
+
+ /**
+ * k parameter
+ */
protected int k = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID);
+ final IntParameter kP = new IntParameter(K_ID)//
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
}
@Override
- protected KNNOutlier<O, D> makeInstance() {
+ protected KNNOutlier<O> makeInstance() {
return new KNNOutlier<>(distanceFunction, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java
new file mode 100644
index 00000000..b09f7480
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java
@@ -0,0 +1,205 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Outlier Detection based on the accumulated distances of a point to its k
+ * nearest neighbors.
+ *
+ * As in the original publication (as far as we could tell from the pseudocode
+ * included), the current point is not included in the nearest neighbors (see
+ * figures in the publication). This matches the intuition common in nearest
+ * neighbor classification, where the evaluated instances are not part of the
+ * training set; but it contrasts to the pseudocode of the kNN outlier method
+ * and the database interpretation (which returns all objects stored in the
+ * database).
+ *
+ * Furthermore, we report the sum of the k distances (called "weight" in the
+ * original publication). Other implementations may return the average distance
+ * instead, and therefore yield different results.
+ *
+ * Reference:
+ * <p>
+ * F. Angiulli, C. Pizzuti:<br />
+ * Fast Outlier Detection in High Dimensional Spaces.<br />
+ * In: Proc. European Conference on Principles of Knowledge Discovery and Data
+ * Mining (PKDD'02), Helsinki, Finland, 2002.
+ * </p>
+ *
+ * @author Lisa Reichert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ */
+@Title("KNNWeight outlier detection")
+@Description("Outlier detection based on the sum of distances of an object to its k nearest neighbors.")
+@Reference(authors = "F. Angiulli, C. Pizzuti", //
+title = "Fast Outlier Detection in High Dimensional Spaces", //
+booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02), Helsinki, Finland, 2002", //
+url = "http://dx.doi.org/10.1007/3-540-45681-3_2")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.KNNWeightOutlier", "knnw" })
+public class KNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(KNNWeightOutlier.class);
+
+ /**
+ * Holds the number of nearest neighbors to sum over (not including query point!)
+ */
+ private int k;
+
+ /**
+ * Constructor with parameters.
+ *
+ * @param distanceFunction Distance function
+ * @param k k Parameter (not including query point!)
+ */
+ public KNNWeightOutlier(DistanceFunction<? super O> distanceFunction, int k) {
+ super(distanceFunction);
+ this.k = k;
+ }
+
+ /**
+ * Runs the algorithm in the timed evaluation part.
+ *
+ * @param database Database context
+ * @param relation Data relation
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute kNN weights.", relation.size(), LOG) : null;
+
+ DoubleMinMax minmax = new DoubleMinMax();
+ WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNList knn = knnQuery.getKNNForDBID(iditer, k + 1);
+ double skn = 0; // sum of the distances to the k nearest neighbors
+ int i = 0; // number of neighbors so far
+ for(DoubleDBIDListIter neighbor = knn.iter(); i < k && neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(iditer, neighbor)) {
+ continue;
+ }
+ skn += neighbor.doubleValue();
+ ++i;
+ }
+ if(i < k) {
+ // Less than k neighbors found
+ // Approximative index, or k > data set size!
+ skn = Double.POSITIVE_INFINITY;
+ }
+ knnw_score.putDouble(iditer, skn);
+ minmax.put(skn);
+
+ LOG.incrementProcessed(prog);
+ }
+ LOG.ensureCompleted(prog);
+
+ DoubleRelation res = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", knnw_score, relation.getDBIDs());
+ OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
+ return new OutlierResult(meta, res);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the k nearest neighbor.
+ */
+ public static final OptionID K_ID = new OptionID("knnwod.k", //
+ "The k nearest neighbor, excluding the query point "//
+ + "(i.e. query point is the 0-nearest-neighbor)");
+
+ /**
+ * k parameter
+ */
+ protected int k = 0;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter kP = new IntParameter(K_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ }
+
+ @Override
+ protected KNNWeightOutlier<O> makeInstance() {
+ return new KNNWeightOutlier<>(distanceFunction, k);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java
index a5b39146..67380335 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -32,17 +33,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
@@ -60,16 +61,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* <p>
* V. Hautamäki and I. Kärkkäinen and P Fränti<br />
* Outlier detection using k-nearest neighbour graph<br />
- * Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004 <br />
+ * Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004
* </p>
*
* @author Erich Schubert
*
* @param <O> Object type
- * @param <D> Distance type
*/
-@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", title = "Outlier detection using k-nearest neighbour graph", booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", url = "http://dx.doi.org/10.1109/ICPR.2004.1334558")
-public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", //
+title = "Outlier detection using k-nearest neighbour graph", //
+booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", //
+url = "http://dx.doi.org/10.1109/ICPR.2004.1334558")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.ODIN" })
+public class ODIN<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* Class logger.
*/
@@ -86,7 +90,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
* @param distanceFunction Distance function
* @param k k parameter
*/
- public ODIN(DistanceFunction<? super O, D> distanceFunction, int k) {
+ public ODIN(DistanceFunction<? super O> distanceFunction, int k) {
super(distanceFunction);
this.k = k;
}
@@ -100,8 +104,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
*/
public OutlierResult run(Database database, Relation<O> relation) {
// Get the query functions:
- DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnq = database.getKNNQuery(dq, k);
+ DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnq = database.getKNNQuery(dq, k);
// Get the objects to process, and a data storage for counting and output:
DBIDs ids = relation.getDBIDs();
@@ -112,7 +116,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
// Process all objects
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
// Find the nearest neighbors (using an index, if available!)
- KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ DBIDs neighbors = knnq.getKNNForDBID(iter, k);
// For each neighbor, except ourselves, increase the in-degree:
for(DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
if(DBIDUtil.equal(iter, nei)) {
@@ -131,7 +135,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
// Wrap the result and add metadata.
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., inc * (ids.size() - 1), 1);
- Relation<Double> rel = new MaterializedRelation<>("ODIN In-Degree", "odin", TypeUtil.DOUBLE, scores, ids);
+ DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids);
return new OutlierResult(meta, rel);
}
@@ -153,9 +157,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
* @apiviz.exclude
*
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Parameter for the number of nearest neighbors:
*
@@ -185,7 +188,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit
}
@Override
- protected ODIN<O, D> makeInstance() {
+ protected ODIN<O> makeInstance() {
return new ODIN<>(distanceFunction, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java
new file mode 100644
index 00000000..e1407679
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java
@@ -0,0 +1,325 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Collection;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.ReferencePointsResult;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.referencepoints.GridBasedReferencePoints;
+import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic;
+
+/**
+ * Reference-Based Outlier Detection algorithm, an algorithm that computes kNN
+ * distances approximately, using reference points.
+ *
+ * kNN distances are approximated by the difference in distance from a reference
+ * point. For this approximation to be of high quality, triangle inequality is
+ * required; but the algorithm can also process non-metric distances.
+ *
+ * Reference:
+ * <p>
+ * Y. Pei, O. R. Zaiane, Y. Gao<br />
+ * An Efficient Reference-Based Approach to Outlier Detection in Large Datasets<br />
+ * In: Proc. IEEE Int. Conf. on Data Mining (ICDM'06), Hong Kong, China, 2006
+ * </p>
+ *
+ * @author Lisa Reichert
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf ReferencePointsHeuristic
+ */
+@Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
+@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
+@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", //
+title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", //
+booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06)", //
+url = "http://dx.doi.org/10.1109/ICDM.2006.17")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.ReferenceBasedOutlierDetection" })
+public class ReferenceBasedOutlierDetection extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(ReferenceBasedOutlierDetection.class);
+
+ /**
+ * Holds the number of neighbors to use for density estimation.
+ */
+ private int k;
+
+ /**
+ * Stores the reference point strategy.
+ */
+ private ReferencePointsHeuristic refp;
+
+ /**
+ * Constructor with parameters.
+ *
+ * @param k Number of neighbors to use for each density estimate
+ * @param distanceFunction Distance function, handed to the superclass
+ * @param refp Heuristic that supplies the reference points
+ */
+ public ReferenceBasedOutlierDetection(int k, PrimitiveDistanceFunction<? super NumberVector> distanceFunction, ReferencePointsHeuristic refp) {
+ super(distanceFunction);
+ this.k = k;
+ this.refp = refp;
+ }
+
+ /**
+ * Run the algorithm on the given relation.
+ *
+ * @param database Database
+ * @param relation Relation to process
+ * @return Outlier result with scores in [0;1] and the reference points attached
+ */
+ public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
+ Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
+ if(refPoints.size() < 1) {
+ throw new AbortException("Cannot compute ROS without reference points!");
+ }
+
+ DBIDs ids = relation.getDBIDs();
+ if(k >= ids.size()) {
+ throw new AbortException("k must be chosen smaller than the database size!");
+ }
+ // Density per object; NaN marks entries that were not written yet.
+ WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);
+ // Compute density estimation:
+ for(NumberVector refPoint : refPoints) {
+ DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
+ updateDensities(rbod_score, referenceDists);
+ }
+ // Maximum finite density. An infinite density (zero distance sum, e.g. for
+ // duplicates) would yield scale 0, and infinity * 0 is NaN.
+ double maxDensity = 0.;
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final double density = rbod_score.doubleValue(iditer);
+ maxDensity = (density < Double.POSITIVE_INFINITY) ? Math.max(maxDensity, density) : maxDensity;
+ }
+ // compute ROS; infinitely dense objects are clamped to the minimum score 0
+ final double scale = maxDensity > 0. ? 1. / maxDensity : 1.;
+ DoubleMinMax mm = new DoubleMinMax();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final double score = Math.max(0., 1 - (rbod_score.doubleValue(iditer) * scale));
+ mm.put(score);
+ rbod_score.putDouble(iditer, score);
+ }
+
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ // Attach the reference points, so that the visualizer can find them.
+ result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
+ return result;
+ }
+
+ /**
+ * Builds the one-dimensional representation of the data set with respect to
+ * a single reference point: the distance of every object to that point.
+ *
+ * @param refPoint Reference Point Feature Vector
+ * @param database database to work on
+ * @param distFunc Distance function to use
+ * @return distance to the reference point and object id, after {@code sort()}
+ */
+ protected DoubleDBIDList computeDistanceVector(NumberVector refPoint, Relation<? extends NumberVector> database, PrimitiveDistanceQuery<? super NumberVector> distFunc) {
+ final ModifiableDoubleDBIDList byDistance = DBIDUtil.newDistanceDBIDList(database.size());
+ for(DBIDIter obj = database.iterDBIDs(); obj.valid(); obj.advance()) {
+ final double dist = distFunc.distance(obj, refPoint);
+ byDistance.add(dist, obj);
+ }
+ byDistance.sort();
+ return byDistance;
+ }
+
+ /**
+ * Fold the estimates for the current reference point into the stored minimum.
+ *
+ * @param rbod_score Density storage (NaN as long as nothing was stored)
+ * @param referenceDists Distances from current reference point
+ */
+ protected void updateDensities(WritableDoubleDataStore rbod_score, DoubleDBIDList referenceDists) {
+ final int size = referenceDists.size();
+ final DoubleDBIDListIter cursor = referenceDists.iter();
+ for(int pos = 0; pos < size; pos++) {
+ final double estimate = computeDensity(referenceDists, cursor, pos);
+ cursor.seek(pos); // computeDensity moved the cursor, go back.
+ final boolean larger = estimate > rbod_score.doubleValue(cursor);
+ if(!larger) { // also true while the stored value is still NaN
+ rbod_score.putDouble(cursor, estimate);
+ }
+ }
+ }
+
+ /**
+ * Computes the density of an object: k divided by the sum of the distances
+ * to its k nearest neighbors. Neighbors and distances are approximated:
+ * instead of a real NN search, the neighbors are the objects closest to the
+ * given index in the reference distance vector, and their distance is the
+ * difference of the distances to the reference point. The result is
+ * infinite if all k differences are zero.
+ *
+ * @param referenceDists vector of the reference distances
+ * @param iter Iterator to this list (will be moved by this method)
+ * @param index index of the current object
+ * @return density for one object and reference point
+ */
+ protected double computeDensity(DoubleDBIDList referenceDists, DoubleDBIDListIter iter, int index) {
+ final int size = referenceDists.size();
+ final double xDist = iter.seek(index).doubleValue();
+
+ int lef = index, rig = index;
+ double sum = 0.;
+ double lef_d = (--lef >= 0) ? xDist - iter.seek(lef).doubleValue() : Double.POSITIVE_INFINITY;
+ double rig_d = (++rig < size) ? iter.seek(rig).doubleValue() - xDist : Double.POSITIVE_INFINITY;
+ for(int i = 0; i < k; ++i) {
+ if(lef >= 0 && rig < size) {
+ // Both sides available: take the closer one, ties go right.
+ if(lef_d < rig_d) {
+ sum += lef_d;
+ // Update left
+ lef_d = (--lef >= 0) ? xDist - iter.seek(lef).doubleValue() : Double.POSITIVE_INFINITY;
+ }
+ else {
+ sum += rig_d;
+ // Update right
+ rig_d = (++rig < size) ? iter.seek(rig).doubleValue() - xDist : Double.POSITIVE_INFINITY;
+ }
+ }
+ else if(lef >= 0) {
+ // Choose left, since right is not available.
+ sum += lef_d;
+ // Update left
+ lef_d = (--lef >= 0) ? xDist - iter.seek(lef).doubleValue() : Double.POSITIVE_INFINITY;
+ }
+ else if(rig < size) {
+ // Choose right, since left is not available.
+ sum += rig_d;
+ // Update right
+ rig_d = (++rig < size) ? iter.seek(rig).doubleValue() - xDist : Double.POSITIVE_INFINITY;
+ }
+ else {
+ // Ran out of objects on both sides: fewer than k other objects.
+ throw new IndexOutOfBoundsException("Less than k objects?");
+ }
+ }
+ return k / sum;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(distanceFunction.getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class: reads k and the reference points heuristic.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector> {
+ /**
+ * Parameter for the reference points heuristic.
+ */
+ public static final OptionID REFP_ID = new OptionID("refod.refp", "The heuristic for finding reference points.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its outlier score; must be an integer greater
+ * than 1.
+ */
+ public static final OptionID K_ID = new OptionID("refod.k", "The number of nearest neighbors");
+
+ /**
+ * Holds the value of {@link #K_ID}.
+ */
+ private int k;
+
+ /**
+ * Holds the reference points heuristic configured via {@link #REFP_ID}.
+ */
+ private ReferencePointsHeuristic refp;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter pK = new IntParameter(K_ID) //
+ .addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ if(config.grab(pK)) {
+ k = pK.getValue();
+ }
+ final ObjectParameter<ReferencePointsHeuristic> refpP = new ObjectParameter<>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
+ if(config.grab(refpP)) {
+ refp = refpP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected ReferenceBasedOutlierDetection makeInstance() {
+ return new ReferenceBasedOutlierDetection(k, distanceFunction, refp);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/package-info.java
new file mode 100644
index 00000000..ac292a01
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/package-info.java
@@ -0,0 +1,30 @@
+/**
+ * Distance-based outlier detection algorithms, such as DBOutlier and kNN.
+ *
+ * For methods based on <em>local</em> density, see package
+ * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.lof} instead.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java
new file mode 100644
index 00000000..a26a7505
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java
@@ -0,0 +1,118 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.parallel.Executor;
+import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject;
+
+/**
+ * Compute the kNN weight score, used by {@link ParallelKNNWeightOutlier}.
+ *
+ * Needs the k nearest neighbors as input, for example from {@link KNNProcessor}
+ *
+ * @author Erich Schubert
+ */
+ public class KNNWeightProcessor extends AbstractDoubleProcessor {
+ /**
+ * Maximum number of neighbor distances to sum up.
+ */
+ int k;
+
+ /**
+ * Input channel delivering the kNN list.
+ */
+ SharedObject<? extends KNNList> input;
+
+ /**
+ * Constructor.
+ *
+ * @param k Maximum number of neighbor distances to sum up
+ */
+ public KNNWeightProcessor(int k) {
+ super();
+ this.k = k;
+ }
+
+ /**
+ * Connect the input channel.
+ *
+ * @param input Input channel
+ */
+ public void connectKNNInput(SharedObject<? extends KNNList> input) {
+ this.input = input;
+ }
+
+ @Override
+ public Instance instantiate(Executor executor) {
+ return new Instance(k, executor.getInstance(input), executor.getInstance(output));
+ }
+
+ /**
+ * Instance that computes the kNN weight.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private static class Instance extends AbstractDoubleProcessor.Instance {
+ /**
+ * Maximum number of neighbor distances to sum up.
+ */
+ int k;
+
+ /**
+ * kNN list input
+ */
+ SharedObject.Instance<? extends KNNList> input;
+
+ /**
+ * Constructor.
+ *
+ * @param k K parameter
+ * @param input kNN list input
+ * @param store Datastore to write to
+ */
+ protected Instance(int k, SharedObject.Instance<? extends KNNList> input, SharedDouble.Instance store) {
+ super(store);
+ this.k = k;
+ this.input = input;
+ }
+
+ @Override
+ public void map(DBIDRef id) {
+ double weight = 0;
+ final DoubleDBIDListIter neighbor = input.get().iter();
+ // Sum the first k distances, or all of them if the list is shorter.
+ for(int used = 0; used < k && neighbor.valid(); ++used, neighbor.advance()) {
+ weight += neighbor.doubleValue();
+ }
+ output.set(weight);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java
new file mode 100644
index 00000000..b7b43765
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java
@@ -0,0 +1,181 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor;
+import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KDistanceProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Parallel implementation of KNN Outlier detection.
+ *
+ * Reference:
+ * <p>
+ * S. Ramaswamy, R. Rastogi, K. Shim:<br />
+ * Efficient Algorithms for Mining Outliers from Large Data Sets.<br />
+ * In: Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000.
+ * </p>
+ *
+ * This parallelized implementation is based on the easy-to-parallelize
+ * generalized pattern discussed in
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf KNNProcessor
+ * @apiviz.composedOf KDistanceProcessor
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+public class ParallelKNNOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Parameter k; the kNN query and processors in run() are set up with k + 1.
+ */
+ private int k;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function, handed to the superclass
+ * @param k Number of neighbors k
+ */
+ public ParallelKNNOutlier(DistanceFunction<? super O> distanceFunction, int k) {
+ super(distanceFunction);
+ this.k = k;
+ }
+
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(ParallelKNNOutlier.class);
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ /** Run the parallel kNN outlier detector, scoring each object by its k-distance. */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final DBIDs ids = relation.getDBIDs();
+ final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+ final DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
+ final KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
+
+ // Stage 1: compute the kNN
+ final KNNProcessor<O> knnStage = new KNNProcessor<>(k + 1, knnq);
+ final SharedObject<KNNList> neighborsVar = new SharedObject<>();
+ knnStage.connectKNNOutput(neighborsVar);
+ // Stage 2: extract the k-distance
+ final KDistanceProcessor kdistStage = new KDistanceProcessor(k + 1);
+ final SharedDouble kdistVar = new SharedDouble();
+ kdistStage.connectKNNInput(neighborsVar);
+ kdistStage.connectOutput(kdistVar);
+ // Stage 3: store as outlier score
+ final WriteDoubleDataStoreProcessor writeStage = new WriteDoubleDataStoreProcessor(scores);
+ writeStage.connectInput(kdistVar);
+ // Stage 4: gather statistics
+ final DoubleMinMaxProcessor rangeStage = new DoubleMinMaxProcessor();
+ rangeStage.connectInput(kdistVar);
+
+ ParallelExecutor.run(ids, knnStage, kdistStage, writeStage, rangeStage);
+ final DoubleMinMax range = rangeStage.getMinMax();
+ final DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", scores, ids);
+ final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
+ return new OutlierResult(meta, scoreres);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ * @param <O> Object type
+ */
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * K parameter, at least 1.
+ */
+ int k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ // k must be at least 1: run() requests k + 1 neighbors and scores by the k-distance.
+ IntParameter kP = new IntParameter(KNNOutlier.Parameterizer.K_ID) //
+ .addConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ }
+
+ @Override
+ protected ParallelKNNOutlier<O> makeInstance() {
+ return new ParallelKNNOutlier<>(distanceFunction, k);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java
new file mode 100644
index 00000000..40639ec5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java
@@ -0,0 +1,187 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor;
+import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Parallel implementation of KNN Weight Outlier detection.
+ *
+ * Reference:
+ * <p>
+ * F. Angiulli, C. Pizzuti:<br />
+ * Fast Outlier Detection in High Dimensional Spaces.<br />
+ * In: Proc. European Conference on Principles of Knowledge Discovery and Data
+ * Mining (PKDD'02), Helsinki, Finland, 2002.
+ * </p>
+ *
+ * This parallelized implementation is based on the easy-to-parallelize
+ * generalized pattern discussed in
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf KNNWeightProcessor
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+public class ParallelKNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Parameter k; the kNN query and processors in run() are set up with k + 1.
+ */
+ private int k;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function, handed to the superclass
+ * @param k Number of neighbors k
+ */
+ public ParallelKNNWeightOutlier(DistanceFunction<? super O> distanceFunction, int k) {
+ super(distanceFunction);
+ this.k = k;
+ }
+
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(ParallelKNNWeightOutlier.class);
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ /**
+ * Run the parallel kNN weight outlier detector.
+ *
+ * @param database Database to process
+ * @param relation Relation to analyze
+ * @return Outlier detection result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final DBIDs ids = relation.getDBIDs();
+ final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+ final DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
+ final KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
+
+ // Stage 1: find the kNN
+ final KNNProcessor<O> knnStage = new KNNProcessor<>(k + 1, knnq);
+ final SharedObject<KNNList> neighborsVar = new SharedObject<>();
+ knnStage.connectKNNOutput(neighborsVar);
+ // Stage 2: sum the neighbor distances into the outlier score
+ final KNNWeightProcessor weightStage = new KNNWeightProcessor(k + 1);
+ final SharedDouble weightVar = new SharedDouble();
+ weightStage.connectKNNInput(neighborsVar);
+ weightStage.connectOutput(weightVar);
+ // Stage 3: store in the output result
+ final WriteDoubleDataStoreProcessor writeStage = new WriteDoubleDataStoreProcessor(scores);
+ writeStage.connectInput(weightVar);
+ // Stage 4: gather statistics for the metadata
+ final DoubleMinMaxProcessor rangeStage = new DoubleMinMaxProcessor();
+ rangeStage.connectInput(weightVar);
+
+ ParallelExecutor.run(ids, knnStage, weightStage, writeStage, rangeStage);
+
+ final DoubleMinMax range = rangeStage.getMinMax();
+ final DoubleRelation scoreres = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", scores, ids);
+ final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
+ return new OutlierResult(meta, scoreres);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ * @param <O> Object type
+ */
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * K parameter, at least 1.
+ */
+ int k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ // k must be at least 1: run() requests k + 1 neighbors and sums their distances.
+ IntParameter kP = new IntParameter(KNNWeightOutlier.Parameterizer.K_ID) //
+ .addConstraint(de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ }
+
+ @Override
+ protected ParallelKNNWeightOutlier<O> makeInstance() {
+ return new ParallelKNNWeightOutlier<>(distanceFunction, k);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java
new file mode 100644
index 00000000..58090507
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Parallel implementations of distance-based outlier detectors.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
index f978365e..60e2ff00 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -40,21 +40,20 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -64,7 +63,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* Fast Outlier Detection Using the "approximate Local Correlation Integral".
@@ -85,12 +83,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* @apiviz.composedOf ALOCIQuadTree
*
* @param <O> Object type
- * @param <D> Distance type
*/
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
-public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -119,7 +116,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
/**
* Distance function
*/
- private NumberVectorDistanceFunction<D> distFunc;
+ private NumberVectorDistanceFunction<?> distFunc;
/**
* Constructor.
@@ -130,7 +127,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
* @param g Number of grids to use
* @param rnd Random generator.
*/
- public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
+ public ALOCI(NumberVectorDistanceFunction<?> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
super();
this.distFunc = distanceFunction;
this.nmin = nmin;
@@ -147,13 +144,11 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
// Compute extend of dataset.
double[] min, max;
{
- Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
+ double[][] hbbs = RelationUtil.computeMinMax(relation);
+ min = hbbs[0];
+ max = hbbs[1];
double maxd = 0;
- min = new double[dim];
- max = new double[dim];
for(int i = 0; i < dim; i++) {
- min[i] = hbbs.first.doubleValue(i);
- max[i] = hbbs.second.doubleValue(i);
maxd = Math.max(maxd, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
@@ -169,9 +164,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
double[] nshift = new double[dim];
ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
qts.add(qt);
- if(progressPreproc != null) {
- progressPreproc.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(progressPreproc);
/*
* create the remaining g-1 shifted QuadTrees. This not clearly described in
* the paper and therefore implemented in a way that achieves good results
@@ -184,13 +177,9 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
}
qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
qts.add(qt);
- if(progressPreproc != null) {
- progressPreproc.incrementProcessed(LOG);
- }
- }
- if(progressPreproc != null) {
- progressPreproc.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressPreproc);
}
+ LOG.ensureCompleted(progressPreproc);
// aLOCI main loop: evaluate
FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
@@ -211,7 +200,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
continue;
}
// TODO: always use manhattan?
- if(ci == null || distFunc.distance(ci.getCenter(), obj).compareTo(distFunc.distance(ci2.getCenter(), obj)) > 0) {
+ if(ci == null || distFunc.distance(ci.getCenter(), obj) > distFunc.distance(ci2.getCenter(), obj)) {
ci = ci2;
}
}
@@ -229,7 +218,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
continue;
}
// TODO: always use manhattan?
- if(cj == null || distFunc.distance(cj.getCenter(), ci.getCenter()).compareTo(distFunc.distance(cj2.getCenter(), ci.getCenter())) > 0) {
+ if(cj == null || distFunc.distance(cj.getCenter(), ci.getCenter()) > distFunc.distance(cj2.getCenter(), ci.getCenter())) {
cj = cj2;
}
}
@@ -245,14 +234,10 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
// Store results
mdef_norm.putDouble(iditer, maxmdefnorm);
minmax.put(maxmdefnorm);
- if(progressLOCI != null) {
- progressLOCI.incrementProcessed(LOG);
- }
- }
- if(progressLOCI != null) {
- progressLOCI.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOCI);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ LOG.ensureCompleted(progressLOCI);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -336,7 +321,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
/**
* Relation indexed.
*/
- private Relation<? extends NumberVector<?>> relation;
+ private Relation<? extends NumberVector> relation;
/**
* Constructor.
@@ -347,7 +332,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
* @param nmin Maximum size for a page to split
* @param relation Relation to index
*/
- public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?>> relation) {
+ public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector> relation) {
super();
assert (min.length <= 32) : "Quadtrees are only supported for up to 32 dimensions";
this.shift = shift;
@@ -395,11 +380,11 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
if(dim == 0) {
DBIDArrayIter iter = ids.iter();
iter.seek(start);
- NumberVector<?> first = relation.get(iter);
+ NumberVector first = relation.get(iter);
iter.advance();
boolean degenerate = true;
loop: for(; iter.getOffset() < end; iter.advance()) {
- NumberVector<?> other = relation.get(iter);
+ NumberVector other = relation.get(iter);
for(int d = 0; d < lmin.length; d++) {
if(Math.abs(first.doubleValue(d) - other.doubleValue(d)) > 1E-15) {
degenerate = false;
@@ -481,7 +466,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
* @param level Level (controls scaling/wraping!)
* @return Shifted position
*/
- private double getShiftedDim(NumberVector<?> obj, int dim, int level) {
+ private double getShiftedDim(NumberVector obj, int dim, int level) {
double pos = obj.doubleValue(dim) + shift[dim];
pos = (pos - min[dim]) / width[dim] * (1 + level);
return pos - Math.floor(pos);
@@ -495,7 +480,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
* @param tlevel Target level
* @return Node
*/
- public Node findClosestNode(NumberVector<?> vec, int tlevel) {
+ public Node findClosestNode(NumberVector vec, int tlevel) {
Node cur = root;
for(int level = 0; level <= tlevel; level++) {
if(cur.children == null) {
@@ -650,7 +635,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* Parameter to specify the minimum neighborhood size
*/
@@ -694,13 +679,13 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
/**
* The distance function
*/
- private NumberVectorDistanceFunction<D> distanceFunction;
+ private NumberVectorDistanceFunction<?> distanceFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<NumberVectorDistanceFunction<D>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, NumberVectorDistanceFunction.class);
+ ObjectParameter<NumberVectorDistanceFunction<?>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, NumberVectorDistanceFunction.class);
if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
}
@@ -730,7 +715,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex
}
@Override
- protected ALOCI<O, D> makeInstance() {
+ protected ALOCI<O> makeInstance() {
return new ALOCI<>(distanceFunction, nmin, alpha, g, rnd);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java
new file mode 100644
index 00000000..09b5d8b8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java
@@ -0,0 +1,276 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Connectivity-based outlier factor (COF).
+ *
+ * Reference:
+ * <p>
+ * J. Tang, Z. Chen, A. W. C. Fu, D. W. Cheung<br />
+ * Enhancing effectiveness of outlier detections for low density patterns.<br />
+ * In Advances in Knowledge Discovery and Data Mining.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "J. Tang, Z. Chen, A. W. C. Fu, D. W. Cheung", //
+title = "Enhancing effectiveness of outlier detections for low density patterns", //
+booktitle = "In Advances in Knowledge Discovery and Data Mining", //
+url = "http://dx.doi.org/10.1007/3-540-47887-6_53")
+public class COF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(COF.class);
+
+ /**
+ * The number of neighbors to query (including the query point!)
+ */
+ protected int k;
+
+ /**
+ * Constructor.
+ *
+ * @param k the number of neighbors to use for comparison (excluding the query
+ * point)
+ * @param distanceFunction the neighborhood distance function
+ */
+ public COF(int k, DistanceFunction<? super O> distanceFunction) {
+ super(distanceFunction);
+ this.k = k + 1;
+ }
+
+ /**
+ * Runs the COF algorithm on the given database.
+ *
+ * @param database Database to query
+ * @param relation Data to process
+ * @return COF outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("COF", 3) : null;
+ DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
+ LOG.beginStep(stepprog, 1, "Materializing COF neighborhoods.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, dq, k);
+ DBIDs ids = relation.getDBIDs();
+
+ LOG.beginStep(stepprog, 2, "Computing Average Chaining Distances.");
+ WritableDoubleDataStore acds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ computeAverageChainingDistances(knnq, dq, ids, acds);
+
+ // compute COF_SCORE of each db object
+ LOG.beginStep(stepprog, 3, "Computing Connectivity-based Outlier Factors.");
+ WritableDoubleDataStore cofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
+ // track the maximum value for normalization.
+ DoubleMinMax cofminmax = new DoubleMinMax();
+ computeCOFScores(knnq, ids, acds, cofs, cofminmax);
+
+ LOG.setCompleted(stepprog);
+
+ // Build result representation.
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Connectivity-Based Outlier Factor", "cof-outlier", cofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(cofminmax.getMin(), cofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ /**
+ * Computes the average chaining distance, the average length of a path
+ * through the given set of points to each target. The authors of COF decided
+ * to approximate this value using a weighted mean that assumes every object
+ * is reached from the previous point (but actually every point could be best
+ * reachable from the first, in which case this does not make much sense.)
+ *
+ * TODO: can we accelerate this by using the kNN of the neighbors?
+ *
+ * @param knnq KNN query
+ * @param dq Distance query
+ * @param ids IDs to process
+ * @param acds Storage for average chaining distances
+ */
+ protected void computeAverageChainingDistances(KNNQuery<O> knnq, DistanceQuery<O> dq, DBIDs ids, WritableDoubleDataStore acds) {
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Computing average chaining distances", ids.size(), LOG) : null;
+
+ // Compute the chaining distances.
+ // We do <i>not</i> bother to materialize the chaining order.
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
+ final int r = neighbors.size();
+ DoubleDBIDListIter it1 = neighbors.iter(), it2 = neighbors.iter();
+ // Store the current lowest reachability.
+ final double[] mindists = new double[r];
+ for(int i = 0; it1.valid(); it1.advance(), ++i) {
+ mindists[i] = DBIDUtil.equal(it1, iter) ? Double.NaN : it1.doubleValue();
+ }
+
+ double acsum = 0.;
+ for(int j = ((r < k) ? r : k) - 1; j > 0; --j) {
+ // Find the minimum:
+ int minpos = -1;
+ double mindist = Double.NaN;
+ for(int i = 0; i < mindists.length; ++i) {
+ double curdist = mindists[i];
+ // Both values could be NaN, deliberately.
+ if(curdist == curdist && !(curdist > mindist)) {
+ minpos = i;
+ mindist = curdist;
+ }
+ }
+ acsum += mindist * j; // Weighted sum, decreasing weights
+ mindists[minpos] = Double.NaN;
+ it1.seek(minpos);
+ // Update distances
+ it2.seek(0);
+ for(int i = 0; it2.valid(); it2.advance(), ++i) {
+ final double curdist = mindists[i];
+ if(curdist != curdist) {
+ continue; // NaN = processed!
+ }
+ double newdist = dq.distance(it1, it2);
+ if(newdist < curdist) {
+ mindists[i] = newdist;
+ }
+ }
+ }
+ acds.putDouble(iter, acsum / (r * 0.5 * (r - 1.)));
+ LOG.incrementProcessed(lrdsProgress);
+ }
+ LOG.ensureCompleted(lrdsProgress);
+ }
+
+ /**
+ * Compute Connectivity outlier factors.
+ *
+ * @param knnq KNN query
+ * @param ids IDs to process
+ * @param acds Average chaining distances
+ * @param cofs Connectivity outlier factor storage
+ * @param cofminmax Score minimum/maximum tracker
+ */
+ private void computeCOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore acds, WritableDoubleDataStore cofs, DoubleMinMax cofminmax) {
+ FiniteProgress progressCOFs = LOG.isVerbose() ? new FiniteProgress("COF for objects", ids.size(), LOG) : null;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
+ // Aggregate the average chaining distances of all neighbors:
+ double sum = 0.;
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ sum += acds.doubleValue(neighbor);
+ }
+ final double cof = (sum > 0.) ? (acds.doubleValue(iter) * k / sum) : (acds.doubleValue(iter) > 0. ? Double.POSITIVE_INFINITY : 1.);
+ cofs.putDouble(iter, cof);
+ // update minimum and maximum
+ cofminmax.put(cof);
+
+ LOG.incrementProcessed(progressCOFs);
+ }
+ LOG.ensureCompleted(progressCOFs);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ */
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the neighborhood size for COF. This does not include
+ * the query object.
+ */
+ public static final OptionID K_ID = new OptionID("cof.k", "The number of neighbors (not including the query object) to use for computing the COF score.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(K_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(pK)) {
+ k = pK.intValue();
+ }
+ }
+
+ @Override
+ protected COF<O> makeInstance() {
+ return new COF<>(k, distanceFunction);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
index 2508b6b0..372bf68c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -38,20 +38,16 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -59,6 +55,7 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -113,12 +110,14 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* @apiviz.has KNNQuery
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> Distance type
*/
@Title("LOF: Local Outlier Factor")
@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'")
-@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388")
-public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander", //
+title = "LOF: Identifying Density-Based Local Outliers", //
+booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", //
+url = "http://dx.doi.org/10.1145/342009.335388")
+public class FlexibleLOF<O> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -137,20 +136,12 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
/**
* Neighborhood distance function.
*/
- protected DistanceFunction<? super O, D> referenceDistanceFunction;
+ protected DistanceFunction<? super O> referenceDistanceFunction;
/**
* Reachability distance function.
*/
- protected DistanceFunction<? super O, D> reachabilityDistanceFunction;
-
- /**
- * Include object itself in kNN neighborhood.
- *
- * In the official LOF publication, the point itself is not considered to be
- * part of its k nearest neighbors.
- */
- private static boolean objectIsInKNN = false;
+ protected DistanceFunction<? super O> reachabilityDistanceFunction;
/**
* Constructor.
@@ -160,10 +151,10 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
* @param neighborhoodDistanceFunction the neighborhood distance function
* @param reachabilityDistanceFunction the reachability distance function
*/
- public FlexibleLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ public FlexibleLOF(int krefer, int kreach, DistanceFunction<? super O> neighborhoodDistanceFunction, DistanceFunction<? super O> reachabilityDistanceFunction) {
super();
- this.krefer = krefer + (objectIsInKNN ? 0 : 1);
- this.kreach = kreach + (objectIsInKNN ? 0 : 1);
+ this.krefer = krefer + 1;
+ this.kreach = kreach + 1;
this.referenceDistanceFunction = neighborhoodDistanceFunction;
this.reachabilityDistanceFunction = reachabilityDistanceFunction;
}
@@ -178,9 +169,9 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
- Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
- KNNQuery<O, D> kNNRefer = pair.getFirst();
- KNNQuery<O, D> kNNReach = pair.getSecond();
+ Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog);
+ KNNQuery<O> kNNRefer = pair.getFirst();
+ KNNQuery<O> kNNReach = pair.getSecond();
return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult();
}
@@ -191,30 +182,29 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
* @param stepprog the progress logger
* @return the kNN queries for the algorithm
*/
- private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
+ private Pair<KNNQuery<O>, KNNQuery<O>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
// "HEAVY" flag for knnReach since it is used more than once
- KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if (!(knnReach instanceof PreprocessorKNNQuery)) {
- if (stepprog != null) {
- if (referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if(!(knnReach instanceof PreprocessorKNNQuery)) {
+ if(stepprog != null) {
+ if(referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG);
- } else {
+ }
+ else {
stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG);
}
}
int kpreproc = (referenceDistanceFunction.equals(reachabilityDistanceFunction)) ? Math.max(kreach, krefer) : kreach;
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, reachabilityDistanceFunction, kpreproc);
- database.addIndex(preproc);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
- knnReach = preproc.getKNNQuery(rdq, kreach);
+ knnReach = DatabaseUtil.precomputedKNNQuery(database, relation, reachabilityDistanceFunction, kpreproc);
}
// knnReach is only used once
- KNNQuery<O, D> knnRefer;
- if (referenceDistanceFunction == reachabilityDistanceFunction || referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
+ KNNQuery<O> knnRefer;
+ if(referenceDistanceFunction == reachabilityDistanceFunction || referenceDistanceFunction.equals(reachabilityDistanceFunction)) {
knnRefer = knnReach;
- } else {
+ }
+ else {
// do not materialize the first neighborhood, since it is used only once
knnRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer);
}
@@ -234,149 +224,118 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
* @param stepprog Progress logger
* @return LOF result
*/
- protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) {
+ protected LOFResult<O> doRunInTime(DBIDs ids, KNNQuery<O> kNNRefer, KNNQuery<O> kNNReach, StepProgress stepprog) {
// Assert we got something
- if (kNNRefer == null) {
+ if(kNNRefer == null) {
throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
}
- if (kNNReach == null) {
+ if(kNNReach == null) {
throw new AbortException("No kNN queries supported by database for reachability distance function.");
}
// Compute LRDs
- if (stepprog != null) {
- stepprog.beginStep(2, "Computing LRDs.", LOG);
- }
- WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach);
+ LOG.beginStep(stepprog, 2, "Computing LRDs.");
+ WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ computeLRDs(kNNReach, ids, lrds);
// compute LOF_SCORE of each db object
- if (stepprog != null) {
- stepprog.beginStep(3, "Computing LOFs.", LOG);
- }
- Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer);
- WritableDoubleDataStore lofs = lofsAndMax.getFirst();
+ LOG.beginStep(stepprog, 3, "Computing LOFs.");
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
- DoubleMinMax lofminmax = lofsAndMax.getSecond();
+ DoubleMinMax lofminmax = new DoubleMinMax();
+ computeLOFs(kNNRefer, ids, lrds, lofs, lofminmax);
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
-
return new LOFResult<>(result, kNNRefer, kNNReach, lrds, lofs);
}
/**
* Computes the local reachability density (LRD) of the specified objects.
*
- * @param ids the ids of the objects
- * @param knnReach the precomputed neighborhood of the objects w.r.t. the
+ * @param knnq the precomputed neighborhood of the objects w.r.t. the
* reachability distance
- * @return the LRDs of the objects
+ * @param ids the ids of the objects
+ * @param lrds Local reachability density (LRD) storage
*/
- protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
- WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ protected void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- final KNNList<D> neighbors = knnReach.getKNNForDBID(iter, kreach);
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(iter, kreach);
double sum = 0.0;
int count = 0;
- if (neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
- KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach);
- final double nkdist;
- if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
- nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
- } else {
- nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
- }
- sum += Math.max(neighbor.doubleDistance(), nkdist);
- count++;
- }
- }
- } else {
- for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
- KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach);
- sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
- count++;
- }
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
}
+ KNNList neighborsNeighbors = knnq.getKNNForDBID(neighbor, kreach);
+ sum += Math.max(neighbor.doubleValue(), neighborsNeighbors.getKNNDistance());
+ count++;
}
// Avoid division by 0
final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
lrds.putDouble(iter, lrd);
- if (lrdsProgress != null) {
- lrdsProgress.incrementProcessed(LOG);
- }
- }
- if (lrdsProgress != null) {
- lrdsProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(lrdsProgress);
}
- return lrds;
+ LOG.ensureCompleted(lrdsProgress);
}
/**
* Computes the Local outlier factor (LOF) of the specified objects.
*
- * @param ids the ids of the objects
- * @param lrds the LRDs of the objects
- * @param knnRefer the precomputed neighborhood of the objects w.r.t. the
+ * @param knnq the precomputed neighborhood of the objects w.r.t. the
* reference distance
- * @return the LOFs of the objects and the maximum LOF
+ * @param ids IDs to process
+ * @param lrds Local reachability densities
+ * @param lofs Local outlier factor storage
+ * @param lofminmax Score minimum/maximum tracker
*/
- protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) {
- WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- // track the maximum value for normalization.
- DoubleMinMax lofminmax = new DoubleMinMax();
-
+ protected void computeLOFs(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- final double lrdp = lrds.doubleValue(iter);
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final double lof;
- if (lrdp > 0 && !Double.isInfinite(lrdp)) {
- final KNNList<D> neighbors = knnRefer.getKNNForDBID(iter, krefer);
- double sum = 0.0;
+ final double lrdp = lrds.doubleValue(iter);
+ final KNNList neighbors = knnq.getKNNForDBID(iter, krefer);
+ if(!Double.isInfinite(lrdp)) {
+ double sum = 0.;
int count = 0;
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
- sum += lrds.doubleValue(neighbor);
- count++;
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
+ }
+ final double val = lrds.doubleValue(neighbor);
+ sum += val;
+ count++;
+ if(Double.isInfinite(val)) {
+ break;
}
}
- lof = sum / (count * lrdp);
- } else {
+ lof = sum / (lrdp * count);
+ }
+ else {
lof = 1.0;
}
lofs.putDouble(iter, lof);
// update minimum and maximum
- if (!Double.isInfinite(lof)) {
- lofminmax.put(lof);
- }
+ lofminmax.put(lof);
- if (progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(progressLOFs);
}
- if (progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
- }
- return new Pair<>(lofs, lofminmax);
+ LOG.ensureCompleted(progressLOFs);
}
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if (reachabilityDistanceFunction.equals(referenceDistanceFunction)) {
+ if(reachabilityDistanceFunction.equals(referenceDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
- } else {
+ }
+ else {
type = new CombinedTypeInformation(referenceDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
@@ -393,7 +352,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @author Elke Achtert
*/
- public static class LOFResult<O, D extends NumberDistance<D, ?>> {
+ public static class LOFResult<O> {
/**
* The result of the run of the {@link FlexibleLOF} algorithm.
*/
@@ -402,22 +361,22 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
/**
* The kNN query w.r.t. the reference neighborhood distance.
*/
- private final KNNQuery<O, D> kNNRefer;
+ private final KNNQuery<O> kNNRefer;
/**
* The kNN query w.r.t. the reachability distance.
*/
- private final KNNQuery<O, D> kNNReach;
+ private final KNNQuery<O> kNNReach;
/**
* The RkNN query w.r.t. the reference neighborhood distance.
*/
- private RKNNQuery<O, D> rkNNRefer;
+ private RKNNQuery<O> rkNNRefer;
/**
* The rkNN query w.r.t. the reachability distance.
*/
- private RKNNQuery<O, D> rkNNReach;
+ private RKNNQuery<O> rkNNReach;
/**
* The LRD values of the objects.
@@ -439,7 +398,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
* @param lrds the LRD values of the objects
* @param lofs the LOF values of the objects
*/
- public LOFResult(OutlierResult result, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, WritableDoubleDataStore lrds, WritableDoubleDataStore lofs) {
+ public LOFResult(OutlierResult result, KNNQuery<O> kNNRefer, KNNQuery<O> kNNReach, WritableDoubleDataStore lrds, WritableDoubleDataStore lofs) {
this.result = result;
this.kNNRefer = kNNRefer;
this.kNNReach = kNNReach;
@@ -452,7 +411,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @return the kNN query w.r.t. the reference neighborhood distance
*/
- public KNNQuery<O, D> getKNNRefer() {
+ public KNNQuery<O> getKNNRefer() {
return kNNRefer;
}
@@ -461,7 +420,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @return the kNN query w.r.t. the reachability distance
*/
- public KNNQuery<O, D> getKNNReach() {
+ public KNNQuery<O> getKNNReach() {
return kNNReach;
}
@@ -497,7 +456,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @param rkNNRefer the query to set
*/
- public void setRkNNRefer(RKNNQuery<O, D> rkNNRefer) {
+ public void setRkNNRefer(RKNNQuery<O> rkNNRefer) {
this.rkNNRefer = rkNNRefer;
}
@@ -506,7 +465,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @return the RkNN query w.r.t. the reference neighborhood distance
*/
- public RKNNQuery<O, D> getRkNNRefer() {
+ public RKNNQuery<O> getRkNNRefer() {
return rkNNRefer;
}
@@ -515,7 +474,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @return the RkNN query w.r.t. the reachability distance
*/
- public RKNNQuery<O, D> getRkNNReach() {
+ public RKNNQuery<O> getRkNNReach() {
return rkNNReach;
}
@@ -524,7 +483,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @param rkNNReach the query to set
*/
- public void setRkNNReach(RKNNQuery<O, D> rkNNReach) {
+ public void setRkNNReach(RKNNQuery<O> rkNNReach) {
this.rkNNReach = rkNNReach;
}
}
@@ -536,7 +495,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* The distance function to determine the reachability distance between
* database objects.
@@ -545,16 +504,16 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
/**
* Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its LOF_SCORE, must be an integer greater than
- * 1.
+ * considered for computing its LOF score; must be an integer greater than
+ * or equal to 1.
*/
- public static final OptionID KREF_ID = new OptionID("lof.krefer", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+ public static final OptionID KREF_ID = new OptionID("lof.krefer", "The number of nearest neighbors of an object to be considered for computing its LOF score.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its reachability distance.
*/
- public static final OptionID KREACH_ID = new OptionID("lof.kreach", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+ public static final OptionID KREACH_ID = new OptionID("lof.kreach", "The number of nearest neighbors of an object to be considered for computing its LOF score.");
/**
* The reference set size to use.
@@ -569,43 +528,45 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo
/**
* Neighborhood distance function.
*/
- protected DistanceFunction<O, D> neighborhoodDistanceFunction = null;
+ protected DistanceFunction<O> neighborhoodDistanceFunction = null;
/**
* Reachability distance function.
*/
- protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
+ protected DistanceFunction<O> reachabilityDistanceFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter pK = new IntParameter(KREF_ID);
- pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
- if (config.grab(pK)) {
+ pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(pK)) {
krefer = pK.intValue();
}
final IntParameter pK2 = new IntParameter(KREACH_ID);
pK2.setOptional(true);
- pK2.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
- if (config.grab(pK2)) {
+ pK2.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(pK2)) {
kreach = pK2.intValue();
- } else {
+ }
+ else {
kreach = krefer;
}
- final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class);
+ final ObjectParameter<DistanceFunction<O>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class);
reachDistP.setOptional(true);
- if (config.grab(reachDistP)) {
+ if(config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
- } else {
+ }
+ else {
reachabilityDistanceFunction = distanceFunction;
}
}
@Override
- protected FlexibleLOF<O, D> makeInstance() {
+ protected FlexibleLOF<O> makeInstance() {
return new FlexibleLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
index 28fcf01b..611701ab 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,15 +34,16 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -59,18 +60,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
- * INFLO provides the Mining Algorithms (Two-way Search Method) for Influence
- * Outliers using Symmetric Relationship
- * <p>
+ * Influence Outliers using Symmetric Relationship (INFLO), using two-way
+ * search, is an outlier detection method based on LOF that also uses the reverse kNN.
+ *
* Reference: <br>
* <p>
- * Jin, W., Tung, A., Han, J., and Wang, W. 2006<br/>
- * Ranking outliers using symmetric neighborhood relationship<br/>
- * In Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD),
- * Singapore
+ * W. Jin, A. Tung, J. Han, and W. Wang<br />
+ * Ranking outliers using symmetric neighborhood relationship<br />
+ * Proc. 10th Pacific-Asia conference on Advances in Knowledge Discovery and
+ * Data Mining, 2006.
* </p>
*
* @author Ahmed Hettab
+ * @author Erich Schubert
*
* @apiviz.has KNNQuery
*
@@ -78,35 +80,23 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("INFLO: Influenced Outlierness Factor")
@Description("Ranking Outliers Using Symmetric Neigborhood Relationship")
-@Reference(authors = "Jin, W., Tung, A., Han, J., and Wang, W", title = "Ranking outliers using symmetric neighborhood relationship", booktitle = "Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD), Singapore, 2006", url = "http://dx.doi.org/10.1007/11731139_68")
-public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "W. Jin, A. Tung, J. Han, and W. Wang", //
+title = "Ranking outliers using symmetric neighborhood relationship", //
+booktitle = "Proc. 10th Pacific-Asia conference on Advances in Knowledge Discovery and Data Mining", //
+url = "http://dx.doi.org/10.1007/11731139_68")
+public class INFLO<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(INFLO.class);
/**
- * Parameter to specify if any object is a Core Object must be a double
- * greater than 0.0
- * <p>
- * see paper "Two-way search method" 3.2
- */
- public static final OptionID M_ID = new OptionID("inflo.m", "The threshold");
-
- /**
- * Holds the value of {@link #M_ID}.
+ * Pruning threshold m.
*/
private double m;
/**
- * Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its INFLO_SCORE. must be an integer greater than
- * 1.
- */
- public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
-
- /**
- * Holds the value of {@link #K_ID}.
+ * Number of neighbors to use.
*/
private int k;
@@ -117,7 +107,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* @param m m Parameter
* @param k k Parameter
*/
- public INFLO(DistanceFunction<? super O, D> distanceFunction, double m, int k) {
+ public INFLO(DistanceFunction<? super O> distanceFunction, double m, int k) {
super(distanceFunction);
this.m = m;
this.k = k;
@@ -131,9 +121,9 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k + 1, DatabaseQuery.HINT_HEAVY_USE);
- ModifiableDBIDs processedIDs = DBIDUtil.newHashSet(relation.size());
ModifiableDBIDs pruned = DBIDUtil.newHashSet();
// KNNS
WritableDataStore<ModifiableDBIDs> knns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
@@ -147,72 +137,112 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
rnns.put(iditer, DBIDUtil.newArray());
}
- // TODO: use kNN preprocessor?
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
+ computeNeighborhoods(relation, knnQuery, pruned, knns, rnns, density);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- // if not visited count=0
- int count = rnns.get(id).size();
- if(!processedIDs.contains(id)) {
- // TODO: use exactly k neighbors?
- KNNList<D> list = knnQuery.getKNNForDBID(id, k);
- knns.get(id).addDBIDs(list);
- processedIDs.add(id);
- density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
+ // Calculate INFLO for any Object
+ DoubleMinMax inflominmax = new DoubleMinMax();
+ WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ // Note: this modifies knns, by adding rknns!
+ computeINFLO(relation, pruned, knns, rnns, density, inflos, inflominmax);
- }
- ModifiableDBIDs s = knns.get(id);
- for(DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
- if(!processedIDs.contains(q)) {
- // TODO: use exactly k neighbors?
- KNNList<D> listQ = knnQuery.getKNNForDBID(q, k);
- knns.get(q).addDBIDs(listQ);
- density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
- processedIDs.add(q);
- }
+ // Build result representation.
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Influence Outlier Score", "inflo-outlier", inflos, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
- if(knns.get(q).contains(id)) {
- rnns.get(q).add(id);
- rnns.get(id).add(q);
+ /**
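+ * Compute the kNN and reverse kNN neighborhoods, and collect the pruned objects.
+ *
+ * @param relation Data relation
+ * @param knnQuery kNN query function
+ * @param pruned Pruned objects
+ * @param knns kNN storage
+ * @param rnns reverse kNN storage
+ * @param density Density estimation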
+ */
+ protected void computeNeighborhoods(Relation<O> relation, KNNQuery<O> knnQuery, ModifiableDBIDs pruned, WritableDataStore<ModifiableDBIDs> knns, WritableDataStore<ModifiableDBIDs> rnns, WritableDoubleDataStore density) {
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ // if not visited count=0
+ int count = rnns.get(iter).size();
+ DBIDs knn = getKNN(iter, knnQuery, knns, density);
+ for(DBIDIter niter = knn.iter(); niter.valid(); niter.advance()) {
+ // Ignore the query point itself.
+ if(DBIDUtil.equal(iter, niter)) {
+ continue;
+ }
+ if(getKNN(niter, knnQuery, knns, density).contains(iter)) {
+ rnns.get(niter).add(iter);
+ rnns.get(iter).add(niter);
count++;
}
}
- if(count >= s.size() * m) {
- pruned.add(id);
+ if(count >= knn.size() * m) {
+ pruned.add(iter);
}
}
+ }
- // Calculate INFLO for any Object
- // IF Object is pruned INFLO=1.0
- DoubleMinMax inflominmax = new DoubleMinMax();
- WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- if(!pruned.contains(id)) {
- ModifiableDBIDs knn = knns.get(id);
- ModifiableDBIDs rnn = rnns.get(id);
-
- double denP = density.doubleValue(id);
- knn.addDBIDs(rnn);
- Mean mean = new Mean();
- for(DBIDIter iter = knn.iter(); iter.valid(); iter.advance()) {
- mean.put(density.doubleValue(iter));
+ /**
+ * Compute the final INFLO scores.
+ *
+ * @param relation Data relation
+ * @param pruned Pruned objects
+ * @param knns kNN storage
+ * @param rnns reverse kNN storage
+ * @param density Density estimation
+ * @param inflos Inflo score storage
+ * @param inflominmax Output of minimum and maximum
+ */
+ protected void computeINFLO(Relation<O> relation, ModifiableDBIDs pruned, WritableDataStore<ModifiableDBIDs> knns, WritableDataStore<ModifiableDBIDs> rnns, WritableDoubleDataStore density, WritableDoubleDataStore inflos, DoubleMinMax inflominmax) {
+ Mean mean = new Mean();
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if(pruned.contains(iter)) {
+ inflos.putDouble(iter, 1.);
+ inflominmax.put(1.);
+ continue;
+ }
+ ModifiableDBIDs knn = knns.get(iter), rnn = rnns.get(iter);
+ knn.addDBIDs(rnn);
+ // Compute mean density of NN \cup RNN
+ mean.reset();
+ for(DBIDIter niter = knn.iter(); niter.valid(); niter.advance()) {
+ if(DBIDUtil.equal(iter, niter)) {
+ continue;
}
- double den = mean.getMean() / denP;
- inflos.putDouble(id, den);
- // update minimum and maximum
- inflominmax.put(den);
-
+ mean.put(density.doubleValue(niter));
}
- if(pruned.contains(id)) {
- inflos.putDouble(id, 1.0);
- inflominmax.put(1.0);
+ double denP = density.doubleValue(iter);
+ double den;
+ if(denP > 0.) {
+ den = mean.getMean() / denP;
}
+ else {
+ den = mean.getMean() == 0 ? 1. : Double.POSITIVE_INFINITY;
+ }
+ inflos.putDouble(iter, den);
+ // update minimum and maximum
+ inflominmax.put(den);
}
+ }
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
- return new OutlierResult(scoreMeta, scoreResult);
+ /**
+ * Get the (forward only) kNN of an object, including the query point
+ *
+ * @param q Query point
+ * @param knnQuery Query function
+ * @param knns kNN storage
+ * @param density Density storage
+ * @return Neighbor list
+ */
+ protected DBIDs getKNN(DBIDIter q, KNNQuery<O> knnQuery, WritableDataStore<ModifiableDBIDs> knns, WritableDoubleDataStore density) {
+ ModifiableDBIDs s = knns.get(q);
+ if(s.size() == 0) {
+ KNNList listQ = knnQuery.getKNNForDBID(q, k + 1);
+ s.addDBIDs(listQ);
+ density.putDouble(q, 1. / listQ.getKNNDistance());
+ }
+ return s;
}
@Override
@@ -232,29 +262,49 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify whether an object is a core object; must be a double
+ * greater than 0.0.
+ *
+ * See the paper, "Two-way search method" (3.2).
+ */
+ public static final OptionID M_ID = new OptionID("inflo.m", "The pruning threshold");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its INFLO score.
+ */
+ public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO score.");
+
+ /**
+ * M parameter
+ */
protected double m = 1.0;
+ /**
+ * Number of neighbors to use.
+ */
protected int k = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
- mP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ final DoubleParameter mP = new DoubleParameter(M_ID, 1.0)//
+ .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if(config.grab(mP)) {
m = mP.doubleValue();
}
- final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ final IntParameter kP = new IntParameter(K_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.intValue();
}
}
@Override
- protected INFLO<O, D> makeInstance() {
+ protected INFLO<O> makeInstance() {
return new INFLO<>(distanceFunction, m, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java
new file mode 100644
index 00000000..2183872f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java
@@ -0,0 +1,445 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.FeatureVector;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualGlobalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Generalized Outlier Detection with Flexible Kernel Density Estimates.
+ *
+ * This is an outlier detection inspired by LOF, but using kernel density
+ * estimation (KDE) from statistics. Unfortunately, for higher dimensional data,
+ * kernel density estimation itself becomes difficult. At this point, the
+ * <tt>kdeos.idim</tt> parameter can become useful, which allows one to either
+ * disable dimensionality adjustment completely (<tt>0</tt>) or to set it to a
+ * lower dimensionality than the data representation. This may sound like a hack
+ * at first, but real data is often of lower intrinsic dimensionality, and
+ * embedded into a higher data representation. Adjusting the kernel to account
+ * for the representation seems to yield worse results than using a lower,
+ * intrinsic, dimensionality.
+ *
+ * If your data set has many duplicates, the <tt>kdeos.kernel.minbw</tt>
+ * parameter sets a minimum kernel bandwidth, which may improve results in these
+ * cases, as it prevents kernels from degenerating to single points.
+ *
+ * Reference:
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Generalized Outlier Detection with Flexible Kernel Density Estimates<br />
+ * In Proceedings of the 14th SIAM International Conference on Data Mining
+ * (SDM), Philadelphia, PA, 2014.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ * @apiviz.has KernelDensityFunction
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", //
+title = "Generalized Outlier Detection with Flexible Kernel Density Estimates", //
+booktitle = "Proc. 14th SIAM International Conference on Data Mining (SDM), Philadelphia, PA, 2014", //
+url = "http://dx.doi.org/10.1137/1.9781611973440.63")
+public class KDEOS<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(KDEOS.class);
+
+ /**
+ * Kernel function to use for density estimation.
+ */
+ KernelDensityFunction kernel;
+
+ /**
+ * Minimum and maximum number of neighbors to use.
+ */
+ int kmin, kmax;
+
+ /**
+ * Kernel scaling parameter.
+ */
+ double scale;
+
+ /**
+ * Kernel minimum bandwidth.
+ */
+ double minBandwidth = 1e-6;
+
+ /**
+ * Intrinsic dimensionality.
+ */
+ int idim = -1;
+
+ /**
+ * Significance cutoff when computing kernel density.
+ */
+ final static double CUTOFF = 1e-20;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param kmin Minimum number of neighbors
+ * @param kmax Maximum number of neighbors
+ * @param kernel Kernel function
+ * @param scale Kernel scaling parameter
+ * @param idim Intrinsic dimensionality (use -1 to use the real data dimensionality)
+ */
+ public KDEOS(DistanceFunction<? super O> distanceFunction, int kmin, int kmax, KernelDensityFunction kernel, double minBandwidth, double scale, int idim) {
+ super(distanceFunction);
+ this.kmin = kmin;
+ this.kmax = kmax;
+ this.kernel = kernel;
+ this.minBandwidth = minBandwidth;
+ this.scale = scale;
+ this.idim = idim;
+ }
+
+ /**
+ * Run the KDEOS outlier detection algorithm.
+ *
+ * @param database Database to query
+ * @param rel Relation to process
+ * @return Outlier detection result
+ */
+ public OutlierResult run(Database database, Relation<O> rel) {
+ final DBIDs ids = rel.getDBIDs();
+
+ LOG.verbose("Running kNN preprocessor.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, rel, getDistanceFunction(), kmax + 1);
+
+ // Initialize store for densities
+ WritableDataStore<double[]> densities = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, double[].class);
+ estimateDensities(rel, knnq, ids, densities);
+
+ // Compute scores:
+ WritableDoubleDataStore kofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+ DoubleMinMax minmax = new DoubleMinMax();
+ computeOutlierScores(knnq, ids, densities, kofs, minmax);
+
+ DoubleRelation scoreres = new MaterializedDoubleRelation("Kernel Density Estimation Outlier Scores", "kdeos-outlier", kofs, ids);
+ OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax());
+ return new OutlierResult(meta, scoreres);
+ }
+
+ /**
+ * Perform the kernel density estimation step.
+ *
+ * @param rel Relation to query
+ * @param knnq kNN query
+ * @param ids IDs to process
+ * @param densities Density storage
+ */
+ protected void estimateDensities(Relation<O> rel, KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities) {
+ final int dim = dimensionality(rel);
+ final int knum = kmax + 1 - kmin;
+ // Initialize storage:
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ densities.put(iter, new double[knum]);
+ }
+ // Distribute densities:
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing densities.", ids.size(), LOG) : null;
+ double iminbw = (minBandwidth > 0.) ? 1. / (minBandwidth * scale) : Double.POSITIVE_INFINITY;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ KNNList neighbors = knnq.getKNNForDBID(iter, kmax + 1);
+ int k = 1, idx = 0;
+ double sum = 0.;
+ for(DoubleDBIDListIter kneighbor = neighbors.iter(); k <= kmax && kneighbor.valid(); kneighbor.advance(), k++) {
+ sum += kneighbor.doubleValue();
+ if(k < kmin) {
+ continue;
+ }
+ final double ibw = Math.min(k / (sum * scale), iminbw);
+ final double sca = MathUtil.powi(ibw, dim);
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ final double dens;
+ if(sca < Double.POSITIVE_INFINITY) { // NaNs with duplicate points!
+ dens = sca * kernel.density(neighbor.doubleValue() * ibw);
+ }
+ else {
+ dens = neighbor.doubleValue() == 0. ? 1. : 0.;
+ }
+ densities.get(neighbor)[idx] += dens;
+ if(dens < CUTOFF) {
+ break;
+ }
+ }
+ ++idx; // Only if k >= kmin
+ }
+ LOG.incrementProcessed(prog);
+ }
+ LOG.ensureCompleted(prog);
+ }
+
+ /**
+ * Ugly hack to allow using this implementation without having a well-defined
+ * dimensionality.
+ *
+ * @param rel Data relation
+ * @return Dimensionality
+ */
+ private int dimensionality(Relation<O> rel) {
+ // Explicit:
+ if(idim >= 0) {
+ return idim;
+ }
+ // Cast to vector field relation.
+ @SuppressWarnings("unchecked")
+ final Relation<FeatureVector<?>> frel = (Relation<FeatureVector<?>>) rel;
+ int dim = RelationUtil.dimensionality(frel);
+ if(dim < 0) {
+ throw new AbortException("When using KDEOS with non-vectorspace data, the intrinsic dimensionality parameter must be set!");
+ }
+ return dim;
+ }
+
+ /**
+ * Compute the final KDEOS scores.
+ *
+ * @param knnq kNN query
+ * @param ids IDs to process
+ * @param densities Density estimates
+ * @param kdeos Score outputs
+ * @param minmax Minimum and maximum scores
+ */
+ protected void computeOutlierScores(KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities, WritableDoubleDataStore kdeos, DoubleMinMax minmax) {
+ final int knum = kmax + 1 - kmin;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing KDEOS scores.", ids.size(), LOG) : null;
+
+ double[][] scratch = new double[knum][kmax + 5];
+ MeanVariance mv = new MeanVariance();
+
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double[] dens = densities.get(iter);
+ KNNList neighbors = knnq.getKNNForDBID(iter, kmax);
+ if(scratch[0].length < neighbors.size()) {
+ // Resize scratch. Add some extra margin again.
+ scratch = new double[knum][neighbors.size() + 5];
+ }
+ { // Store density matrix of neighbors
+ int i = 0;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance(), i++) {
+ double[] ndens = densities.get(neighbor);
+ for(int k = 0; k < knum; k++) {
+ scratch[k][i] = ndens[k];
+ }
+ }
+ assert (i == neighbors.size());
+ }
+ // Compute means and stddevs for each k
+ double score = 0.;
+ for(int i = 0; i < knum; i++) {
+ mv.reset();
+ for(int j = 0; j < neighbors.size(); j++) {
+ mv.put(scratch[i][j]);
+ }
+ final double mean = mv.getMean(), stddev = mv.getSampleStddev();
+ if(stddev > 0.) {
+ score += (mean - dens[i]) / stddev;
+ }
+ }
+ score /= knum; // average
+ score = NormalDistribution.standardNormalCDF(score);
+ minmax.put(score);
+ kdeos.put(iter, score);
+ LOG.incrementProcessed(prog);
+ }
+ LOG.ensureCompleted(prog);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ TypeInformation res = getDistanceFunction().getInputTypeRestriction();
+ if(idim < 0) {
+ res = new CombinedTypeInformation(TypeUtil.NUMBER_VECTOR_FIELD, res);
+ }
+ return TypeUtil.array(res);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ */
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the kernel density function.
+ */
+ private static final OptionID KERNEL_ID = new OptionID("kdeos.kernel", "Kernel density function to use.");
+
+ /**
+ * Parameter to specify the minimum bandwidth.
+ */
+ private static final OptionID KERNEL_MIN_ID = new OptionID("kdeos.kernel.minbw", "Minimum bandwidth for kernel density estimation.");
+
+ /**
+ * Parameter to specify the kernel scaling factor.
+ */
+ private static final OptionID KERNEL_SCALE_ID = new OptionID("kdeos.kernel.scale", "Scaling factor for the kernel function.");
+
+ /**
+ * Minimum value of k to analyze.
+ */
+ private static final OptionID KMIN_ID = new OptionID("kdeos.k.min", "Minimum value of k to analyze.");
+
+ /**
+ * Maximum value of k to analyze.
+ */
+ private static final OptionID KMAX_ID = new OptionID("kdeos.k.max", "Maximum value of k to analyze.");
+
+ /**
+ * Intrinsic dimensionality.
+ */
+ private static final OptionID IDIM_ID = new OptionID("kdeos.idim", "Intrinsic dimensionality of this data set. Use -1 for using the true data dimensionality, but values such as 0-2 often offer better performance.");
+
+ /**
+ * Kernel function to use for density estimation.
+ */
+ KernelDensityFunction kernel;
+
+ /**
+ * Minimum number of neighbors to use.
+ */
+ int kmin;
+
+ /**
+ * Maximum number of neighbors to use.
+ */
+ int kmax;
+
+ /**
+ * Kernel scaling parameter.
+ */
+ double scale;
+
+ /**
+ * Kernel minimum bandwidth.
+ */
+ double minBandwidth = 0.;
+
+ /**
+ * Intrinsic dimensionality.
+ */
+ int idim = -1;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class);
+ if(config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+
+ IntParameter kminP = new IntParameter(KMIN_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kminP)) {
+ kmin = kminP.intValue();
+ }
+
+ IntParameter kmaxP = new IntParameter(KMAX_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(kmaxP)) {
+ kmax = kmaxP.intValue();
+ }
+ config.checkConstraint(new LessEqualGlobalConstraint<>(kminP, kmaxP));
+
+ DoubleParameter scaleP = new DoubleParameter(KERNEL_SCALE_ID)//
+ .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE) //
+ .setDefaultValue(.5);
+ if(config.grab(scaleP)) {
+ // For simpler parameterization, scale kernels by their canonical
+ // bandwidth, when the kernel is configured.
+ scale = scaleP.doubleValue() * ((kernel != null) ? kernel.canonicalBandwidth() : 1.);
+ }
+ DoubleParameter minbwP = new DoubleParameter(KERNEL_MIN_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE) //
+ .setOptional(true);
+ if(config.grab(minbwP)) {
+ minBandwidth = minbwP.doubleValue();
+ }
+ IntParameter idimP = new IntParameter(IDIM_ID, -1);
+ if(config.grab(idimP)) {
+ idim = idimP.intValue();
+ }
+ }
+
+ @Override
+ protected KDEOS<O> makeInstance() {
+ return new KDEOS<>(distanceFunction, kmin, kmax, kernel, minBandwidth, scale, idim);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
index e5049877..c2e29f54 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,27 +30,20 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -61,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
@@ -88,10 +82,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @apiviz.has KernelDensityFunction
*
* @param <O> the type of objects handled by this Algorithm
- * @param <D> Distance type
*/
-@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", title = "Outlier Detection with Kernel Density Functions", booktitle = "Machine Learning and Data Mining in Pattern Recognition", url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6")
-public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", //
+title = "Outlier Detection with Kernel Density Functions", //
+booktitle = "Machine Learning and Data Mining in Pattern Recognition", //
+url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6")
+public class LDF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -125,7 +121,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param h Kernel bandwidth scaling
* @param c Score scaling parameter
*/
- public LDF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel, double h, double c) {
+ public LDF(int k, DistanceFunction<? super O> distance, KernelDensityFunction kernel, double h, double c) {
super(distance);
this.k = k + 1;
this.kernel = kernel;
@@ -142,84 +138,42 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
-
final int dim = RelationUtil.dimensionality(relation);
-
DBIDs ids = relation.getDBIDs();
- // "HEAVY" flag for KNN Query since it is used more than once
- KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if(!(knnq instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- database.addIndex(preproc);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
- knnq = preproc.getKNNQuery(rdq, k);
- }
+ LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
// Compute LDEs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing LDEs.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing LDEs.");
WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- if(neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- final double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
- if(nkdist > 0.) {
- final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
- sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
- count++;
- }
- else {
- sum = Double.POSITIVE_INFINITY;
- count++;
- break;
- }
+ // Fast version for double distances
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
+ continue;
}
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
- if(nkdist > 0.) {
- final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
- sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
- count++;
- }
- else {
- sum = Double.POSITIVE_INFINITY;
- count++;
- break;
- }
+ final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
+ if(!(nkdist > 0.)) {
+ sum = Double.POSITIVE_INFINITY;
+ count++;
+ break;
}
+ final double v = Math.max(nkdist, neighbor.doubleValue()) / (h * nkdist);
+ sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim);
+ count++;
}
ldes.putDouble(it, sum / count);
- if(densProgress != null) {
- densProgress.incrementProcessed(LOG);
- }
- }
- if(densProgress != null) {
- densProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(densProgress);
}
+ LOG.ensureCompleted(densProgress);
// Compute local density factors.
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing LDFs.", LOG);
- }
+ LOG.beginStep(stepprog, 3, "Computing LDFs.");
WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
@@ -227,7 +181,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = ldes.doubleValue(it);
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
@@ -245,20 +199,14 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
// update minimum and maximum
lofminmax.put(ldf);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
+ LOG.ensureCompleted(progressLOFs);
- if(stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Density Factor", "ldf-outlier", ldfs, ids);
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -283,9 +231,8 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @apiviz.exclude
*
* @param <O> vector type
- * @param <D> distance type
*/
- public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for kernel.
*/
@@ -353,7 +300,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
@Override
- protected LDF<O, D> makeInstance() {
+ protected LDF<O> makeInstance() {
return new LDF<>(k, distanceFunction, kernel, h, c);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
index 36c70b48..479b0bab 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,14 +33,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -79,9 +79,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("LDOF: Local Distance-Based Outlier Factor")
@Description("Local outlier detection appraoch suitable for scattered data by averaging the kNN distance over all k nearest neighbors")
-@Reference(authors = "K. Zhang, M. Hutter, H. Jin", title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
+@Reference(authors = "K. Zhang, M. Hutter, H. Jin", //
+title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", //
+booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", //
+url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF" })
-public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+public class LDOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -110,7 +113,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @param distanceFunction distance function
* @param k k Parameter
*/
- public LDOF(DistanceFunction<? super O, D> distanceFunction, int k) {
+ public LDOF(DistanceFunction<? super O> distanceFunction, int k) {
super(distanceFunction);
this.k = k;
}
@@ -123,8 +126,8 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k);
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k + 1);
// track the maximum value for normalization
DoubleMinMax ldofminmax = new DoubleMinMax();
@@ -135,23 +138,26 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(LOG.isVerbose()) {
LOG.verbose("Computing LDOFs");
}
- FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), LOG) : null;
+ FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF for objects", relation.size(), LOG) : null;
Mean dxp = new Mean(), Dxp = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
- // skip the point itself
+ KNNList neighbors = knnQuery.getKNNForDBID(iditer, k + 1);
dxp.reset();
Dxp.reset();
- // TODO: optimize for double distances
- for(DistanceDBIDListIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
- if(!DBIDUtil.equal(neighbor1, iditer)) {
- dxp.put(neighbor1.getDistance().doubleValue());
- for(DistanceDBIDListIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
- if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
- Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
- }
+ DoubleDBIDListIter neighbor1 = neighbors.iter(), neighbor2 = neighbors.iter();
+ for(; neighbor1.valid(); neighbor1.advance()) {
+ // skip the point itself
+ if(DBIDUtil.equal(neighbor1, iditer)) {
+ continue;
+ }
+ dxp.put(neighbor1.doubleValue());
+ for(neighbor2.seek(neighbor1.getOffset() + 1); neighbor2.valid(); neighbor2.advance()) {
+ // skip the point itself
+ if(DBIDUtil.equal(neighbor2, iditer)) {
+ continue;
}
+ Dxp.put(distFunc.distance(neighbor1, neighbor2));
}
}
double ldof = dxp.getMean() / Dxp.getMean();
@@ -162,16 +168,12 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
// update maximum
ldofminmax.put(ldof);
- if(progressLDOFs != null) {
- progressLDOFs.incrementProcessed(LOG);
- }
- }
- if(progressLDOFs != null) {
- progressLDOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLDOFs);
}
+ LOG.ensureCompleted(progressLDOFs);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("LDOF Outlier Score", "ldof-outlier", TypeUtil.DOUBLE, ldofs, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -193,21 +195,21 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
protected int k = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
}
@Override
- protected LDOF<O, D> makeInstance() {
+ protected LDOF<O> makeInstance() {
return new LDOF<>(distanceFunction, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java
index e76c6034..8d371d4c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,9 +23,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
+import java.util.Arrays;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -37,15 +35,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -54,23 +52,24 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.DoubleIntegerArrayQuickSort;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
/**
* Fast Outlier Detection Using the "Local Correlation Integral".
*
- * Exact implementation only, not aLOCI. See {@link ALOCI}
+ * Exact implementation only, not aLOCI. See {@link ALOCI}.
*
* Outlier detection using multiple epsilon neighborhoods.
*
+ * This implementation has O(n<sup>3</sup> log n) runtime complexity!
+ *
* Based on: S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos:
* LOCI: Fast Outlier Detection Using the Local Correlation Integral. In: Proc.
* 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003.
@@ -80,13 +79,12 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
* @apiviz.has RangeQuery
*
* @param <O> Object type
- * @param <D> Distance type
*/
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
-@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI"})
-public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI" })
+public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -111,7 +109,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
/**
* Holds the value of {@link #RMAX_ID}.
*/
- private D rmax;
+ private double rmax;
/**
* Holds the value of {@link #NMIN_ID}.
@@ -131,7 +129,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @param nmin Minimum neighborhood size
* @param alpha Alpha value
*/
- public LOCI(DistanceFunction<? super O, D> distanceFunction, D rmax, int nmin, double alpha) {
+ public LOCI(DistanceFunction<? super O> distanceFunction, double rmax, int nmin, double alpha) {
super(distanceFunction);
this.rmax = rmax;
this.nmin = nmin;
@@ -146,96 +144,62 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
+ DBIDs ids = relation.getDBIDs();
- FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), LOG) : null;
// LOCI preprocessing step
- WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
- // build list of critical distances
- ArrayList<DoubleIntPair> cdist = new ArrayList<>(neighbors.size() << 1);
- {
- for(int i = 0; i < neighbors.size(); i++) {
- DistanceDBIDPair<D> r = neighbors.get(i);
- if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) {
- continue;
- }
- cdist.add(new DoubleIntPair(r.getDistance().doubleValue(), i));
- final double ri = r.getDistance().doubleValue() / alpha;
- if(ri <= rmax.doubleValue()) {
- cdist.add(new DoubleIntPair(ri, Integer.MIN_VALUE));
- }
- }
- }
- Collections.sort(cdist);
- // fill the gaps to have fast lookups of number of neighbors at a given
- // distance.
- int lastk = 0;
- for(DoubleIntPair c : cdist) {
- if(c.second == Integer.MIN_VALUE) {
- c.second = lastk;
- }
- else {
- lastk = c.second;
- }
- }
-
- interestingDistances.put(iditer, cdist);
- if(progressPreproc != null) {
- progressPreproc.incrementProcessed(LOG);
- }
- }
- if(progressPreproc != null) {
- progressPreproc.ensureCompleted(LOG);
- }
+ WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
+ precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
// LOCI main step
FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- final List<DoubleIntPair> cdist = interestingDistances.get(iditer);
- final double maxdist = cdist.get(cdist.size() - 1).first;
- final int maxneig = cdist.get(cdist.size() - 1).second;
+ // Shared instance, to save allocations.
+ MeanVariance mv_n_r_alpha = new MeanVariance();
+
+ for(DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
+ final DoubleIntArrayList cdist = interestingDistances.get(iditer);
+ final double maxdist = cdist.getDouble(cdist.size() - 1);
+ final int maxneig = cdist.getInt(cdist.size() - 1);
double maxmdefnorm = 0.0;
double maxnormr = 0;
if(maxneig >= nmin) {
- D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
- DistanceDBIDList<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
- // TODO: Ensure the set is sorted. Should be a no-op with most indexes.
+ DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
+ // TODO: Ensure the result is sorted. This is currently implied.
+
// For any critical distance, compute the normalized MDEF score.
- for(DoubleIntPair c : cdist) {
+ for(int i = 0, size = cdist.size(); i < size; i++) {
// Only start when minimum size is fulfilled
- if (c.second < nmin) {
+ if(cdist.getInt(i) < nmin) {
continue;
}
- final double r = c.first;
+ final double r = cdist.getDouble(i);
final double alpha_r = alpha * r;
- // compute n(p_i, \alpha * r) from list (note: alpha_r is different from c!)
- final int n_alphar = elementsAtRadius(cdist, alpha_r);
+ // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
+ final int n_alphar = cdist.getInt(cdist.find(alpha_r));
// compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF}
- MeanVariance mv_n_r_alpha = new MeanVariance();
- // TODO: optimize for double distances
- for (DistanceDBIDListIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ mv_n_r_alpha.reset();
+ for(DoubleDBIDListIter neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Stop at radius r
- if(neighbor.getDistance().doubleValue() > r) {
+ if(neighbor.doubleValue() > r) {
break;
}
- int rn_alphar = elementsAtRadius(interestingDistances.get(neighbor), alpha_r);
+ DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
+ int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
mv_n_r_alpha.put(rn_alphar);
}
// We only use the average and standard deviation
final double nhat_r_alpha = mv_n_r_alpha.getMean();
final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();
- // Redundant divisions removed.
- final double mdef = (nhat_r_alpha - n_alphar); // / nhat_r_alpha;
- final double sigmamdef = sigma_nhat_r_alpha; // / nhat_r_alpha;
+ // Redundant divisions by nhat_r_alpha removed.
+ final double mdef = nhat_r_alpha - n_alphar;
+ final double sigmamdef = sigma_nhat_r_alpha;
final double mdefnorm = mdef / sigmamdef;
if(mdefnorm > maxmdefnorm) {
@@ -246,46 +210,194 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
else {
// FIXME: when nmin was not fulfilled - what is the proper value then?
- maxmdefnorm = 1.0;
+ maxmdefnorm = Double.POSITIVE_INFINITY;
maxnormr = maxdist;
}
mdef_norm.putDouble(iditer, maxmdefnorm);
mdef_radius.putDouble(iditer, maxnormr);
minmax.put(maxmdefnorm);
- if(progressLOCI != null) {
- progressLOCI.incrementProcessed(LOG);
- }
- }
- if(progressLOCI != null) {
- progressLOCI.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOCI);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ LOG.ensureCompleted(progressLOCI);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
- result.addChildResult(new MaterializedRelation<>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));
+ result.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
return result;
}
/**
- * Get the number of objects for a given radius, from the list of critical
- * distances, storing (radius, count) pairs.
+ * Preprocessing step: determine the radii of interest for each point.
*
- * @param criticalDistances
- * @param radius
- * @return Number of elements at the given radius
+ * @param ids IDs to process
+ * @param rangeQuery Range query
+ * @param interestingDistances Distances of interest
*/
- protected int elementsAtRadius(List<DoubleIntPair> criticalDistances, final double radius) {
- int n_r = 0;
- for(DoubleIntPair c2 : criticalDistances) {
- if(c2.first > radius) {
- break;
+ protected void precomputeInterestingRadii(DBIDs ids, RangeQuery<O> rangeQuery, WritableDataStore<DoubleIntArrayList> interestingDistances) {
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null;
+ for(DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
+ DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
+ // build list of critical distances
+ DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1);
+ {
+ int i = 0;
+ DoubleDBIDListIter ni = neighbors.iter();
+ while(ni.valid()) {
+ final double curdist = ni.doubleValue();
+ ++i;
+ ni.advance();
+ // Skip, if tied to the next object:
+ if(ni.valid() && curdist == ni.doubleValue()) {
+ continue;
+ }
+ cdist.append(curdist, i);
+ // Scale radius, and reinsert
+ if(alpha != 1.) {
+ final double ri = curdist / alpha;
+ if(ri <= rmax) {
+ cdist.append(ri, Integer.MIN_VALUE);
+ }
+ }
+ }
}
- if(c2.second != Integer.MIN_VALUE) {
- // Update
- n_r = c2.second;
+ cdist.sort();
+
+ // fill the gaps to have fast lookups of number of neighbors at a given
+ // distance.
+ int lastk = 0;
+ for(int i = 0, size = cdist.size(); i < size; i++) {
+ final int k = cdist.getInt(i);
+ if(k == Integer.MIN_VALUE) {
+ cdist.setValue(i, lastk);
+ }
+ else {
+ lastk = k;
+ }
}
+ // TODO: shrink the list, removing duplicate radii?
+
+ interestingDistances.put(iditer, cdist);
+ LOG.incrementProcessed(progressPreproc);
+ }
+ LOG.ensureCompleted(progressPreproc);
+ }
+
+ /**
+ * Growable list of (double key, int value) pairs, stored in two parallel arrays.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ protected static class DoubleIntArrayList {
+ /**
+ * Double keys; only the first {@code size} entries are in use.
+ */
+ double[] keys;
+
+ /**
+ * Integer values, parallel to {@link #keys}.
+ */
+ int[] vals;
+
+ /**
+ * Number of entries in use.
+ */
+ int size = 0;
+
+ /**
+ * Constructor.
+ *
+ * @param alloc Initial allocation (capacity); may be 0, the arrays grow on append.
+ */
+ public DoubleIntArrayList(int alloc) {
+ keys = new double[alloc];
+ vals = new int[alloc];
+ size = 0;
+ }
+
+ /**
+ * Collection size (number of appended pairs, not the allocated capacity).
+ *
+ * @return Size
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Get the key at the given position.
+ *
+ * @param i Position (not checked against {@link #size()})
+ * @return Key
+ */
+ public double getDouble(int i) {
+ return keys[i];
+ }
+
+ /**
+ * Get the value at the given position.
+ *
+ * @param i Position (not checked against {@link #size()})
+ * @return Value
+ */
+ public int getInt(int i) {
+ return vals[i];
+ }
+
+ /**
+ * Set the value at the given position; the key is left unchanged.
+ *
+ * @param i Position
+ * @param val New value
+ */
+ public void setValue(int i, int val) {
+ vals[i] = val;
+ }
+
+ /**
+ * Append a key-value pair, doubling the capacity when the arrays are full.
+ *
+ * @param key Key to append
+ * @param val Value to append.
+ */
+ public void append(double key, int val) {
+ if(size == keys.length) {
+ keys = Arrays.copyOf(keys, Math.max(1, size << 1)); // 0 << 1 == 0 would never grow
+ vals = Arrays.copyOf(vals, keys.length);
+ }
+ keys[size] = key;
+ vals[size] = val;
+ ++size;
+ }
+
+ /**
+ * Find the last position with a smaller or equal key (binary search; keys must be sorted).
+ *
+ * @param search Key
+ * @return Position, or -1 if no key is smaller or equal (e.g. the list is empty)
+ */
+ public int find(final double search) {
+ int a = 0, b = size - 1;
+ while(a <= b) {
+ final int mid = (a + b) >>> 1;
+ final double cur = keys[mid];
+ if(cur > search) {
+ b = mid - 1;
+ }
+ else { // less or equal!
+ a = mid + 1;
+ }
+ }
+ return b;
+ }
+
+ /**
+ * Sort the used entries via {@link DoubleIntegerArrayQuickSort}.
+ */
+ public void sort() {
+ DoubleIntegerArrayQuickSort.sort(keys, vals, size);
}
- return n_r;
}
@Override
@@ -304,9 +416,11 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
* @author Erich Schubert
*
* @apiviz.exclude
+ *
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- protected D rmax = null;
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ protected double rmax;
protected int nmin = 0;
@@ -315,15 +429,14 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
- final DistanceParameter<D> rmaxP = new DistanceParameter<>(RMAX_ID, distanceFactory);
+ final DoubleParameter rmaxP = new DoubleParameter(RMAX_ID);
if(config.grab(rmaxP)) {
- rmax = rmaxP.getValue();
+ rmax = rmaxP.doubleValue();
}
final IntParameter nminP = new IntParameter(NMIN_ID, 20);
if(config.grab(nminP)) {
- nmin = nminP.getValue();
+ nmin = nminP.intValue();
}
final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.5);
@@ -333,7 +446,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
@Override
- protected LOCI<O, D> makeInstance() {
+ protected LOCI<O> makeInstance() {
return new LOCI<>(distanceFunction, rmax, nmin, alpha);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
index 28166c75..ff5529f5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -56,6 +50,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -75,10 +70,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* within ELKI we have renamed this parameter to &quot;k&quot;.
* </p>
*
+ * Reference:
* <p>
- * Reference: <br>
- * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying
- * Density-Based Local Outliers. <br>
+ * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander:<br />
+ * LOF: Identifying Density-Based Local Outliers.<br />
* In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00),
* Dallas, TX, 2000.
* </p>
@@ -88,37 +83,40 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* @apiviz.has KNNQuery
*
- * @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> Distance type
+ * @param <O> the type of data objects handled by this algorithm
*/
@Title("LOF: Local Outlier Factor")
@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'")
-@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388")
-@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "outlier.LOF", "LOF" })
-public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander",//
+title = "LOF: Identifying Density-Based Local Outliers", //
+booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", //
+url = "http://dx.doi.org/10.1145/342009.335388")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "LOF" })
+public class LOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(LOF.class);
/**
- * Holds the value of {@link Parameterizer#K_ID}.
+ * The number of neighbors to query (including the query point!)
*/
protected int k = 2;
/**
* Constructor.
*
- * @param k the value of k
+ * @param k the number of neighbors to use for comparison (excluding the query
+ * point)
* @param distanceFunction the neighborhood distance function
*/
- public LOF(int k, DistanceFunction<? super O, D> distanceFunction) {
+ public LOF(int k, DistanceFunction<? super O> distanceFunction) {
super(distanceFunction);
this.k = k + 1;
}
/**
- * Performs the Generalized LOF_SCORE algorithm on the given database.
+ * Runs the LOF algorithm on the given database.
*
* @param database Database to query
* @param relation Data to process
@@ -126,42 +124,27 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
- DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction());
- // "HEAVY" flag for knn query since it is used more than once
- KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if(!(knnq instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- knnq = preproc.getKNNQuery(dq, k);
- }
DBIDs ids = relation.getDBIDs();
+ LOG.beginStep(stepprog, 1, "Materializing LOF neighborhoods.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
+
// Compute LRDs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing LRDs.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing LRDs.");
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
computeLRDs(knnq, ids, lrds);
// compute LOF_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing LOFs.", LOG);
- }
- DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ LOG.beginStep(stepprog, 3, "Computing LOFs.");
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
computeLOFScores(knnq, ids, lrds, lofs, lofminmax);
- if(stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -173,50 +156,26 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
* @param ids IDs to process
* @param lrds Reachability storage
*/
- private void computeLRDs(KNNQuery<O, D> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
+ private void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) {
FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
double sum = 0.0;
int count = 0;
- if(neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, iter)) {
- continue;
- }
- KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
- final double nkdist;
- if(neighborsNeighbors instanceof DoubleDistanceKNNList) {
- nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
- }
- else {
- nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
- }
- sum += Math.max(neighbor.doubleDistance(), nkdist);
- count++;
- }
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, iter)) {
- continue;
- }
- KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
- sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
- count++;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
}
+ KNNList neighborsNeighbors = knnq.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.doubleValue(), neighborsNeighbors.getKNNDistance());
+ count++;
}
// Avoid division by 0
final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
lrds.putDouble(iter, lrd);
- if(lrdsProgress != null) {
- lrdsProgress.incrementProcessed(LOG);
- }
- }
- if(lrdsProgress != null) {
- lrdsProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(lrdsProgress);
}
+ LOG.ensureCompleted(lrdsProgress);
}
/**
@@ -228,14 +187,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
* @param lofs Local outlier factor storage
* @param lofminmax Score minimum/maximum tracker
*/
- private void computeLOFScores(KNNQuery<O, D> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
+ private void computeLOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final double lof;
final double lrdp = lrds.doubleValue(iter);
- final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k);
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
if(!Double.isInfinite(lrdp)) {
- double sum = 0.0;
+ double sum = 0.;
int count = 0;
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
@@ -258,13 +217,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
// update minimum and maximum
lofminmax.put(lof);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
+ LOG.ensureCompleted(progressLOFs);
}
@Override
@@ -283,14 +238,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
* @author Erich Schubert
*
* @apiviz.exclude
+ *
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its LOF_SCORE, must be an integer greater than
- * 1.
+ * considered for computing its LOF score, must be an integer greater than
+ * or equal to 1.
*/
- public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF score.");
/**
* The neighborhood size to use.
@@ -302,14 +259,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase
super.makeOptions(config);
final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(pK)) {
- k = pK.getValue();
+ k = pK.intValue();
}
}
@Override
- protected LOF<O, D> makeInstance() {
+ protected LOF<O> makeInstance() {
return new LOF<>(k, distanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
index 525d45f2..6278880f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,29 +35,26 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.Mean;
-import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -77,82 +74,65 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* Distance/density based algorithm similar to LOF to detect outliers, but with
* statistical methods to achieve better result stability.
*
+ * Reference:
+ * <p>
+ * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek:<br />
+ * LoOP: Local Outlier Probabilities<br />
+ * In Proceedings of the 18th International Conference on Information and
+ * Knowledge Management (CIKM), Hong Kong, China, 2009
+ * </p>
+ *
+ * Implementation notes:
+ * <ul>
+ * <li>The lambda parameter was removed from the pdist term, because it cancels
+ * out.</li>
+ * <li>In ELKI 0.7.0, the {@code k} parameters have changed by 1 to make them
+ * similar to other methods and more intuitive.</li>
+ * </ul>
+ *
* @author Erich Schubert
*
* @apiviz.has KNNQuery
*
* @param <O> type of objects handled by this algorithm
- * @param <D> type of distances used
*/
@Title("LoOP: Local Outlier Probabilities")
@Description("Variant of the LOF algorithm normalized using statistical values.")
-@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "LoOP: Local Outlier Probabilities", booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", url = "http://dx.doi.org/10.1145/1645953.1646195")
-@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP", "outlier.LoOP" })
-public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", //
+title = "LoOP: Local Outlier Probabilities", //
+booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", //
+url = "http://dx.doi.org/10.1145/1645953.1646195")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP" })
+public class LoOP<O> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(LoOP.class);
/**
- * The distance function to determine the reachability distance between
- * database objects.
- */
- public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");
-
- /**
- * The distance function to determine the reachability distance between
- * database objects.
- */
- public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");
-
- /**
- * Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its LOOP_SCORE, must be an integer greater than 1.
- */
- public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");
-
- /**
- * Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its LOOP_SCORE, must be an integer greater than 1.
- */
- public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");
-
- /**
- * Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its LOOP_SCORE, must be an integer greater than 1.
- */
- public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation.");
-
- /**
- * Holds the value of {@link #KREACH_ID}.
+ * Reachability neighborhood size.
*/
int kreach;
/**
- * Holds the value of {@link #KCOMP_ID}.
+ * Comparison neighborhood size.
*/
int kcomp;
/**
- * Hold the value of {@link #LAMBDA_ID}.
+ * Lambda parameter.
*/
double lambda;
/**
- * Preprocessor Step 1.
+ * Distance function for reachability.
*/
- protected DistanceFunction<? super O, D> reachabilityDistanceFunction;
+ protected DistanceFunction<? super O> reachabilityDistanceFunction;
/**
- * Preprocessor Step 2.
+ * Distance function for comparison set.
*/
- protected DistanceFunction<? super O, D> comparisonDistanceFunction;
-
- /**
- * Include object itself in kNN neighborhood.
- */
- static boolean objectIsInKNN = false;
+ protected DistanceFunction<? super O> comparisonDistanceFunction;
/**
* Constructor with parameters.
@@ -163,7 +143,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
* @param comparisonDistanceFunction distance function for comparison
* @param lambda Lambda parameter
*/
- public LoOP(int kreach, int kcomp, DistanceFunction<? super O, D> reachabilityDistanceFunction, DistanceFunction<? super O, D> comparisonDistanceFunction, double lambda) {
+ public LoOP(int kreach, int kcomp, DistanceFunction<? super O> reachabilityDistanceFunction, DistanceFunction<? super O> comparisonDistanceFunction, double lambda) {
super();
this.kreach = kreach;
this.kcomp = kcomp;
@@ -180,35 +160,17 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
* @param stepprog Progress logger, may be {@code null}
* @return result
*/
- protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
- KNNQuery<O, D> knnComp;
- KNNQuery<O, D> knnReach;
+ protected Pair<KNNQuery<O>, KNNQuery<O>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
+ KNNQuery<O> knnComp, knnReach;
if(comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
- // We need each neighborhood twice - use "HEAVY" flag.
- knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if(knnComp == null) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, comparisonDistanceFunction, kcomp);
- database.addIndex(preproc);
- DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction);
- knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
- }
- else {
- if(stepprog != null) {
- stepprog.beginStep(1, "Optimized neighborhoods provided by database.", LOG);
- }
- }
+ LOG.beginStep(stepprog, 1, "Materializing neighborhoods with respect to reference neighborhood distance function.");
+ knnComp = DatabaseUtil.precomputedKNNQuery(database, relation, comparisonDistanceFunction, kcomp + 1);
knnReach = knnComp;
}
else {
- if(stepprog != null) {
- stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", LOG);
- }
- knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach);
- knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp);
+ LOG.beginStep(stepprog, 1, "Not materializing distance functions, since we request each DBID once only.");
+ knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach + 1);
+ knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp + 1);
}
return new Pair<>(knnComp, knnReach);
}
@@ -221,13 +183,11 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- final double sqrt2 = Math.sqrt(2.0);
-
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(5) : null;
- Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
- KNNQuery<O, D> knnComp = pair.getFirst();
- KNNQuery<O, D> knnReach = pair.getSecond();
+ Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog);
+ KNNQuery<O> knnComp = pair.getFirst();
+ KNNQuery<O> knnReach = pair.getSecond();
// Assert we got something
if(knnComp == null) {
@@ -237,118 +197,111 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
throw new AbortException("No kNN queries supported by database for density estimation distance function.");
}
+ // FIXME: tie handling!
+
// Probabilistic distances
- WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- Mean mean = new Mean();
- {// computing PRDs
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing pdists", LOG);
- }
- FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- final KNNList<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
- mean.reset();
- // use first kref neighbors as reference set
- int ks = 0;
- // TODO: optimize for double distances
- if(neighbors instanceof DoubleDistanceKNNList) {
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
- final double d = neighbor.doubleDistance();
- mean.put(d * d);
- ks++;
- if(ks >= kreach) {
- break;
- }
- }
- }
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
- double d = neighbor.getDistance().doubleValue();
- mean.put(d * d);
- ks++;
- if(ks >= kreach) {
- break;
- }
- }
- }
- }
- double pdist = lambda * Math.sqrt(mean.getMean());
- pdists.putDouble(iditer, pdist);
- if(prdsProgress != null) {
- prdsProgress.incrementProcessed(LOG);
- }
- }
- }
+ WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
+ LOG.beginStep(stepprog, 3, "Computing pdists");
+ computePDists(relation, knnReach, pdists);
// Compute PLOF values.
WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- MeanVariance mvplof = new MeanVariance();
+ LOG.beginStep(stepprog, 4, "Computing PLOF");
+ double nplof = computePLOFs(relation, knnComp, pdists, plofs);
+
+ // Normalize the outlier scores.
+ DoubleMinMax mm = new DoubleMinMax();
{// compute LOOP_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(4, "Computing PLOF", LOG);
- }
+ LOG.beginStep(stepprog, 5, "Computing LoOP scores");
- FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
- MeanVariance mv = new MeanVariance();
+ FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
+ final double norm = 1. / (nplof * MathUtil.SQRT2);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- final KNNList<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
- mv.reset();
- // use first kref neighbors as comparison set.
- int ks = 0;
- for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
- mv.put(pdists.doubleValue(neighbor));
- ks++;
- if(ks >= kcomp) {
- break;
- }
- }
- }
- double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0);
- if(Double.isNaN(plof) || Double.isInfinite(plof)) {
- plof = 1.0;
- }
- plofs.putDouble(iditer, plof);
- mvplof.put((plof - 1.0) * (plof - 1.0));
-
- if(progressPLOFs != null) {
- progressPLOFs.incrementProcessed(LOG);
- }
+ double loop = NormalDistribution.erf((plofs.doubleValue(iditer) - 1.) * norm);
+ plofs.putDouble(iditer, loop);
+ mm.put(loop);
+ LOG.incrementProcessed(progressLOOPs);
}
+ LOG.ensureCompleted(progressLOOPs);
}
- double nplof = lambda * Math.sqrt(mvplof.getMean());
- if(LOG.isDebugging()) {
- LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
- }
-
- // Compute final LoOP values.
- WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- {// compute LOOP_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(5, "Computing LoOP scores", LOG);
- }
+ LOG.setCompleted(stepprog);
- FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2)));
+ // Build result representation.
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", plofs, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(mm.getMin(), mm.getMax(), 0.);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
- if(progressLOOPs != null) {
- progressLOOPs.incrementProcessed(LOG);
+ /**
+ * Compute the probabilistic distances used by LoOP.
+ *
+ * @param relation Data relation
+ * @param knn kNN query
+ * @param pdists Storage for distances
+ */
+ protected void computePDists(Relation<O> relation, KNNQuery<O> knn, WritableDoubleDataStore pdists) {
+ // computing PRDs
+ FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null;
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNList neighbors = knn.getKNNForDBID(iditer, kreach + 1);
+ // use first kreach neighbors as reference set
+ int ks = 0;
+ double ssum = 0.;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid() && ks < kreach; neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iditer)) {
+ continue;
}
+ final double d = neighbor.doubleValue();
+ ssum += d * d;
+ ks++;
}
+ double pdist = ks > 0 ? Math.sqrt(ssum / ks) : 0.;
+ pdists.putDouble(iditer, pdist);
+ LOG.incrementProcessed(prdsProgress);
}
+ LOG.ensureCompleted(prdsProgress);
+ }
+
+ /**
+ * Compute the PLOF values, using the pdist distances.
+ *
+ * @param relation Data relation
+ * @param knn kNN query
+ * @param pdists Precomputed distances
+ * @param plofs Storage for PLOFs.
+ * @return Normalization factor.
+ */
+ protected double computePLOFs(Relation<O> relation, KNNQuery<O> knn, WritableDoubleDataStore pdists, WritableDoubleDataStore plofs) {
+ FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
+ Mean mvplof = new Mean();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNList neighbors = knn.getKNNForDBID(iditer, kcomp + 1);
+ // use first kcomp neighbors as comparison set.
+ int ks = 0;
+ double sum = 0.;
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid() && ks < kcomp; neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iditer)) {
+ continue;
+ }
+ sum += pdists.doubleValue(neighbor);
+ ks++;
+ }
+ double plof = Math.max(pdists.doubleValue(iditer) * ks / sum, 1.0);
+ if(Double.isNaN(plof) || Double.isInfinite(plof)) {
+ plof = 1.0;
+ }
+ plofs.putDouble(iditer, plof);
+ mvplof.put((plof - 1.0) * (plof - 1.0));
- if(stepprog != null) {
- stepprog.setCompleted(LOG);
+ LOG.incrementProcessed(progressPLOFs);
}
+ LOG.ensureCompleted(progressPLOFs);
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs());
- OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
- return new OutlierResult(scoreMeta, scoreResult);
+ double nplof = lambda * Math.sqrt(mvplof.getMean());
+ if(LOG.isDebugging()) {
+ LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean());
+ }
+ return nplof;
}
@Override
@@ -374,8 +327,43 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
* @author Erich Schubert
*
* @apiviz.exclude
+ *
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O> extends AbstractParameterizer {
+ /**
+ * The distance function to determine the reachability distance between
+ * database objects.
+ */
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");
+
+ /**
+ * The distance function to determine the reference (comparison) set of
+ * database objects.
+ */
+ public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * used for computing its PRD value, must be an integer greater than or
+ * equal to 1.
+ */
+ public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its LOOP_SCORE, must be an integer greater than
+ * or equal to 1.
+ */
+ public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");
+
+ /**
+ * Parameter to specify the number of standard deviations to consider for
+ * density computation (lambda). This is a real-valued scaling factor, not
+ * a neighbor count.
+ */
+ public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation.");
+
/**
* Holds the value of {@link #KREACH_ID}.
*/
@@ -394,29 +382,29 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* Preprocessor Step 1.
*/
- protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
+ protected DistanceFunction<O> reachabilityDistanceFunction = null;
/**
* Preprocessor Step 2.
*/
- protected DistanceFunction<O, D> comparisonDistanceFunction = null;
+ protected DistanceFunction<O> comparisonDistanceFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kcompP = new IntParameter(KCOMP_ID);
- kcompP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ kcompP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kcompP)) {
kcomp = kcompP.intValue();
}
- final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ final ObjectParameter<DistanceFunction<O>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
if(config.grab(compDistP)) {
comparisonDistanceFunction = compDistP.instantiateClass(config);
}
final IntParameter kreachP = new IntParameter(KREACH_ID);
- kreachP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ kreachP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
kreachP.setOptional(true);
if(config.grab(kreachP)) {
kreach = kreachP.intValue();
@@ -425,7 +413,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
kreach = kcomp;
}
- final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
+ final ObjectParameter<DistanceFunction<O>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
if(config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
@@ -439,8 +427,8 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
@Override
- protected LoOP<O, D> makeInstance() {
- DistanceFunction<O, D> realreach = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : comparisonDistanceFunction;
+ protected LoOP<O> makeInstance() {
+ DistanceFunction<O> realreach = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : comparisonDistanceFunction;
return new LoOP<>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java
index c01c914f..6033ae3e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,15 +27,16 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -43,7 +44,6 @@ import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNListener;
@@ -66,7 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* @apiviz.has FlexibleLOF.LOFResult oneway - - updates
*/
// TODO: related to publication?
-public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, D> {
+public class OnlineLOF<O> extends FlexibleLOF<O> {
/**
* The logger for this class.
*/
@@ -80,7 +80,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
* @param neighborhoodDistanceFunction the neighborhood distance function
* @param reachabilityDistanceFunction the reachability distance function
*/
- public OnlineLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ public OnlineLOF(int krefer, int kreach, DistanceFunction<? super O> neighborhoodDistanceFunction, DistanceFunction<? super O> reachabilityDistanceFunction) {
super(krefer, kreach, neighborhoodDistanceFunction, reachabilityDistanceFunction);
}
@@ -93,20 +93,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
- Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
- KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
- KNNQuery<O, D> kNNReach = queries.getFirst().getSecond();
- RKNNQuery<O, D> rkNNRefer = queries.getSecond().getFirst();
- RKNNQuery<O, D> rkNNReach = queries.getSecond().getSecond();
+ Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
+ KNNQuery<O> kNNRefer = queries.getFirst().getFirst();
+ KNNQuery<O> kNNReach = queries.getFirst().getSecond();
+ RKNNQuery<O> rkNNRefer = queries.getSecond().getFirst();
+ RKNNQuery<O> rkNNReach = queries.getSecond().getSecond();
- LOFResult<O, D> lofResult = super.doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog);
+ LOFResult<O> lofResult = super.doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog);
lofResult.setRkNNRefer(rkNNRefer);
lofResult.setRkNNReach(rkNNReach);
// add listener
KNNListener l = new LOFKNNListener(lofResult);
- ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
- ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O>) ((PreprocessorKNNQuery<O>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O>) ((PreprocessorKNNQuery<O>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
return lofResult.getResult();
}
@@ -118,47 +118,48 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
* @param stepprog Progress logger
* @return the kNN and rkNN queries
*/
- private Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
+ private Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
// Use "HEAVY" flag, since this is an online algorithm
- KNNQuery<O, D> kNNRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- RKNNQuery<O, D> rkNNRefer = QueryUtil.getRKNNQuery(relation, referenceDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O> kNNRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O> rkNNRefer = QueryUtil.getRKNNQuery(relation, referenceDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query or RkNN query - use a preprocessor!
- if (kNNRefer == null || rkNNRefer == null) {
- if (stepprog != null) {
+ if(kNNRefer == null || rkNNRefer == null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
- DistanceQuery<O, D> ndq = database.getDistanceQuery(relation, referenceDistanceFunction);
+ MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
+ DistanceQuery<O> ndq = database.getDistanceQuery(relation, referenceDistanceFunction);
kNNRefer = preproc.getKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
rkNNRefer = preproc.getRKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
// add as index
- relation.getDatabase().addIndex(preproc);
- } else {
- if (stepprog != null) {
+ database.addIndex(preproc);
+ }
+ else {
+ if(stepprog != null) {
stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
- KNNQuery<O, D> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- if (kNNReach == null || rkNNReach == null) {
- if (stepprog != null) {
+ KNNQuery<O> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ if(kNNReach == null || rkNNReach == null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, kreach);
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
+ MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
+ DistanceQuery<O> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
kNNReach = preproc.getKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
rkNNReach = preproc.getRKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
// add as index
relation.getDatabase().addIndex(preproc);
}
- Pair<KNNQuery<O, D>, KNNQuery<O, D>> kNNPair = new Pair<>(kNNRefer, kNNReach);
- Pair<RKNNQuery<O, D>, RKNNQuery<O, D>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
+ Pair<KNNQuery<O>, KNNQuery<O>> kNNPair = new Pair<>(kNNRefer, kNNReach);
+ Pair<RKNNQuery<O>, RKNNQuery<O>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
return new Pair<>(kNNPair, rkNNPair);
}
@@ -182,36 +183,40 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
/**
* Holds the result of a former run of the LOF algorithm.
*/
- private LOFResult<O, D> lofResult;
+ private LOFResult<O> lofResult;
/**
* Constructs a listener for the LOF algorithm.
*
* @param lofResult the result of a former run of the LOF algorithm
*/
- public LOFKNNListener(LOFResult<O, D> lofResult) {
+ public LOFKNNListener(LOFResult<O> lofResult) {
this.lofResult = lofResult;
}
@Override
public void kNNsChanged(KNNChangeEvent e) {
- AbstractMaterializeKNNPreprocessor<O, D, ?> p1 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNRefer()).getPreprocessor();
- AbstractMaterializeKNNPreprocessor<O, D, ?> p2 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNReach()).getPreprocessor();
+ AbstractMaterializeKNNPreprocessor<O> p1 = ((PreprocessorKNNQuery<O>) lofResult.getKNNRefer()).getPreprocessor();
+ AbstractMaterializeKNNPreprocessor<O> p2 = ((PreprocessorKNNQuery<O>) lofResult.getKNNReach()).getPreprocessor();
- if (firstEventReceived == null) {
- if (e.getSource().equals(p1) && e.getSource().equals(p2)) {
+ if(firstEventReceived == null) {
+ if(e.getSource().equals(p1) && e.getSource().equals(p2)) {
kNNsChanged(e, e);
- } else {
+ }
+ else {
firstEventReceived = e;
}
- } else {
- if (e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
+ }
+ else {
+ if(e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
kNNsChanged(e, firstEventReceived);
firstEventReceived = null;
- } else if (e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
+ }
+ else if(e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
kNNsChanged(firstEventReceived, e);
firstEventReceived = null;
- } else {
+ }
+ else {
throw new UnsupportedOperationException("Event sources do not fit!");
}
}
@@ -225,18 +230,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
* @param e2 the change event of the second preprocessor
*/
private void kNNsChanged(KNNChangeEvent e1, KNNChangeEvent e2) {
- if (!e1.getType().equals(e2.getType())) {
+ if(!e1.getType().equals(e2.getType())) {
throw new UnsupportedOperationException("Event types do not fit: " + e1.getType() + " != " + e2.getType());
}
- if (!e1.getObjects().equals(e2.getObjects())) {
+ if(!e1.getObjects().equals(e2.getObjects())) {
throw new UnsupportedOperationException("Objects do not fit: " + e1.getObjects() + " != " + e2.getObjects());
}
- if (e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
+ if(e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
kNNsRemoved(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- } else if (e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
+ }
+ else if(e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
kNNsInserted(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- } else {
+ }
+ else {
throw new UnsupportedOperationException("Unsupported event type: " + e1.getType());
}
}
@@ -251,44 +258,43 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
* reachability distance function
* @param lofResult the result of the former LOF run
*/
- private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
+ private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O> lofResult) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// recompute lrds
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
- List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
+ List<? extends DoubleDBIDList> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
- WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
- for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ WritableDoubleDataStore new_lrds = DataStoreUtil.makeDoubleStorage(affected_lrd_id_candidates, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ computeLRDs(lofResult.getKNNReach(), affected_lrd_id_candidates, new_lrds);
+ for(DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if (Double.isNaN(old_lrd) || old_lrd != new_lrd) {
+ if(Double.isNaN(old_lrd) || old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
+ List<? extends DoubleDBIDList> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
}
/**
@@ -301,53 +307,52 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
* reachability distance function
* @param lofResult the result of the former LOF run
*/
- private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
+ private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O> lofResult) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
// delete lrds and lofs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
}
- for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
lofResult.getLrds().delete(iter);
lofResult.getLofs().delete(iter);
}
// recompute lrds
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
- List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
+ List<? extends DoubleDBIDList> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
- WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
- for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ WritableDoubleDataStore new_lrds = DataStoreUtil.makeDoubleStorage(affected_lrd_id_candidates, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ computeLRDs(lofResult.getKNNReach(), affected_lrd_id_candidates, new_lrds);
+ for(DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if (old_lrd != new_lrd) {
+ if(old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
+ List<? extends DoubleDBIDList> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(4, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
}
/**
@@ -358,12 +363,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
* @return a set containing the ids of the query result and the specified
* ids
*/
- private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDList<D>> queryResults, DBIDs... ids) {
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DoubleDBIDList> queryResults, DBIDs... ids) {
ModifiableDBIDs result = DBIDUtil.newHashSet();
- for (DBIDs dbids : ids) {
+ for(DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
- for (DistanceDBIDList<D> queryResult : queryResults) {
+ for(DoubleDBIDList queryResult : queryResults) {
result.addDBIDs(queryResult);
}
return DBIDUtil.newArray(result);
@@ -375,24 +380,23 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
* @param ids the ids of the lofs to be recomputed
* @param lofResult the result of the former LOF run
*/
- private void recomputeLOFs(DBIDs ids, LOFResult<O, D> lofResult) {
- Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lofResult.getLrds(), lofResult.getKNNRefer());
- WritableDoubleDataStore new_lofs = lofsAndMax.getFirst();
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ private void recomputeLOFs(DBIDs ids, LOFResult<O> lofResult) {
+ WritableDoubleDataStore new_lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ DoubleMinMax new_lofminmax = new DoubleMinMax();
+ computeLOFs(lofResult.getKNNRefer(), ids, lofResult.getLrds(), new_lofs, new_lofminmax);
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
lofResult.getLofs().putDouble(iter, new_lofs.doubleValue(iter));
}
- // track the maximum value for normalization.
- DoubleMinMax new_lofminmax = lofsAndMax.getSecond();
-
// Actualize meta info
- if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
- BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
- scoreMeta.setActualMaximum(new_lofminmax.getMax());
- }
-
- if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
- BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
- scoreMeta.setActualMinimum(new_lofminmax.getMin());
+ if(new_lofminmax.isValid()) {
+ if(lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
+ BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
+ scoreMeta.setActualMaximum(new_lofminmax.getMax());
+ }
+ if(lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
+ BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
+ scoreMeta.setActualMinimum(new_lofminmax.getMin());
+ }
}
}
}
@@ -409,9 +413,9 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O,
*
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends FlexibleLOF.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends FlexibleLOF.Parameterizer<O> {
@Override
- protected OnlineLOF<O, D> makeInstance() {
+ protected OnlineLOF<O> makeInstance() {
return new OnlineLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
index b990ef35..3ba56b16 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,27 +30,20 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -61,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -77,9 +71,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @apiviz.has KernelDensityFunction
*
* @param <O> the type of objects handled by this Algorithm
- * @param <D> Distance type
*/
-public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+public class SimpleKernelDensityLOF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -101,7 +94,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
* @param k the value of k
* @param kernel Kernel function
*/
- public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) {
+ public SimpleKernelDensityLOF(int k, DistanceFunction<? super O> distance, KernelDensityFunction kernel) {
super(distance);
this.k = k + 1;
this.kernel = kernel;
@@ -116,112 +109,75 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
-
final int dim = RelationUtil.dimensionality(relation);
-
DBIDs ids = relation.getDBIDs();
- // "HEAVY" flag for KNN Query since it is used more than once
- KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if (!(knnq instanceof PreprocessorKNNQuery)) {
- if (stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- database.addIndex(preproc);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
- knnq = preproc.getKNNQuery(rdq, k);
- }
+ LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
// Compute LRDs
- if (stepprog != null) {
- stepprog.beginStep(2, "Computing densities.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing densities.");
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
int count = 0;
double sum = 0.0;
- if (neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
- final double v = neighbor.doubleDistance() / max;
- sum += kernel.density(v) / MathUtil.powi(max, dim);
- count++;
- }
- } else {
- for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if (DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
- final double v = neighbor.getDistance().doubleValue() / max;
- sum += kernel.density(v) / MathUtil.powi(max, dim);
- count++;
+ // Fast version for double distances
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, it)) {
+ continue;
}
+ double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
+ final double v = neighbor.doubleValue() / max;
+ sum += kernel.density(v) / MathUtil.powi(max, dim);
+ count++;
}
final double density = sum / count;
dens.putDouble(it, density);
- if (densProgress != null) {
- densProgress.incrementProcessed(LOG);
- }
- }
- if (densProgress != null) {
- densProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(densProgress);
}
+ LOG.ensureCompleted(densProgress);
// compute LOF_SCORE of each db object
- if (stepprog != null) {
- stepprog.beginStep(3, "Computing KLOFs.", LOG);
- }
+ LOG.beginStep(stepprog, 3, "Computing KLOFs.");
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = dens.doubleValue(it);
final double lof;
- if (lrdp > 0) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ if(lrdp > 0) {
+ final KNNList neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if (DBIDUtil.equal(neighbor, it)) {
+ if(DBIDUtil.equal(neighbor, it)) {
continue;
}
sum += dens.doubleValue(neighbor);
count++;
}
lof = sum / (count * lrdp);
- } else {
+ }
+ else {
lof = 1.0;
}
lofs.putDouble(it, lof);
// update minimum and maximum
lofminmax.put(lof);
- if (progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if (progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
+ LOG.ensureCompleted(progressLOFs);
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
@@ -246,9 +202,8 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
* @apiviz.exclude
*
* @param <O> vector type
- * @param <D> distance type
*/
- public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for kernel density LOF kernel.
*/
@@ -270,18 +225,18 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD
final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
- if (config.grab(pK)) {
+ if(config.grab(pK)) {
k = pK.getValue();
}
ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
- if (config.grab(kernelP)) {
+ if(config.grab(kernelP)) {
kernel = kernelP.instantiateClass(config);
}
}
@Override
- protected SimpleKernelDensityLOF<O, D> makeInstance() {
+ protected SimpleKernelDensityLOF<O> makeInstance() {
return new SimpleKernelDensityLOF<>(k, distanceFunction, kernel);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
index d54b053f..8fce6503 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,26 +28,19 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -56,6 +49,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -68,28 +62,30 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* Reference:
* <p>
* Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
- * Local outlier detection reconsidered: a generalized view on locality with
- * applications to spatial, video, and network outlier detection<br />
- * In: Data Mining and Knowledge Discovery
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
* </p>
*
* @author Erich Schubert
*
* @apiviz.has KNNQuery
*
- * @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> Distance type
+ * @param <O> the type of data objects handled by this algorithm
*/
-@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title = "Local outlier detection reconsidered: a generalized view on locality with applications to spatial, video, and network outlier detection", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
-@Alias({ "SimpleLOF", "outlier.SimpleLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF" })
-public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+@Alias({ "SimplifiedLOF", "outlier.SimplifiedLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimplifiedLOF" })
+public class SimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(SimplifiedLOF.class);
/**
- * Parameter k.
+ * The number of neighbors to query, excluding the query point.
*/
protected int k;
@@ -98,7 +94,7 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
*
* @param k the value of k
*/
- public SimplifiedLOF(int k, DistanceFunction<? super O, D> distance) {
+ public SimplifiedLOF(int k, DistanceFunction<? super O> distance) {
super(distance);
this.k = k + 1;
}
@@ -111,114 +107,103 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
* @return LOF outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null;
-
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Simplified LOF", 3) : null;
DBIDs ids = relation.getDBIDs();
- // "HEAVY" flag for KNN Query since it is used more than once
- KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- // No optimized kNN query - use a preprocessor!
- if(!(knnq instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
- }
- MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k);
- database.addIndex(preproc);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction());
- knnq = preproc.getKNNQuery(rdq, k);
- }
+ LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
+ KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
// Compute LRDs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing densities.", LOG);
- }
+ LOG.beginStep(stepprog, 2, "Computing densities.");
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
- for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
+ computeSimplifiedLRDs(ids, knnq, dens);
+
+ // compute LOF_SCORE of each db object
+ LOG.beginStep(stepprog, 3, "Computing SLOFs.");
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DoubleMinMax lofminmax = new DoubleMinMax();
+ computeSimplifiedLOFs(ids, knnq, dens, lofs, lofminmax);
+
+ LOG.setCompleted(stepprog);
+
+ // Build result representation.
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ /**
+ * Compute the simplified reachability densities.
+ *
+ * @param ids IDs to process
+ * @param knnq kNN query class
+ * @param lrds Density output
+ */
+ private void computeSimplifiedLRDs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore lrds) {
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
double sum = 0.0;
int count = 0;
- if(neighbors instanceof DoubleDistanceKNNList) {
- // Fast version for double distances
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- sum += neighbor.doubleDistance();
- count++;
- }
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- if(DBIDUtil.equal(neighbor, it)) {
- continue;
- }
- sum += neighbor.getDistance().doubleValue();
- count++;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(neighbor, iter)) {
+ continue;
}
+ sum += neighbor.doubleValue();
+ count++;
}
// Avoid division by 0
- final double lrd = (sum > 0) ? (count / sum) : 0;
- dens.putDouble(it, lrd);
- if(densProgress != null) {
- densProgress.incrementProcessed(LOG);
- }
- }
- if(densProgress != null) {
- densProgress.ensureCompleted(LOG);
- }
-
- // compute LOF_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing SLOFs.", LOG);
+ final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY;
+ lrds.putDouble(iter, lrd);
+ LOG.incrementProcessed(lrdsProgress);
}
- WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- // track the maximum value for normalization.
- DoubleMinMax lofminmax = new DoubleMinMax();
+ LOG.ensureCompleted(lrdsProgress);
+ }
- FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null;
- for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- final double lrdp = dens.doubleValue(it);
+ /**
+ * Compute the simplified LOF factors.
+ *
+ * @param ids IDs to compute for
+ * @param knnq kNN query class
+ * @param slrds Object densities
+ * @param lofs SLOF output storage
+ * @param lofminmax Minimum and maximum scores
+ */
+ private void computeSimplifiedLOFs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore slrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simplified LOF scores.", ids.size(), LOG) : null;
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final double lof;
- if(lrdp > 0) {
- final KNNList<D> neighbors = knnq.getKNNForDBID(it, k);
- double sum = 0.0;
+ final double lrdp = slrds.doubleValue(iter);
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
+ if(!Double.isInfinite(lrdp)) {
+ double sum = 0.;
int count = 0;
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if(DBIDUtil.equal(neighbor, it)) {
+ if(DBIDUtil.equal(neighbor, iter)) {
continue;
}
- sum += dens.doubleValue(neighbor);
+ final double val = slrds.doubleValue(neighbor);
+ sum += val;
count++;
+ if(Double.isInfinite(val)) {
+ break;
+ }
}
- lof = sum / (count * lrdp);
+ lof = sum / (lrdp * count);
}
else {
lof = 1.0;
}
- lofs.putDouble(it, lof);
+ lofs.putDouble(iter, lof);
// update minimum and maximum
lofminmax.put(lof);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(LOG);
- }
- }
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(LOG);
- }
-
- if(stepprog != null) {
- stepprog.setCompleted(LOG);
+ LOG.incrementProcessed(progressLOFs);
}
-
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids);
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
- OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
-
- return result;
+ LOG.ensureCompleted(progressLOFs);
}
@Override
@@ -238,10 +223,9 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
*
* @apiviz.exclude
*
- * @param <O> vector type
- * @param <D> distance type
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* The neighborhood size to use.
*/
@@ -252,14 +236,14 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi
super.makeOptions(config);
final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID);
- pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(pK)) {
k = pK.getValue();
}
}
@Override
- protected SimplifiedLOF<O, D> makeInstance() {
+ protected SimplifiedLOF<O> makeInstance() {
return new SimplifiedLOF<>(k, distanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java
index 48d4b16a..090e89da 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java
new file mode 100644
index 00000000..3c0bf4c8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java
@@ -0,0 +1,119 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.parallel.Executor;
+import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+
+/**
+ * Processor for computing the LOF.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Instance
+ */
+public class LOFProcessor extends AbstractDoubleProcessor {
+  /**
+   * Store to read the k nearest neighbors of each object from.
+   */
+  private final DataStore<? extends KNNList> knns;
+
+  /**
+   * Store to read the density (lrd) of each object from.
+   */
+  private final DoubleDataStore lrds;
+
+  /**
+   * Exclude object itself from computation.
+   */
+  private final boolean noself;
+
+  /**
+   * Constructor.
+   *
+   * @param knns k nearest neighbors
+   * @param lrds Local reachability densities
+   * @param noself Exclude self from neighbors
+   */
+  public LOFProcessor(DataStore<? extends KNNList> knns, DoubleDataStore lrds, boolean noself) {
+    super();
+    this.knns = knns;
+    this.lrds = lrds;
+    this.noself = noself;
+  }
+
+  @Override
+  public Instance instantiate(Executor master) {
+    return new Instance(master.getInstance(output));
+  }
+
+  /**
+   * Instance that computes the score of a single object by dividing the
+   * average density of its neighbors by its own density.
+   *
+   * @author Erich Schubert
+   */
+  private class Instance extends AbstractDoubleProcessor.Instance {
+    /**
+     * Constructor.
+     *
+     * @param output Output variable
+     */
+    protected Instance(SharedDouble.Instance output) {
+      super(output);
+    }
+
+    /**
+     * Compute the score of one object and write it to the output variable.
+     *
+     * @param id Object to process
+     */
+    @Override
+    public void map(DBIDRef id) {
+      // Own density
+      final double lrdp = lrds.doubleValue(id);
+      // An infinite own density cannot be used as divisor in a meaningful way;
+      // such objects get the fixed score 1.0.
+      if (Double.isInfinite(lrdp)) {
+        output.set(1.0);
+        return;
+      }
+      // Compute average neighbor density:
+      KNNList knn = knns.get(id);
+      double avlrd = 0.0;
+      int cnt = 0;
+      for (DBIDIter n = knn.iter(); n.valid(); n.advance()) {
+        if (noself && DBIDUtil.equal(n, id)) {
+          continue;
+        }
+        avlrd += lrds.doubleValue(n);
+        cnt++;
+        // Once the sum is infinite, further neighbors cannot change it.
+        if (Double.isInfinite(avlrd)) {
+          break;
+        }
+      }
+      // Avoid division by 0 when no neighbor was counted.
+      avlrd = (cnt > 0) ? (avlrd / cnt) : 0;
+      output.set(avlrd / lrdp);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java
new file mode 100644
index 00000000..1b62320a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java
@@ -0,0 +1,103 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.parallel.Executor;
+import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+
+/**
+ * Processor for the "local reachability density" of LOF.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Instance
+ */
+public class LRDProcessor extends AbstractDoubleProcessor {
+  /**
+   * Store to read the k nearest neighbors of each object from.
+   */
+  private DataStore<? extends KNNList> knns;
+
+  /**
+   * Store to read the k-distance of each object from.
+   */
+  private DoubleDataStore kdists;
+
+  /**
+   * Constructor.
+   *
+   * @param knns k nearest neighbors
+   * @param kdists k distances
+   */
+  public LRDProcessor(DataStore<? extends KNNList> knns, DoubleDataStore kdists) {
+    super();
+    this.knns = knns;
+    this.kdists = kdists;
+  }
+
+  @Override
+  public Instance instantiate(Executor master) {
+    return new Instance(master.getInstance(output));
+  }
+
+  /**
+   * Instance that computes the density value of a single object.
+   *
+   * @author Erich Schubert
+   */
+  private class Instance extends AbstractDoubleProcessor.Instance {
+    /**
+     * Constructor.
+     *
+     * @param output Output variable
+     */
+    protected Instance(SharedDouble.Instance output) {
+      super(output);
+    }
+
+    /**
+     * Sum, over all neighbors except the object itself, the larger of the
+     * neighbor's k-distance and the distance to the neighbor; the output is
+     * the number of neighbors divided by this sum.
+     *
+     * @param id Object to process
+     */
+    @Override
+    public void map(DBIDRef id) {
+      final KNNList neighborhood = knns.get(id);
+      double reachSum = 0.0;
+      int counted = 0;
+      for(DoubleDBIDListIter it = neighborhood.iter(); it.valid(); it.advance()) {
+        // The query object does not contribute to its own density.
+        if(!DBIDUtil.equal(it, id)) {
+          final double kdist = kdists.doubleValue(it);
+          final double dist = it.doubleValue();
+          reachSum += Math.max(kdist, dist);
+          counted += 1;
+        }
+      }
+      // A sum of zero would be a division by 0; use infinite density instead.
+      if(reachSum > 0) {
+        output.set(counted / reachSum);
+      }
+      else {
+        output.set(Double.POSITIVE_INFINITY);
+      }
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java
new file mode 100644
index 00000000..fa851401
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java
@@ -0,0 +1,208 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor;
+import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KDistanceProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.WriteDataStoreProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Parallel implementation of Local Outlier Factor using processors.
+ *
+ * This parallelized implementation is based on the easy-to-parallelize
+ * generalized pattern discussed in
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has LRDProcessor
+ * @apiviz.has LOFProcessor
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+public class ParallelLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+  /**
+   * Class logger
+   */
+  private static final Logging LOG = Logging.getLogger(ParallelLOF.class);
+
+  /**
+   * Parameter k. All queries below are issued with k + 1.
+   */
+  private final int k;
+
+  /**
+   * Constructor.
+   *
+   * @param distanceFunction Distance function
+   * @param k K parameter
+   */
+  public ParallelLOF(DistanceFunction<? super O> distanceFunction, int k) {
+    super(distanceFunction);
+    this.k = k;
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+  }
+
+  /**
+   * Run the algorithm in three parallel phases: kNN and k-distances, lrd
+   * values, and finally the LOF scores together with their minimum and
+   * maximum.
+   *
+   * @param database Database to obtain the queries from
+   * @param relation Relation to process
+   * @return LOF outlier result
+   */
+  public OutlierResult run(Database database, Relation<O> relation) {
+    DBIDs ids = relation.getDBIDs();
+    DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
+    KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
+
+    // Phase one: KNN and k-dist
+    WritableDoubleDataStore kdists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+    WritableDataStore<KNNList> knns = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, KNNList.class);
+    {
+      // Compute kNN
+      KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
+      SharedObject<KNNList> knnv = new SharedObject<>();
+      WriteDataStoreProcessor<KNNList> storek = new WriteDataStoreProcessor<>(knns);
+      knnm.connectKNNOutput(knnv);
+      storek.connectInput(knnv);
+      // Compute k-dist
+      KDistanceProcessor kdistm = new KDistanceProcessor(k + 1);
+      SharedDouble kdistv = new SharedDouble();
+      WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(kdists);
+      kdistm.connectKNNInput(knnv);
+      kdistm.connectOutput(kdistv);
+      storem.connectInput(kdistv);
+
+      ParallelExecutor.run(ids, knnm, storek, kdistm, storem);
+    }
+
+    // Phase two: lrd
+    WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+    {
+      LRDProcessor lrdm = new LRDProcessor(knns, kdists);
+      SharedDouble lrdv = new SharedDouble();
+      WriteDoubleDataStoreProcessor storelrd = new WriteDoubleDataStoreProcessor(lrds);
+
+      lrdm.connectOutput(lrdv);
+      storelrd.connectInput(lrdv);
+      ParallelExecutor.run(ids, lrdm, storelrd);
+    }
+    kdists.destroy(); // No longer needed.
+    kdists = null;
+
+    // Phase three: LOF
+    WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+    DoubleMinMax minmax;
+    {
+      LOFProcessor lofm = new LOFProcessor(knns, lrds, true);
+      SharedDouble lofv = new SharedDouble();
+      DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
+      WriteDoubleDataStoreProcessor storelof = new WriteDoubleDataStoreProcessor(lofs);
+
+      lofm.connectOutput(lofv);
+      mmm.connectInput(lofv);
+      storelof.connectInput(lofv);
+      ParallelExecutor.run(ids, lofm, storelof, mmm);
+
+      minmax = mmm.getMinMax();
+    }
+    // Neighborhoods and densities were only inputs to phase three; release
+    // them like the k-distances above. Only the LOF scores go into the result.
+    knns.destroy();
+    lrds.destroy();
+
+    DoubleRelation scoreres = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids);
+    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+    return new OutlierResult(meta, scoreres);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return LOG;
+  }
+
+  /**
+   * Parameterization class
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <O> Object type
+   */
+  public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+    /**
+     * K parameter
+     */
+    int k;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+
+      // Reuses the option ID of the LOF parameterizer for k.
+      IntParameter kP = new IntParameter(LOF.Parameterizer.K_ID);
+      if(config.grab(kP)) {
+        k = kP.intValue();
+      }
+    }
+
+    @Override
+    protected ParallelLOF<O> makeInstance() {
+      return new ParallelLOF<>(distanceFunction, k);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java
new file mode 100644
index 00000000..ef67023e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java
@@ -0,0 +1,197 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor;
+import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.WriteDataStoreProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Parallel implementation of Simplified-LOF Outlier detection using processors.
+ *
+ * This parallelized implementation is based on the easy-to-parallelize
+ * generalized pattern discussed in
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has SimplifiedLRDProcessor
+ * @apiviz.has LOFProcessor
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+public class ParallelSimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+  /**
+   * Class logger
+   */
+  private static final Logging LOG = Logging.getLogger(ParallelSimplifiedLOF.class);
+
+  /**
+   * Parameter k. All queries below are issued with k + 1.
+   */
+  private final int k;
+
+  /**
+   * Constructor.
+   *
+   * @param distanceFunction Distance function
+   * @param k K parameter
+   */
+  public ParallelSimplifiedLOF(DistanceFunction<? super O> distanceFunction, int k) {
+    super(distanceFunction);
+    this.k = k;
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+  }
+
+  /**
+   * Run the algorithm in three parallel phases: kNN, simplified lrd values,
+   * and finally the Simplified-LOF scores together with their minimum and
+   * maximum.
+   *
+   * @param database Database to obtain the queries from
+   * @param relation Relation to process
+   * @return Simplified-LOF outlier result
+   */
+  public OutlierResult run(Database database, Relation<O> relation) {
+    DBIDs ids = relation.getDBIDs();
+    DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
+    KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
+
+    // Phase one: KNN (no k-distances are computed here)
+    WritableDataStore<KNNList> knns = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, KNNList.class);
+    {
+      // Compute kNN
+      KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
+      SharedObject<KNNList> knnv = new SharedObject<>();
+      WriteDataStoreProcessor<KNNList> storek = new WriteDataStoreProcessor<>(knns);
+      knnm.connectKNNOutput(knnv);
+      storek.connectInput(knnv);
+
+      ParallelExecutor.run(ids, knnm, storek);
+    }
+
+    // Phase two: simplified-lrd
+    WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+    {
+      SimplifiedLRDProcessor lrdm = new SimplifiedLRDProcessor(knns);
+      SharedDouble lrdv = new SharedDouble();
+      WriteDoubleDataStoreProcessor storelrd = new WriteDoubleDataStoreProcessor(lrds);
+
+      lrdm.connectOutput(lrdv);
+      storelrd.connectInput(lrdv);
+      ParallelExecutor.run(ids, lrdm, storelrd);
+    }
+
+    // Phase three: Simplified-LOF
+    WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+    DoubleMinMax minmax;
+    {
+      LOFProcessor lofm = new LOFProcessor(knns, lrds, true);
+      SharedDouble lofv = new SharedDouble();
+      DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
+      WriteDoubleDataStoreProcessor storelof = new WriteDoubleDataStoreProcessor(lofs);
+
+      lofm.connectOutput(lofv);
+      mmm.connectInput(lofv);
+      storelof.connectInput(lofv);
+      ParallelExecutor.run(ids, lofm, storelof, mmm);
+
+      minmax = mmm.getMinMax();
+    }
+    // Neighborhoods and densities were only inputs to phase three; release
+    // them. Only the score store goes into the result.
+    knns.destroy();
+    lrds.destroy();
+
+    DoubleRelation scoreres = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids);
+    OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+    return new OutlierResult(meta, scoreres);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return LOG;
+  }
+
+  /**
+   * Parameterization class
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <O> Object type
+   */
+  public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+    /**
+     * K parameter
+     */
+    int k;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+
+      // Reuses the option ID of the LOF parameterizer for k.
+      IntParameter kP = new IntParameter(LOF.Parameterizer.K_ID);
+      if(config.grab(kP)) {
+        k = kP.getValue();
+      }
+    }
+
+    @Override
+    protected ParallelSimplifiedLOF<O> makeInstance() {
+      return new ParallelSimplifiedLOF<>(distanceFunction, k);
+    }
+  }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java
new file mode 100644
index 00000000..4698ae6a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java
@@ -0,0 +1,97 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.parallel.Executor;
+import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+
+/**
+ * Processor for the simplified "local reachability density" (LRD) of LOF.
+ *
+ * Note: the output is (number of neighbors) / (sum of their stored double values), or 0 if that sum is not positive.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has Instance
+ */
+public class SimplifiedLRDProcessor extends AbstractDoubleProcessor {
+ /**
+ * KNN store: the neighbor list of an object is fetched from it by id.
+ */
+ private DataStore<? extends KNNList> knns;
+
+ /**
+ * Constructor.
+ *
+ * @param knns k nearest neighbors, looked up per object in Instance.map
+ */
+ public SimplifiedLRDProcessor(DataStore<? extends KNNList> knns) {
+ super();
+ this.knns = knns;
+ }
+
+ @Override
+ public Instance instantiate(Executor master) {
+ return new Instance(master.getInstance(output)); // the instance is given whatever master.getInstance(output) returns
+ }
+
+ /**
+ * Instance: handles one object per map() call, reading the enclosing processor's KNN store.
+ *
+ * @author Erich Schubert
+ */
+ private class Instance extends AbstractDoubleProcessor.Instance {
+ /**
+ * Constructor.
+ *
+ * @param output Output variable, handed on to the superclass constructor
+ */
+ public Instance(SharedDouble.Instance output) {
+ super(output);
+ }
+
+ @Override
+ public void map(DBIDRef id) {
+ KNNList knn = knns.get(id);
+ double lrd = 0.0; // running sum of neighbor values; only turned into a ratio at the end
+ int size = 0; // number of neighbors counted, excluding the query object
+ for(DoubleDBIDListIter n = knn.iter(); n.valid(); n.advance()) {
+ // Skip the entry that refers to the query object itself
+ if(DBIDUtil.equal(n, id)) {
+ continue;
+ }
+ lrd += n.doubleValue(); // presumably the neighbor's distance to the query object - TODO confirm
+ size++;
+ }
+ // Avoid division by zero. NOTE(review): a non-positive sum (e.g. all counted values 0) outputs 0 - confirm intended.
+ output.set(lrd > 0 ? size / lrd : 0);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/package-info.java
new file mode 100644
index 00000000..3d708b4c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/package-info.java
@@ -0,0 +1,44 @@
+/**
+ * Parallelized variants of LOF.
+ *
+ * This parallelization is based on the generalization of outlier detection published in:
+ *
+ * Reference:
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
+ * </p>
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel;
+
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
index 757b80ad..f0ada6a8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -42,7 +42,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -203,7 +204,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
else {
meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
}
- Relation<Double> scoresult = new MaterializedRelation<>("External Outlier", "external-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
OutlierResult or = new OutlierResult(meta, scoresult);
// Apply scaling
@@ -212,7 +213,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
}
DoubleMinMax mm = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double val = scoresult.get(iditer);
+ double val = scoresult.doubleValue(iditer);
val = scaling.getScaled(val);
scores.putDouble(iditer, val);
mm.put(val);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index 5b681106..a5eb0c7a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
*/
import java.util.ArrayList;
-import java.util.BitSet;
import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
@@ -38,18 +37,19 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -131,7 +131,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param relation Relation to use
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<NumberVector<?>> relation) {
+ public OutlierResult run(Database database, Relation<NumberVector> relation) {
final int dbdim = RelationUtil.dimensionality(relation);
final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
@@ -141,34 +141,30 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
for(int i = 0; i < num; i++) {
- BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand);
+ long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
- LOF<NumberVector<?>, DoubleDistance> lof = new LOF<>(k, df);
+ LOF<NumberVector> lof = new LOF<>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(database, relation);
results.add(result);
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if(prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
if(breadth) {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
- Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
+ Pair<DBIDIter, DoubleRelation>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
for(OutlierResult r : results) {
- IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
+ IDVectorOntoScoreVector[i] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
@@ -176,12 +172,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
// Iterating over the *lines* of the AS_t(i)-matrix.
for(int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ for(Pair<DBIDIter, DoubleRelation> pair : IDVectorOntoScoreVector) {
DBIDIter iter = pair.first;
// Always true if every algorithm returns a complete result (one score
// for every DBID).
if(iter.valid()) {
- double score = pair.second.get(iter);
+ double score = pair.second.doubleValue(iter);
if(Double.isNaN(scores.doubleValue(iter))) {
scores.putDouble(iter, score);
minmax.put(score);
@@ -193,36 +189,28 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
}
// Progress does not take the initial mapping into account.
- if(cprog != null) {
- cprog.incrementProcessed(LOG);
- }
- }
- if(cprog != null) {
- cprog.ensureCompleted(LOG);
+ LOG.incrementProcessed(cprog);
}
+ LOG.ensureCompleted(cprog);
}
else {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
for(OutlierResult r : results) {
- final Double s = r.getScores().get(iter);
- if(s != null && !Double.isNaN(s)) {
+ final double s = r.getScores().doubleValue(iter);
+ if(!Double.isNaN(s)) {
sum += s;
}
}
scores.putDouble(iter, sum);
minmax.put(sum);
- if(cprog != null) {
- cprog.incrementProcessed(LOG);
- }
- }
- if(cprog != null) {
- cprog.ensureCompleted(LOG);
+ LOG.incrementProcessed(cprog);
}
+ LOG.ensureCompleted(cprog);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scoreres = new MaterializedRelation<>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
@@ -234,8 +222,8 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param maxdim Maximum number to choose
* @return Subspace as bits.
*/
- private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) {
- BitSet dimset = new BitSet();
+ private long[] randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) {
+ long[] dimset = BitsUtil.zero(alldim);
// Fill with all dimensions
int[] dims = new int[alldim];
for(int d = 0; d < alldim; d++) {
@@ -246,7 +234,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
// Shrink the subspace to the destination size
for(int d = 0; d < alldim - subdim; d++) {
int s = rand.nextInt(alldim - d);
- dimset.set(dims[s]);
+ BitsUtil.setI(dimset, dims[s]);
dims[s] = dims[alldim - d - 1];
}
return dimset;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
index f92a8b80..4858e0df 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -51,7 +51,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProjectedView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
@@ -59,12 +60,12 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.math.statistics.tests.GoodnessOfFitTest;
import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -102,7 +103,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
@Title("HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
@Description("Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)", url = "http://dx.doi.org/10.1109/ICDE.2012.88")
-public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class HiCS<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The Logger for this class.
*/
@@ -179,7 +180,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
if(LOG.isVerbose()) {
LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
- List<Relation<Double>> results = new ArrayList<>();
+ List<DoubleRelation> results = new ArrayList<>();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// run outlier detection and collect the result
@@ -196,22 +197,18 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
// run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
results.add(result.getScores());
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if(prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0.0;
- for(Relation<Double> r : results) {
- final Double s = r.get(iditer);
- if(s != null && !Double.isNaN(s)) {
+ for(DoubleRelation r : results) {
+ final double s = r.doubleValue(iditer);
+ if(!Double.isNaN(s)) {
sum += s;
}
}
@@ -219,7 +216,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
minmax.put(sum);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scoreres = new MaterializedRelation<>("HiCS", "HiCS-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
@@ -232,7 +229,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
* @param relation Relation to index
* @return List of sorted objects
*/
- private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation) {
+ private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector> relation) {
final int dim = RelationUtil.dimensionality(relation);
ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<>(dim + 1);
@@ -254,7 +251,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
* @param subspaceIndex Subspace indexes
* @return a set of high contrast subspaces
*/
- private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?>> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
+ private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int dbdim = RelationUtil.dimensionality(relation);
FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
@@ -273,14 +270,10 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
ts.set(j);
calculateContrast(relation, ts, subspaceIndex, random);
dDimensionalList.add(ts);
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(prog);
}
}
- if(prog != null) {
- prog.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(prog);
IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
for(int d = 3; !dDimensionalList.isEmpty(); d++) {
@@ -313,9 +306,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
calculateContrast(relation, joinedSet, subspaceIndex, random);
dDimensionalList.add(joinedSet);
- if(qprog != null) {
- qprog.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(qprog);
}
}
// Prune
@@ -328,9 +319,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
}
}
}
- if(qprog != null) {
- qprog.setCompleted(LOG);
- }
+ LOG.setCompleted(qprog);
if(dprog != null) {
dprog.setProcessed(dbdim, LOG);
dprog.ensureCompleted(LOG);
@@ -345,7 +334,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
* @param subspace Subspace
* @param subspaceIndex Subspace indexes
*/
- private void calculateContrast(Relation<? extends NumberVector<?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
+ private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int card = subspace.cardinality();
final double alpha1 = Math.pow(alpha, (1.0 / card));
final int windowsize = (int) (relation.size() * alpha1);
@@ -415,13 +404,9 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
continue;
}
deviationSum += contrast;
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if(prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
subspace.contrast = deviationSum / m;
}
@@ -530,7 +515,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe
*
* @param <V> vector type
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* Parameter that specifies the number of iterations in the Monte-Carlo
* process of identifying high contrast subspaces.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
index 8ebdc27a..885ef1df 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,14 +29,13 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.Algorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -100,7 +99,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
Result innerresult = algorithm.run(database);
OutlierResult or = getOutlierResult(innerresult);
- final Relation<Double> scores = or.getScores();
+ final DoubleRelation scores = or.getScores();
if(scaling instanceof OutlierScalingFunction) {
((OutlierScalingFunction) scaling).prepare(or);
}
@@ -109,13 +108,13 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = scores.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double val = scaling.getScaled(scores.get(iditer));
+ double val = scaling.getScaled(scores.doubleValue(iditer));
scaledscores.putDouble(iditer, val);
minmax.put(val);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), scaling.getMin(), scaling.getMax());
- Relation<Double> scoresult = new MaterializedRelation<>("Scaled Outlier", "scaled-outlier", TypeUtil.DOUBLE, scaledscores, scores.getDBIDs());
+ DoubleRelation scoresult = new MaterializedDoubleRelation("Scaled Outlier", "scaled-outlier", scaledscores, scores.getDBIDs());
OutlierResult result = new OutlierResult(meta, scoresult);
result.addChildResult(innerresult);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
index d40af384..c255a8b0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -40,8 +40,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -111,13 +111,9 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl
results.add(or);
ids.addDBIDs(or.getScores().getDBIDs());
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
}
// Combine
WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
@@ -128,8 +124,8 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl
double[] scores = new double[num];
int i = 0;
for (OutlierResult r : results) {
- Double score = r.getScores().get(id);
- if (score != null) {
+ double score = r.getScores().doubleValue(id);
+ if (!Double.isNaN(score)) {
scores[i] = score;
i++;
} else {
@@ -147,16 +143,12 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl
} else {
LOG.warning("DBID " + id + " was not given any score at all.");
}
- if (cprog != null) {
- cprog.incrementProcessed(LOG);
- }
- }
- if (cprog != null) {
- cprog.ensureCompleted(LOG);
+ LOG.incrementProcessed(cprog);
}
+ LOG.ensureCompleted(cprog);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scores = new MaterializedRelation<>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids);
+ DoubleRelation scores = new MaterializedDoubleRelation("Simple Outlier Ensemble", "ensemble-outlier", sumscore, ids);
return new OutlierResult(meta, scores);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
index f28f8db3..2e9743b9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
@@ -8,7 +8,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
index 0ce6f9b5..aa6da5cf 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
@@ -14,7 +14,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
index e059c16c..a501e00f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,7 +27,6 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -39,9 +38,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @param <N> Object type for neighborhood
* @param <O> Non-spatial object type
- * @param <D> Distance value type
*/
-public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier<N> {
+public abstract class AbstractDistanceBasedSpatialOutlier<N, O> extends AbstractNeighborhoodOutlier<N> {
/**
* Parameter to specify the non spatial distance function to use
*/
@@ -50,7 +48,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
/**
* The distance function to use
*/
- private DistanceFunction<O, D> nonSpatialDistanceFunction;
+ private DistanceFunction<O> nonSpatialDistanceFunction;
/**
* Constructor.
@@ -59,7 +57,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
* @param nonSpatialDistanceFunction Distance function to use on the
* non-spatial attributes.
*/
- public AbstractDistanceBasedSpatialOutlier(NeighborSetPredicate.Factory<N> npredf, DistanceFunction<O, D> nonSpatialDistanceFunction) {
+ public AbstractDistanceBasedSpatialOutlier(NeighborSetPredicate.Factory<N> npredf, DistanceFunction<O> nonSpatialDistanceFunction) {
super(npredf);
this.nonSpatialDistanceFunction = nonSpatialDistanceFunction;
}
@@ -69,7 +67,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
*
* @return the distance function to use on the non-spatial attributes
*/
- protected DistanceFunction<O, D> getNonSpatialDistanceFunction() {
+ protected DistanceFunction<O> getNonSpatialDistanceFunction() {
return nonSpatialDistanceFunction;
}
@@ -82,18 +80,17 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
*
* @param <N> Object type for neighborhood
* @param <O> Non-spatial object type
- * @param <D> Distance value type
*/
- public abstract static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public abstract static class Parameterizer<N, O> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
/**
* The distance function to use on the non-spatial attributes.
*/
- protected PrimitiveDistanceFunction<O, D> distanceFunction = null;
+ protected PrimitiveDistanceFunction<O> distanceFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<PrimitiveDistanceFunction<O, D>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
+ ObjectParameter<PrimitiveDistanceFunction<O>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
index 3b3e71b3..95516a99 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
index 5035cf6f..debf0ee2 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -37,15 +37,15 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDVar;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -81,11 +81,10 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* @author Ahmed Hettab
*
* @param <V> Vector type to use for distances
- * @param <D> Distance function to use
*/
@Title("GLS-Backward Search")
@Reference(authors = "F. Chen and C.-T. Lu and A. P. Boedihardjo", title = "GLS-SOD: A Generalized Local Statistical Approach for Spatial Outlier Detection", booktitle = "Proc. 16th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145/1835804.1835939")
-public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -108,7 +107,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
* @param k number of nearest neighbors to use
* @param alpha Significance niveau
*/
- public CTLuGLSBackwardSearchAlgorithm(DistanceFunction<V, D> distanceFunction, int k, double alpha) {
+ public CTLuGLSBackwardSearchAlgorithm(DistanceFunction<V> distanceFunction, int k, double alpha) {
super(distanceFunction);
this.alpha = alpha;
this.k = k;
@@ -122,7 +121,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
* @param relationy Attribute relation
* @return Algorithm result
*/
- public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
+ public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector> relationy) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax mm = new DoubleMinMax(0.0, 0.0);
@@ -151,7 +150,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
}
}
- Relation<Double> scoreResult = new MaterializedRelation<>("GLSSODBackward", "GLSSODbackward-outlier", TypeUtil.DOUBLE, scores, relationx.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("GLSSODBackward", "GLSSODbackward-outlier", scores, relationx.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0, Double.POSITIVE_INFINITY, 0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -163,11 +162,11 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
* @param relationy Attribute relation
* @return Top outlier and associated score
*/
- private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
+ private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
final int dim = RelationUtil.dimensionality(relationx);
final int dimy = RelationUtil.dimensionality(relationy);
assert (dim == 2);
- KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
+ KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
// We need stable indexed DBIDs
ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
@@ -196,7 +195,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
}
{
- final NumberVector<?> vecy = relationy.get(id);
+ final NumberVector vecy = relationy.get(id);
for(int d = 0; d < dimy; d++) {
double idy = vecy.doubleValue(d);
Y.set(i, d, idy);
@@ -205,7 +204,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
// Fill the neighborhood matrix F:
{
- KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if(DBIDUtil.equal(id, neighbor)) {
@@ -272,9 +271,8 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
* @apiviz.exclude
*
* @param <V> Input vector type
- * @param <D> Distance type
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<V> {
/**
* Holds the alpha value - significance niveau
*/
@@ -303,7 +301,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends
}
@Override
- protected CTLuGLSBackwardSearchAlgorithm<V, D> makeInstance() {
+ protected CTLuGLSBackwardSearchAlgorithm<V> makeInstance() {
return new CTLuGLSBackwardSearchAlgorithm<>(distanceFunction, k, alpha);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
index 1712dd4f..151fe129 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,11 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -61,7 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* <p>
* Implementation note: attribute standardization is not used; this is
* equivalent to using the
- * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.AttributeWiseVarianceNormalization
+ * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.columnwise.AttributeWiseVarianceNormalization
* AttributeWiseVarianceNormalization} filter.
* </p>
*
@@ -71,7 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* @param <O> Attribute Vector
*/
@Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179")
-public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> {
+public class CTLuMeanMultipleAttributes<N, O extends NumberVector> extends AbstractNeighborhoodOutlier<N> {
/**
* logger
*/
@@ -118,13 +120,12 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- Vector temp = deltas.get(iditer).minus(mean);
- final double score = temp.transposeTimesTimes(cmati, temp);
+ final double score = MathUtil.mahalanobisDistance(cmati, deltas.get(iditer), mean);
minmax.put(score);
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", scores, attributes.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -146,7 +147,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab
* @param <N> Neighborhood type
* @param <O> Attribute object type
*/
- public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public static class Parameterizer<N, O extends NumberVector> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMeanMultipleAttributes<N, O> makeInstance() {
return new CTLuMeanMultipleAttributes<>(npredf);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
index 9848d664..4d5afdd0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,14 +27,14 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -91,7 +91,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
@@ -132,7 +132,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -146,7 +146,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
index 583958fe..4e993a97 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,11 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
@@ -62,7 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* <p>
* Implementation note: attribute standardization is not used; this is
* equivalent to using the
- * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.AttributeWiseVarianceNormalization
+ * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.columnwise.AttributeWiseVarianceNormalization
* AttributeWiseVarianceNormalization} filter.
* </p>
*
@@ -72,7 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* @param <O> Non Spatial Vector
*/
@Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179")
-public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> {
+public class CTLuMedianMultipleAttributes<N, O extends NumberVector> extends AbstractNeighborhoodOutlier<N> {
/**
* logger
*/
@@ -144,13 +146,12 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- Vector temp = deltas.get(iditer).minus(mean);
- final double score = temp.transposeTimesTimes(cmati, temp);
+ final double score = MathUtil.mahalanobisDistance(cmati, deltas.get(iditer), mean);
minmax.put(score);
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Median multiple attributes outlier", "median-outlier", scores, attributes.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -172,7 +173,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends
* @param <N> Neighborhood type
* @param <O> Attributes vector type
*/
- public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public static class Parameterizer<N, O extends NumberVector> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMedianMultipleAttributes<N, O> makeInstance() {
return new CTLuMedianMultipleAttributes<>(npredf);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
index da527af0..1b59b79b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,13 +28,13 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -94,7 +94,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
// Compute the global mean and variance
@@ -136,7 +136,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("MoranOutlier", "Moran Scatterplot Outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -145,7 +145,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
index 85524b4e..e11785af 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,7 +28,6 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
@@ -37,13 +36,13 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -78,12 +77,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @author Ahmed Hettab
*
* @param <N> Spatial Vector type
- * @param <D> Distance to use
*/
@Title("Random Walk on Exhaustive Combination")
@Description("Spatial Outlier Detection using Random Walk on Exhaustive Combination")
@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url = "http://dx.doi.org/10.1145/1869790.1869841")
-public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<N, D, OutlierResult> implements OutlierAlgorithm {
+public class CTLuRandomWalkEC<N> extends AbstractDistanceBasedAlgorithm<N, OutlierResult> implements OutlierAlgorithm {
/**
* Logger.
*/
@@ -112,7 +110,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
* @param c C parameter
* @param k Number of neighbors
*/
- public CTLuRandomWalkEC(DistanceFunction<N, D> distanceFunction, double alpha, double c, int k) {
+ public CTLuRandomWalkEC(DistanceFunction<N> distanceFunction, double alpha, double c, int k) {
super(distanceFunction);
this.alpha = alpha;
this.c = c;
@@ -126,8 +124,8 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
* @param relation Attribute value relation
* @return Outlier result
*/
- public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?>> relation) {
- DistanceQuery<N, D> distFunc = getDistanceFunction().instantiate(spatial);
+ public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector> relation) {
+ DistanceQuery<N> distFunc = getDistanceFunction().instantiate(spatial);
WritableDataStore<Vector> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
@@ -136,7 +134,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
// construct the relation Matrix of the ec-graph
Matrix E = new Matrix(ids.size(), ids.size());
- KNNHeap<D> heap = DBIDUtil.newHeap(distFunc.getDistanceFactory(), k);
+ KNNHeap heap = DBIDUtil.newHeap(k);
{
int i = 0;
for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
@@ -148,10 +146,9 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
continue;
}
final double e;
- final D distance = distFunc.distance(id, n);
+ final double distance = distFunc.distance(id, n);
heap.insert(distance, n);
- double dist = distance.doubleValue();
- if(dist == 0) {
+ if(distance == 0) {
LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
e = 0;
}
@@ -160,7 +157,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
double exp = Math.exp(Math.pow(diff, alpha));
// Implementation note: not inverting exp worked a lot better.
// Therefore we diverge from the article here.
- e = exp / dist;
+ e = exp / distance;
}
E.set(j, i, e);
}
@@ -225,14 +222,14 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
scores.putDouble(id, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("randomwalkec", "RandomWalkEC", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
@Override
@@ -248,9 +245,8 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
* @apiviz.exclude
*
* @param <N> Vector type
- * @param <D> Distance type
*/
- public static class Parameterizer<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<N, D> {
+ public static class Parameterizer<N> extends AbstractDistanceBasedAlgorithm.Parameterizer<N> {
/**
* Parameter to specify the number of neighbors.
*/
@@ -327,7 +323,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
@Override
- protected CTLuRandomWalkEC<N, D> makeInstance() {
+ protected CTLuRandomWalkEC<N> makeInstance() {
return new CTLuRandomWalkEC<>(distanceFunction, alpha, c, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
index bcbbfd2a..6feb08f6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,14 +27,14 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -96,7 +96,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
@@ -160,7 +160,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
}
}
// build representation
- Relation<Double> scoreResult = new MaterializedRelation<>("SPO", "Scatterplot-Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("SPO", "Scatterplot-Outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -174,7 +174,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
index d6cb5a50..b973109a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -27,7 +27,6 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -35,7 +34,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -98,7 +98,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
@@ -133,7 +133,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
}
// Wrap result
- Relation<Double> scoreResult = new MaterializedRelation<>("ZTest", "Z Test score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("ZTest", "Z Test score", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -147,7 +147,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
index 08c3e29b..7fbb8486 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -34,10 +34,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
@@ -65,12 +65,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
*
* @param <N> the type the spatial neighborhood is defined over
* @param <O> the type of objects handled by the algorithm
- * @param <D> the type of Distance used for non spatial attributes
*/
@Title("SLOM: a new measure for local spatial outliers")
@Description("Spatial local outlier measure (SLOM), which captures the local behaviour of datum in their spatial neighbourhood")
@Reference(authors = "Sanjay Chawla and Pei Sun", title = "SLOM: a new measure for local spatial outliers", booktitle = "Knowledge and Information Systems 9(4), 412-429, 2006", url = "http://dx.doi.org/10.1007/s10115-005-0200-2")
-public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier<N, O, D> {
+public class SLOM<N, O> extends AbstractDistanceBasedSpatialOutlier<N, O> {
/**
* The logger for this class.
*/
@@ -83,7 +82,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
* @param nonSpatialDistanceFunction Distance function to use on the
* non-spatial attributes
*/
- public SLOM(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O, D> nonSpatialDistanceFunction) {
+ public SLOM(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O> nonSpatialDistanceFunction) {
super(npred, nonSpatialDistanceFunction);
}
@@ -95,7 +94,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
*/
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
- DistanceQuery<O, D> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
+ DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// calculate D-Tilde
@@ -109,7 +108,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- double dist = distFunc.distance(iditer, iter).doubleValue();
+ double dist = distFunc.distance(iditer, iter);
sum += dist;
cnt++;
maxDist = Math.max(maxDist, dist);
@@ -187,7 +186,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
slomminmax.put(slom);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("SLOM", "slom-outlier", TypeUtil.DOUBLE, sloms, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("SLOM", "slom-outlier", sloms, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(slomminmax.getMin(), slomminmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -213,11 +212,10 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
*
* @param <N> Neighborhood type
* @param <O> Data Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> {
+ public static class Parameterizer<N, O> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O> {
@Override
- protected SLOM<N, O, D> makeInstance() {
+ protected SLOM<N, O> makeInstance() {
return new SLOM<>(npredf, distanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
index a2605f39..f9823e56 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,10 +32,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -65,11 +65,10 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
*
* @param <N> Neighborhood object type
* @param <O> Attribute object type
- * @param <D> Distance type
*/
@Title("Spatial Outlier Factor")
@Reference(authors = "Huang, T., Qin, X.", title = "Detecting outliers in spatial database", booktitle = "Proc. 3rd International Conference on Image and Graphics", url = "http://dx.doi.org/10.1109/ICIG.2004.53")
-public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier<N, O, D> {
+public class SOF<N, O> extends AbstractDistanceBasedSpatialOutlier<N, O> {
/**
* The logger for this class.
*/
@@ -82,7 +81,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
* @param nonSpatialDistanceFunction Distance function on non-spatial
* attributes
*/
- public SOF(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O, D> nonSpatialDistanceFunction) {
+ public SOF(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O> nonSpatialDistanceFunction) {
super(npred, nonSpatialDistanceFunction);
}
@@ -101,7 +100,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
*/
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
- DistanceQuery<O, D> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
+ DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
@@ -112,7 +111,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += distFunc.distance(iditer, iter).doubleValue();
+ avg += distFunc.distance(iditer, iter);
}
double lrd = 1 / (avg / neighbors.size());
if (Double.isNaN(lrd)) {
@@ -138,7 +137,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Spatial Outlier Factor", "sof-outlier", lofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -159,11 +158,10 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
*
* @param <N> Neighborhood type
* @param <O> Attribute object type
- * @param <D> Distance type
*/
- public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> {
+ public static class Parameterizer<N, O> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O> {
@Override
- protected SOF<N, O, D> makeInstance() {
+ protected SOF<N, O> makeInstance() {
return new SOF<>(npredf, distanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
index 1a1f9a82..e46976ab 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,14 +29,14 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -109,7 +109,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
* @param relation Data Relation (1 dimensional!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
@@ -145,13 +145,9 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
// Error: deviation from trimmed mean
errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
- if(progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- if(progress != null) {
- progress.ensureCompleted(LOG);
+ LOG.incrementProcessed(progress);
}
+ LOG.ensureCompleted(progress);
if(LOG.isVerbose()) {
LOG.verbose("Computing median error.");
@@ -187,7 +183,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
minmax.put(score);
}
//
- Relation<Double> scoreResult = new MaterializedRelation<>("TrimmedMean", "Trimmed Mean Score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("TrimmedMean", "Trimmed Mean Score", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -202,7 +198,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
@Override
public TypeInformation[] getInputTypeRestriction() {
// Get one dimensional attribute for analysis.
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
index ef237928..506c722a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
index c93b10cb..145aecb1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -156,13 +156,9 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
todo = ntodo;
}
store.put(iter, res);
- if(progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- if(progress != null) {
- progress.ensureCompleted(LOG);
+ LOG.incrementProcessed(progress);
}
+ LOG.ensureCompleted(progress);
return store;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
index 33b5010a..5bdd05bf 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -169,13 +169,11 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
}
}
- try {
- if(LOG.isDebugging()) {
- LOG.verbose("Loading neighborhood file.");
- }
- InputStream in = new FileInputStream(file);
- in = FileUtil.tryGzipInput(in);
- BufferedReader br = new BufferedReader(new InputStreamReader(in));
+ if(LOG.isDebugging()) {
+ LOG.verbose("Loading neighborhood file.");
+ }
+ try(InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
+ BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
for(String line; (line = br.readLine()) != null;) {
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray();
String[] entries = line.split(" ");
@@ -200,9 +198,6 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
}
}
}
- br.close();
- in.close();
-
return store;
}
catch(IOException e) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
index 25283d5c..5d35aff8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -28,7 +28,6 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.result.Result;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
/**
* Predicate to obtain the neighbors of a reference object as set.
@@ -54,7 +53,7 @@ public interface NeighborSetPredicate extends Result {
*
* @param <O> Input relation object type restriction
*/
- public static interface Factory<O> extends Parameterizable {
+ public static interface Factory<O> {
/**
* Instantiation method.
*
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
index c43ebba7..18ab30d7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2013
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
@@ -32,11 +33,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -48,10 +48,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* Neighborhoods based on k nearest neighbors.
*
* @author Ahmed Hettab
- *
- * @param <D> Distance to use
*/
-public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> extends AbstractPrecomputedNeighborhood {
+public class PrecomputedKNearestNeighborNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* Logger
*/
@@ -88,11 +86,10 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
*
* @apiviz.stereotype factory
* @apiviz.has PrecomputedKNearestNeighborNeighborhood
- *
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Factory<O, D extends Distance<D>> implements NeighborSetPredicate.Factory<O> {
+ public static class Factory<O> implements NeighborSetPredicate.Factory<O> {
/**
* parameter k
*/
@@ -101,12 +98,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
/**
* distance function to use
*/
- private DistanceFunction<? super O, D> distFunc;
+ private DistanceFunction<? super O> distFunc;
/**
* Factory Constructor
*/
- public Factory(int k, DistanceFunction<? super O, D> distFunc) {
+ public Factory(int k, DistanceFunction<? super O> distFunc) {
super();
this.k = k;
this.distFunc = distFunc;
@@ -114,19 +111,19 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
@Override
public NeighborSetPredicate instantiate(Relation<? extends O> relation) {
- KNNQuery<?, D> knnQuery = QueryUtil.getKNNQuery(relation, distFunc);
+ KNNQuery<?> knnQuery = QueryUtil.getKNNQuery(relation, distFunc);
// TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ KNNList neighbors = knnQuery.getKNNForDBID(iditer, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
neighbours.add(neighbor);
}
s.put(iditer, neighbours);
}
- return new PrecomputedKNearestNeighborNeighborhood<D>(s);
+ return new PrecomputedKNearestNeighborNeighborhood(s);
}
@Override
@@ -142,9 +139,8 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
* @apiviz.exclude
*
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer {
+ public static class Parameterizer<O> extends AbstractParameterizer {
/**
* Parameter k
*/
@@ -163,7 +159,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
/**
* Distance function
*/
- DistanceFunction<? super O, D> distFunc;
+ DistanceFunction<? super O> distFunc;
@Override
protected void makeOptions(Parameterization config) {
@@ -172,14 +168,14 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
if(config.grab(kP)) {
k = kP.getValue();
}
- final ObjectParameter<DistanceFunction<? super O, D>> distP = new ObjectParameter<>(DISTANCEFUNCTION_ID, DistanceFunction.class);
+ final ObjectParameter<DistanceFunction<? super O>> distP = new ObjectParameter<>(DISTANCEFUNCTION_ID, DistanceFunction.class);
if(config.grab(distP)) {
distFunc = distP.instantiateClass(config);
}
}
@Override
- protected PrecomputedKNearestNeighborNeighborhood.Factory<O, D> makeInstance() {
+ protected PrecomputedKNearestNeighborNeighborhood.Factory<O> makeInstance() {
return new PrecomputedKNearestNeighborNeighborhood.Factory<>(k, distFunc);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
index fd51ca22..6199412c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
index 4d6ec635..e1abc23c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
index 9bdb7d51..6c2fa7c1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
index ca0fa620..74ffaaa4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,7 +29,6 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
/**
* Neighbor predicate with weight support.
@@ -55,7 +54,7 @@ public interface WeightedNeighborSetPredicate {
*
* @param <O> Input relation object type restriction
*/
- public static interface Factory<O> extends Parameterizable {
+ public static interface Factory<O> {
/**
* Instantiation method.
*
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
index d7c7a797..c09fdf19 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
index 5a65d8c1..e1325935 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java
index 2b12b306..8ee5e2cd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,17 +24,17 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import java.util.ArrayList;
-import java.util.Collections;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
@@ -52,8 +52,8 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
* Abstract base class for the sparse-grid-cell based outlier detection of
* Aggarwal and Yu.
*
+ * Reference:
* <p>
- * Reference: <br />
* Outlier detection for high dimensional data<br />
* C.C. Aggarwal, P. S. Yu<br />
* International Conference on Management of Data Proceedings of the 2001 ACM
@@ -66,15 +66,20 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* @param <V> Vector type
*/
-@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "C.C. Aggarwal, P. S. Yu", //
+title = "Outlier detection for high dimensional data", //
+booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", //
+url = "http://dx.doi.org/10.1145/375663.375668")
+public abstract class AbstractAggarwalYuOutlier<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* Symbolic value for subspaces not in use.
- *
- * Note: in some places, the implementations may rely on this having the value
- * 0 currently!
*/
- public static final int DONT_CARE = 0;
+ public static final short DONT_CARE = -1;
+
+ /**
+ * The first bucket.
+ */
+ public static final short GENE_OFFSET = DONT_CARE + 1;
/**
* The number of partitions for each dimension.
@@ -109,38 +114,23 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) {
final int dim = RelationUtil.dimensionality(relation);
final int size = relation.size();
- final DBIDs allids = relation.getDBIDs();
final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<>();
- // Temporary projection storage of the database
- final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<>(dim);
- for(int i = 0; i < dim; i++) {
- ArrayList<DoubleDBIDPair> axis = new ArrayList<>(size);
- dbAxis.add(i, axis);
- }
- // Project
- for(DBIDIter iter = allids.iter(); iter.valid(); iter.advance()) {
- final V obj = relation.get(iter);
- for(int d = 0; d < dim; d++) {
- dbAxis.get(d).add(DBIDUtil.newPair(obj.doubleValue(d), iter));
- }
- }
+ ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
+ SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(relation);
// Split into cells
final double part = size * 1.0 / phi;
for(int d = 0; d < dim; d++) {
- ArrayList<DoubleDBIDPair> axis = dbAxis.get(d);
- Collections.sort(axis);
+ sorter.setDimension(d);
+ ids.sort(sorter);
ArrayList<DBIDs> dimranges = new ArrayList<>(phi + 1);
- dimranges.add(allids);
int start = 0;
- for(int r = 0; r < phi; r++) {
- int end = (int) (part * r);
- if(r == phi - 1) {
- end = size;
- }
- ArrayModifiableDBIDs currange = DBIDUtil.newArray(phi + 1);
- for(int i = start; i < end; i++) {
- currange.add(axis.get(i));
+ DBIDArrayIter iter = ids.iter();
+ for(int r = 1; r <= phi; r++) {
+ int end = (r < phi) ? (int) (part * r) : size;
+ ArrayModifiableDBIDs currange = DBIDUtil.newArray(end - start);
+ for(iter.seek(start); iter.getOffset() < end; iter.advance()) {
+ currange.add(iter);
}
start = end;
dimranges.add(currange);
@@ -178,7 +168,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first).get(subspace.get(0).second));
// intersect all selected dimensions
for(int i = 1; i < subspace.size(); i++) {
- DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second);
+ DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second - GENE_OFFSET);
ids.retainAll(current);
if(ids.size() == 0) {
break;
@@ -194,15 +184,21 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
* @param ranges Database ranges
* @return resulting DBIDs
*/
- protected DBIDs computeSubspaceForGene(int[] gene, ArrayList<ArrayList<DBIDs>> ranges) {
- HashSetModifiableDBIDs m = DBIDUtil.newHashSet(ranges.get(0).get(gene[0]));
- // intersect
- for(int i = 1; i < gene.length; i++) {
+ protected DBIDs computeSubspaceForGene(short[] gene, ArrayList<ArrayList<DBIDs>> ranges) {
+ HashSetModifiableDBIDs m = null;
+ // intersect all present restrictions
+ for(int i = 0; i < gene.length; i++) {
if(gene[i] != DONT_CARE) {
- DBIDs current = ranges.get(i).get(gene[i]);
- m.retainAll(current);
+ DBIDs current = ranges.get(i).get(gene[i] - GENE_OFFSET);
+ if(m == null) {
+ m = DBIDUtil.newHashSet(current);
+ }
+ else {
+ m.retainAll(current);
+ }
}
}
+ assert (m != null) : "All genes set to '*', should not happen!";
return m;
}
@@ -242,13 +238,13 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ final IntParameter kP = new IntParameter(K_ID)//
+ .addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
- final IntParameter phiP = new IntParameter(PHI_ID);
- phiP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ final IntParameter phiP = new IntParameter(PHI_ID)//
+ .addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(phiP)) {
phi = phiP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java
index c4e5cc5d..b32e5124 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,49 +23,48 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.iterator.TIntIterator;
+import gnu.trove.list.array.TIntArrayList;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.Iterator;
import java.util.Random;
-import java.util.TreeSet;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
- * EAFOD provides the evolutionary outlier detection algorithm, an algorithm to
- * detect outliers for high dimensional data.
+ * Evolutionary variant (EAFOD) of the high-dimensional outlier detection
+ * algorithm by Aggarwal and Yu.
* <p>
* Reference: <br />
* Outlier detection for high dimensional data<br />
@@ -86,7 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("EAFOD: the evolutionary outlier detection algorithm")
@Description("Outlier detection for high dimensional data")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuEvolutionary<V extends NumberVector> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
@@ -98,6 +97,11 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
protected final static int MAX_ITERATIONS = 1000;
/**
+ * At which gene homogeneity do we have convergence?
+ */
+ protected final static double CONVERGENCE = .85;
+
+ /**
* Holds the value of {@link Parameterizer#M_ID}.
*/
private int m;
@@ -155,7 +159,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
}
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuEvolutionary", "aggarwal-yu-outlier", TypeUtil.DOUBLE, outlierScore, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuEvolutionary", "aggarwal-yu-outlier", outlierScore, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -223,14 +227,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
bestSol.add(ind);
}
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Evolutionary search iterations", LOG) : null;
int iterations = 0;
while(!checkConvergence(pop)) {
Collections.sort(pop);
+ // Fitter members are more likely to survive
pop = rouletteRankSelection(pop);
- // Crossover
+ // Crossover survivors
pop = crossoverOptimized(pop);
// Mutation with probability 0.25 , 0.25
- pop = mutation(pop, 0.5, 0.5);
+ pop = mutation(pop, 0.25, 0.25);
// Avoid duplicates
ind: for(Individuum ind : pop) {
for(Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
@@ -253,11 +259,15 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
LOG.debugFinest(buf.toString());
}
iterations++;
+ LOG.incrementProcessed(prog);
if(iterations > MAX_ITERATIONS) {
LOG.warning("Maximum iterations reached.");
break;
}
}
+ if(prog != null) {
+ prog.setCompleted(LOG);
+ }
return bestSol.unorderedIter();
}
@@ -276,25 +286,28 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
int[][] occur = new int[dim][phi + 1];
// Count gene occurrences
for(Individuum ind : pop) {
- int[] gene = ind.getGene();
+ short[] gene = ind.getGene();
for(int d = 0; d < dim; d++) {
- int val = gene[d] + DONT_CARE;
- if(val < 0 || val >= phi + 1) {
+ if(gene[d] == DONT_CARE) {
+ occur[d][0] += 1;
+ continue;
+ }
+ int val = gene[d] - GENE_OFFSET;
+ if(val < 0 || val >= phi) {
LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
- occur[d][val] += 1;
+ occur[d][val + 1] += 1;
}
}
- int conv = (int) (pop.size() * 0.95);
+ int conv = (int) Math.floor(pop.size() * CONVERGENCE);
if(LOG.isDebuggingFine()) {
LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
for(int d = 0; d < dim; d++) {
boolean converged = false;
-
- for(int val = 0; val < phi + 1; val++) {
+ for(int val = 0; val <= phi; val++) {
if(occur[d][val] >= conv) {
converged = true;
break;
@@ -320,24 +333,23 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// fill population
for(int i = 0; i < popsize; i++) {
// Random Individual
- int[] gene = new int[dim];
+ short[] gene = new short[dim];
// fill don't care ( any dimension == don't care)
- for(int j = 0; j < dim; j++) {
- gene[j] = DONT_CARE;
- }
+ Arrays.fill(gene, DONT_CARE);
// count of don't care positions
int countDim = k;
// fill non don't care positions of the Individual
while(countDim > 0) {
int z = random.nextInt(dim);
- if(gene[z] == DONT_CARE) {
- gene[z] = random.nextInt(phi) + 1;
- countDim--;
+ if(gene[z] != DONT_CARE) {
+ continue;
}
+ gene[z] = (short) (random.nextInt(phi) + GENE_OFFSET);
+ countDim--;
}
population.add(makeIndividuum(gene));
}
- Collections.sort(population);
+ // Collections.sort(population);
return population;
}
@@ -363,94 +375,56 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
// position of selection
for(int i = 0; i < popsize; i++) {
int z = random.nextInt(totalweight);
- for(int j = 0; j < popsize; j++) {
- if(z < popsize - j) {
- // TODO: need clone?
+ for(int j = 0, rank = popsize; j < popsize; ++j, --rank) {
+ if(z < rank) {
survivors.add(population.get(j));
break;
}
- else {
- // decrement
- z -= (popsize - j);
- }
+ z -= rank;
}
}
- if(survivors.size() != popsize) {
- throw new AbortException("Selection step failed - implementation error?");
- }
- // Don't sort, to avoid biasing the crossover!
- // Collections.sort(survivors);
+ assert (survivors.size() == popsize) : "Selection step failed - implementation error?";
return survivors;
}
/**
- * Apply the mutation alogrithm.
+ * Apply the mutation algorithm.
*/
private ArrayList<Individuum> mutation(ArrayList<Individuum> population, double perc1, double perc2) {
// the Mutations
ArrayList<Individuum> mutations = new ArrayList<>();
- // Set of Positions which are don't care in the String
- TreeSet<Integer> Q = new TreeSet<>();
- // Set of Positions which are not don't care in the String
- TreeSet<Integer> R = new TreeSet<>();
+ int[] QR = new int[dim];
// for each individuum
for(int j = 0; j < population.size(); j++) {
- // clear the Sets
- Q.clear();
- R.clear();
- // Fill the Sets with the Positions
+ short[] gene = population.get(j).getGene().clone();
+ // Fill position array for mutation process
+ int q = 0, r = dim;
for(int i = 0; i < dim; i++) {
- if(population.get(j).getGene()[i] == DONT_CARE) {
- Q.add(i);
- }
- else {
- R.add(i);
- }
+ QR[(gene[i] == DONT_CARE) ? (q++) : (--r)] = i;
}
- //
- double r1 = random.nextDouble();
- if(Q.size() != 0) {
- // Mutation Variant 1
- if(r1 <= perc1) {
- // calc Mutation Spot
- Integer[] pos = new Integer[Q.size()];
- pos = Q.toArray(pos);
- int position = random.nextInt(pos.length);
- int depth = pos[position];
- // Mutate don't care into 1....phi
- population.get(j).getGene()[depth] = random.nextInt(phi) + 1;
- // update Sets
- Q.remove(depth);
- R.add(depth);
- // calc new Mutation Spot
- pos = new Integer[R.size()];
- pos = R.toArray(pos);
- position = random.nextInt(pos.length);
- depth = pos[position];
- // Mutate non don't care into don't care
- population.get(j).getGene()[depth] = DONT_CARE;
- // update Sets
- Q.add(depth);
- R.remove(depth);
- }
+ // Mutation variant 1
+ if(q > 0 && r < dim && random.nextDouble() <= perc1) {
+ // Random mutation spots:
+ int rq = random.nextInt(q), rr = random.nextInt(dim - r) + r;
+ int pq = QR[rq], pr = QR[rr];
+ // Mutate don't care (position pq) into 1....phi
+ gene[pq] = (short) (random.nextInt(phi) + GENE_OFFSET);
+ // Mutate non don't care (position pr) into don't care
+ gene[pr] = DONT_CARE;
+ // update sets, by swapping the position values
+ QR[rq] = pr;
+ QR[rr] = pq;
}
- r1 = random.nextDouble();
// Mutation Variant 2
- if(r1 <= perc2) {
+ if(random.nextDouble() <= perc2) {
// calc Mutation Spot
- Integer[] pos = new Integer[R.size()];
- pos = R.toArray(pos);
- int position = random.nextInt(pos.length);
- int depth = pos[position];
+ int pr = random.nextInt(dim - r) + r;
// Mutate 1...phi into another 1...phi
- population.get(j).getGene()[depth] = random.nextInt(phi) + 1;
+ gene[QR[pr]] = (short) (random.nextInt(phi) + GENE_OFFSET);
}
- int[] gene = population.get(j).getGene();
mutations.add(makeIndividuum(gene));
-
}
- Collections.sort(mutations);
return mutations;
}
@@ -460,7 +434,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @param gene Gene to evaluate
* @return new individuum
*/
- private Individuum makeIndividuum(int[] gene) {
+ private Individuum makeIndividuum(short[] gene) {
final DBIDs ids = computeSubspaceForGene(gene, ranges);
final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k, phi) : Double.MAX_VALUE;
return new Individuum(fitness, gene);
@@ -483,7 +457,6 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
if(population.size() % 2 == 1) {
crossover.add(population.get(population.size() - 1));
}
- // Collections.sort(crossover);
return crossover;
}
@@ -497,9 +470,9 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
private Pair<Individuum, Individuum> recombineOptimized(Individuum parent1, Individuum parent2) {
Pair<Individuum, Individuum> recombinePair;
// Set of Positions in which either s1 or s2 are don't care
- ArrayList<Integer> Q = new ArrayList<>(dim);
+ TIntArrayList Q = new TIntArrayList(dim);
// Set of Positions in which neither s1 or s2 is don't care
- ArrayList<Integer> R = new ArrayList<>(dim);
+ TIntArrayList R = new TIntArrayList(dim);
for(int i = 0; i < dim; i++) {
if((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
@@ -516,21 +489,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
Individuum best = combineRecursive(R, 0, Individuum.nullIndividuum(dim).getGene(), parent1, parent2);
// Extends gene greedily
- int[] b = best.getGene();
+ short[] b = best.getGene();
int count = k - R.size();
- Iterator<Integer> q = Q.iterator();
+ TIntIterator q = Q.iterator();
while(count > 0) {
- int[] l1 = b.clone();
- int[] l2 = b.clone();
+ short[] l1 = b.clone();
+ short[] l2 = b.clone();
while(q.hasNext()) {
int next = q.next();
// pos = next;
{
- boolean s1Null = (parent1.getGene()[next] == 0);
- boolean s2Null = (parent1.getGene()[next] == 0);
+ boolean s1Null = (parent1.getGene()[next] == DONT_CARE);
+ boolean s2Null = (parent2.getGene()[next] == DONT_CARE);
l1[next] = parent1.getGene()[next];
l2[next] = parent2.getGene()[next];
@@ -556,7 +529,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
}
// create the complementary String
- int[] comp = new int[dim];
+ short[] comp = new short[dim];
for(int i = 0; i < dim; i++) {
if(b[i] == parent1.getGene()[i]) {
@@ -584,26 +557,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @param parent2 Second parent
* @return best gene combination
*/
- private Individuum combineRecursive(ArrayList<Integer> r, int i, int[] current, Individuum parent1, Individuum parent2) {
+ private Individuum combineRecursive(TIntArrayList r, int i, short[] current, Individuum parent1, Individuum parent2) {
if(i == r.size()) {
return makeIndividuum(current);
}
// Position to modify
int pos = r.get(i);
// Build genes
- int[] gene1 = current.clone();
- int[] gene2 = current; // .clone();
+ short[] gene1 = current.clone();
+ short[] gene2 = current; // .clone();
gene1[pos] = parent1.getGene()[pos];
gene2[pos] = parent2.getGene()[pos];
Individuum i1 = combineRecursive(r, i + 1, gene1, parent1, parent2);
Individuum i2 = combineRecursive(r, i + 1, gene2, parent1, parent2);
// Return the better result.
- if(i1.getFitness() < i2.getFitness()) {
- return i1;
- }
- else {
- return i2;
- }
+ return (i1.getFitness() < i2.getFitness()) ? i1 : i2;
}
}
@@ -611,18 +579,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* Individuum for the evolutionary search.
*
* @author Erich Schubert
- *
- * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair
*/
- private static class Individuum extends FCPair<Double, int[]> {
+ private static class Individuum implements Comparable<Individuum> {
+ double fitness;
+
+ short[] gene;
+
/**
* Constructor
*
* @param fitness Fitness
* @param gene Gene information
*/
- public Individuum(double fitness, int[] gene) {
- super(fitness, gene);
+ public Individuum(double fitness, short[] gene) {
+ this.fitness = fitness;
+ this.gene = gene;
}
/**
@@ -630,8 +601,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
*
* @return the gene information
*/
- public int[] getGene() {
- return second;
+ public short[] getGene() {
+ return gene;
}
/**
@@ -640,7 +611,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @return fitness
*/
public double getFitness() {
- return first;
+ return fitness;
}
/**
@@ -650,14 +621,29 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
* @return new individuum
*/
public static Individuum nullIndividuum(int dim) {
- int[] gene = new int[dim];
+ short[] gene = new short[dim];
Arrays.fill(gene, DONT_CARE);
return new Individuum(0.0, gene);
}
@Override
public String toString() {
- return "I(f=" + first + ",g=" + FormatUtil.format(second) + ")";
+ StringBuilder buf = new StringBuilder();
+ buf.append("I(f=").append(fitness);
+ buf.append(",g=");
+ for(int i = 0; i < gene.length; i++) {
+ if(i > 0) {
+ buf.append(",");
+ }
+ if(gene[i] == DONT_CARE) {
+ buf.append("*");
+ }
+ else {
+ buf.append(gene[i]);
+ }
+ }
+ buf.append(")");
+ return buf.toString();
}
@Override
@@ -666,16 +652,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
return false;
}
Individuum other = (Individuum) obj;
- if(other.second.length != this.second.length) {
+ if(other.gene.length != this.gene.length) {
return false;
}
- for(int i = 0; i < this.second.length; i++) {
- if(other.second[i] != this.second[i]) {
+ for(int i = 0; i < this.gene.length; i++) {
+ if(other.gene[i] != this.gene[i]) {
return false;
}
}
return true;
}
+
+ @Override
+ public int compareTo(Individuum o) {
+ return Double.compare(this.fitness, o.fitness);
+ }
}
/**
@@ -685,7 +676,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractAggarwalYuOutlier.Parameterizer {
/**
* Parameter to specify the number of solutions must be an integer greater
* than 1.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java
index 1816c3a3..4ee1969b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java
@@ -1,10 +1,10 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -26,13 +26,13 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import java.util.ArrayList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -46,12 +46,12 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
/**
- * BruteForce provides a naive brute force algorithm in which all k-subsets of
- * dimensions are examined and calculates the sparsity coefficient to find
- * outliers.
+ * BruteForce variant of the high-dimensional outlier detection algorithm by
+ * Aggarwal and Yu.
*
* The evolutionary approach is implemented as
- * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuEvolutionary}.
+ * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.subspace.AggarwalYuEvolutionary}
+ * .
*
* <p>
* Reference: <br />
@@ -71,7 +71,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
@Title("BruteForce: Outlier detection for high dimensional data")
@Description("Examines all possible sets of k dimensional projections")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuNaive<V extends NumberVector> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
@@ -106,7 +106,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
// Set of all dim*phi ranges
ArrayList<IntIntPair> q = new ArrayList<>();
for(int i = 0; i < dimensionality; i++) {
- for(int j = 1; j <= phi; j++) {
+ for(int j = 0; j < phi; j++) {
IntIntPair s = new IntIntPair(i, j);
q.add(s);
// Add to first Rk
@@ -148,7 +148,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
final double sparsityC = sparsity(ids.size(), size, k, phi);
if(sparsityC < 0) {
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double prev = sparsity.doubleValue(iter);
if(Double.isNaN(prev) || sparsityC < prev) {
sparsity.putDouble(iter, sparsityC);
@@ -157,7 +157,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
}
}
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double val = sparsity.doubleValue(iditer);
if(Double.isNaN(val)) {
sparsity.putDouble(iditer, 0.0);
@@ -165,7 +165,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
}
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuNaive", "aggarwal-yu-outlier", TypeUtil.DOUBLE, sparsity, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuNaive", "aggarwal-yu-outlier", sparsity, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -182,7 +182,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractAggarwalYuOutlier.Parameterizer {
@Override
protected AggarwalYuNaive<V> makeInstance() {
return new AggarwalYuNaive<>(k, phi);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
index c21542da..b3a03ba6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
*/
import java.util.Arrays;
-import java.util.BitSet;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -37,21 +36,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -63,6 +58,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -93,7 +89,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
* @param <V> vector type
*/
@Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management")
-public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class OUTRES<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -130,25 +126,21 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
DoubleMinMax minmax = new DoubleMinMax();
KernelDensityEstimator kernel = new KernelDensityEstimator(relation);
- BitSet subspace = new BitSet(kernel.dim);
+ long[] subspace = BitsUtil.zero(kernel.dim);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- subspace.clear();
+ BitsUtil.zeroI(subspace);
double score = outresScore(0, subspace, iditer, kernel);
ranks.putDouble(iditer, score);
minmax.put(score);
- if(progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- if(progress != null) {
- progress.ensureCompleted(LOG);
+ LOG.incrementProcessed(progress);
}
+ LOG.ensureCompleted(progress);
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.);
- OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs()));
+ OutlierResult outresResult = new OutlierResult(meta, new MaterializedDoubleRelation("OUTRES", "outres-score", ranks, relation.getDBIDs()));
return outresResult;
}
@@ -161,33 +153,34 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param kernel Kernel
* @return Score
*/
- public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) {
+ public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
double score = 1.0; // Initial score is 1.0
final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
MeanVariance meanv = new MeanVariance();
for(int i = s; i < kernel.dim; i++) {
- if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0?
+ if(BitsUtil.get(subspace, i)) { // TODO: needed? Or should we always start
+ // with i=0?
continue;
}
- subspace.set(i);
+ BitsUtil.setI(subspace, i);
df.setSelectedDimensions(subspace);
final double adjustedEps = kernel.adjustedEps(kernel.dim);
// Query with a larger window, to also get neighbors of neighbors
// Subspace euclidean is metric!
- final DoubleDistance range = new DoubleDistance(adjustedEps * 2.);
- RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
+ final double range = adjustedEps * 2.;
+ RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
- DistanceDBIDList<DoubleDistance> neighc = rq.getRangeForDBID(id, range);
- DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps);
+ DoubleDBIDList neighc = rq.getRangeForDBID(id, range);
+ DoubleDBIDList neigh = refineRange(neighc, adjustedEps);
if(neigh.size() > 2) {
// Relevance test
if(relevantSubspace(subspace, neigh, kernel)) {
final double density = kernel.subspaceDensity(subspace, neigh);
// Compute mean and standard deviation for densities of neighbors.
meanv.reset();
- for (DoubleDistanceDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
- DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
+ for(DoubleDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
meanv.put(kernel.subspaceDensity(subspace, n2));
}
final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
@@ -199,7 +192,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
score *= outresScore(i + 1, subspace, id, kernel);
}
}
- subspace.clear(i);
+ BitsUtil.clearI(subspace, i);
}
return score;
}
@@ -211,21 +204,14 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param adjustedEps New epsilon
* @return refined list
*/
- private DoubleDistanceDBIDList refineRange(DistanceDBIDList<DoubleDistance> neighc, double adjustedEps) {
- ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size());
+ private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) {
+ ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size());
// We don't have a guarantee for this list to be sorted
- for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
- DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
- if(p instanceof DoubleDistanceDBIDPair) {
- if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) {
- n.add((DoubleDistanceDBIDPair) p);
- }
- }
- else {
- double dist = p.getDistance().doubleValue();
- if(dist <= adjustedEps) {
- n.add(dist, p);
- }
+ for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDBIDPair p = neighbor.getPair();
+ double dist = p.doubleValue();
+ if(dist <= adjustedEps) {
+ n.add(dist, p);
}
}
return n;
@@ -241,12 +227,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param kernel Kernel
* @return Neighbors of neighbor object
*/
- private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDList<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
- ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size());
+ private DoubleDBIDList subsetNeighborhoodQuery(DoubleDBIDList neighc, DBIDRef dbid, PrimitiveDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
+ ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size());
V query = kernel.relation.get(dbid);
- for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
- DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
- double dist = df.doubleDistance(query, kernel.relation.get(p));
+ for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDBIDPair p = neighbor.getPair();
+ double dist = df.distance(query, kernel.relation.get(p));
if(dist <= adjustedEps) {
n.add(dist, p);
}
@@ -262,16 +248,16 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param kernel Kernel density estimator
* @return relevance test result
*/
- protected boolean relevantSubspace(BitSet subspace, DoubleDistanceDBIDList neigh, KernelDensityEstimator kernel) {
+ protected boolean relevantSubspace(long[] subspace, DoubleDBIDList neigh, KernelDensityEstimator kernel) {
Relation<V> relation = kernel.relation;
final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size());
- for(int dim = subspace.nextSetBit(0); dim > 0; dim = subspace.nextSetBit(dim + 1)) {
+ for(int dim = BitsUtil.nextSetBit(subspace, 0); dim > 0; dim = BitsUtil.nextSetBit(subspace, dim + 1)) {
// TODO: can we save this copy somehow?
double[] data = new double[neigh.size()];
{
int count = 0;
- for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
V vector = relation.get(neighbor);
data[count] = vector.doubleValue(dim);
count++;
@@ -347,12 +333,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param neighbors Neighbor distance list
* @return Density
*/
- protected double subspaceDensity(BitSet subspace, DoubleDistanceDBIDList neighbors) {
- final double bandwidth = optimalBandwidth(subspace.cardinality());
+ protected double subspaceDensity(long[] subspace, DoubleDBIDList neighbors) {
+ final double bandwidth = optimalBandwidth(BitsUtil.cardinality(subspace));
double density = 0;
- for (DoubleDistanceDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- double v = neighbor.doubleDistance() / bandwidth;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ double v = neighbor.doubleValue() / bandwidth;
if(v < 1) {
density += 1 - (v * v);
}
@@ -407,7 +393,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* Option ID for Epsilon parameter
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
index 3e248bfa..a87515e5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -37,13 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -83,7 +84,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
/**
* Clustering algorithm to run.
*/
- protected SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> clusteralg;
+ protected SubspaceClusteringAlgorithm<? extends SubspaceModel> clusteralg;
/**
* Weighting parameter of size vs. dimensionality score.
@@ -97,7 +98,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
* {@link SubspaceClusteringAlgorithm}!)
* @param alpha Alpha parameter to balance size and dimensionality.
*/
- public OutRankS1(SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> clusteralg, double alpha) {
+ public OutRankS1(SubspaceClusteringAlgorithm<? extends SubspaceModel> clusteralg, double alpha) {
super();
this.clusteralg = clusteralg;
this.alpha = alpha;
@@ -105,35 +106,35 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
@Override
public OutlierResult run(Database database) {
- DBIDs ids = database.getRelation(TypeUtil.DBID).getDBIDs();
+ DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
// Run the primary algorithm
- Clustering<? extends SubspaceModel<?>> clustering = clusteralg.run(database);
+ Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
score.putDouble(iter, 0);
}
int maxdim = 0, maxsize = 0;
// Find maximum dimensionality and cluster size
- for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ for(Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
maxsize = Math.max(maxsize, cluster.size());
- maxdim = Math.max(maxdim, cluster.getModel().getDimensions().cardinality());
+ maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions()));
}
// Iterate over all clusters:
DoubleMinMax minmax = new DoubleMinMax();
- for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ for(Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
double relsize = cluster.size() / (double) maxsize;
- double reldim = cluster.getModel().getDimensions().cardinality() / (double) maxdim;
+ double reldim = BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim;
// Process objects in the cluster
- for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
score.putDouble(iter, newscore);
minmax.put(newscore);
}
}
- Relation<Double> scoreResult = new MaterializedRelation<>("OutRank-S1", "OUTRANK_S1", TypeUtil.DOUBLE, score, ids);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
OutlierResult res = new OutlierResult(meta, scoreResult);
res.addChildResult(clustering);
@@ -171,7 +172,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
/**
* Clustering algorithm to run.
*/
- protected SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> algorithm = null;
+ protected SubspaceClusteringAlgorithm<? extends SubspaceModel> algorithm = null;
/**
* Alpha parameter to balance parameters
@@ -181,13 +182,13 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
@Override
protected void makeOptions(Parameterization config) { // Reads the clustering algorithm and the alpha value from the given parameterization.
super.makeOptions(config); // superclass options are processed before the ones declared here
- ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class);
- if (config.grab(algP)) {
+ ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel>> algP = new ObjectParameter<>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class); // option ALGORITHM_ID, typed to SubspaceClusteringAlgorithm
+ if(config.grab(algP)) { // the field is only assigned when the option could be grabbed
algorithm = algP.instantiateClass(config);
}
DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.25); // 0.25 is presumably the default value; TODO confirm against DoubleParameter
alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); // constraint name suggests alpha must be strictly positive
- if (config.grab(alphaP)) {
+ if(config.grab(alphaP)) { // alpha is only assigned when the option could be grabbed
alpha = alphaP.doubleValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
index 489f811b..b8372884 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
-
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -42,10 +40,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -59,6 +57,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -91,12 +90,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @apiviz.has SharedNearestNeighborSimilarityFunction
*
* @param <V> the type of NumberVector handled by this Algorithm
- * @param <D> distance type
*/
@Title("SOD: Subspace outlier degree")
@Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2")
-public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class SOD<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -115,7 +113,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
/**
* Similarity function to use.
*/
- private SimilarityFunction<V, D> similarityFunction;
+ private SimilarityFunction<V> similarityFunction;
/**
* Report models.
@@ -130,7 +128,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param similarityFunction Shared nearest neighbor similarity function
* @param models Report generated models
*/
- public SOD(int knn, double alpha, SimilarityFunction<V, D> similarityFunction, boolean models) {
+ public SOD(int knn, double alpha, SimilarityFunction<V> similarityFunction, boolean models) {
super();
this.knn = knn;
this.alpha = alpha;
@@ -145,54 +143,51 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @return Outlier result
*/
public OutlierResult run(Relation<V> relation) {
- SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation);
+ SimilarityQuery<V> snnInstance = similarityFunction.instantiate(relation);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
final WritableDoubleDataStore sod_scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
WritableDataStore<SODModel> sod_models = null;
- if (models) { // Models requested
+ if(models) { // Models requested
sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
}
DoubleMinMax minmax = new DoubleMinMax();
- for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- if (progress != null) {
- progress.incrementProcessed(LOG);
- }
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ LOG.incrementProcessed(progress);
DBIDs neighborhood = getNearestNeighbors(relation, snnInstance, iter);
Vector center;
- BitSet weightVector;
+ long[] weightVector;
double sod;
- if (neighborhood.size() > 0) {
+ if(neighborhood.size() > 0) {
center = Centroid.make(relation, neighborhood);
// Note: per-dimension variances; no covariances.
double[] variances = computePerDimensionVariances(relation, center, neighborhood);
double expectationOfVariance = Mean.of(variances);
- weightVector = new BitSet(variances.length);
- for (int d = 0; d < variances.length; d++) {
- if (variances[d] < alpha * expectationOfVariance) {
- weightVector.set(d, true);
+ weightVector = BitsUtil.zero(variances.length);
+ for(int d = 0; d < variances.length; d++) {
+ if(variances[d] < alpha * expectationOfVariance) {
+ BitsUtil.setI(weightVector, d);
}
}
sod = subspaceOutlierDegree(relation.get(iter), center, weightVector);
- } else {
+ }
+ else {
center = relation.get(iter).getColumnVector();
weightVector = null;
sod = 0.;
}
- if (sod_models != null) {
+ if(sod_models != null) {
sod_models.put(iter, new SODModel(center, weightVector));
}
sod_scores.putDouble(iter, sod);
minmax.put(sod);
}
- if (progress != null) {
- progress.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(progress);
// combine results.
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- OutlierResult sodResult = new OutlierResult(meta, new MaterializedRelation<>("Subspace Outlier Degree", "sod-outlier", TypeUtil.DOUBLE, sod_scores, relation.getDBIDs()));
- if (sod_models != null) {
+ OutlierResult sodResult = new OutlierResult(meta, new MaterializedDoubleRelation("Subspace Outlier Degree", "sod-outlier", sod_scores, relation.getDBIDs()));
+ if(sod_models != null) {
Relation<SODModel> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<>(SODModel.class), sod_models, relation.getDBIDs());
sodResult.addChildResult(models);
}
@@ -200,9 +195,9 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
}
/**
- * Provides the k nearest neighbors in terms of the shared nearest neighbor
+ * Get the k nearest neighbors in terms of the shared nearest neighbor
* distance.
- * <p/>
+ *
* The query object is excluded from the knn list.
*
* FIXME: move this to the database layer.
@@ -213,20 +208,20 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @return the k nearest neighbors in terms of the shared nearest neighbor
* distance without the query object
*/
- private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) {
+ private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V> simQ, DBIDRef queryObject) {
Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<>(knn);
- for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- if (DBIDUtil.equal(iter, queryObject)) {
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if(DBIDUtil.equal(iter, queryObject)) {
continue;
}
- double sim = simQ.similarity(queryObject, iter).doubleValue();
- if (sim > 0.) {
+ double sim = simQ.similarity(queryObject, iter);
+ if(sim > 0.) {
nearestNeighbors.add(DBIDUtil.newPair(sim, iter));
}
}
// Collect DBIDs
ArrayModifiableDBIDs dbids = DBIDUtil.newArray(nearestNeighbors.size());
- while (nearestNeighbors.size() > 0) {
+ while(nearestNeighbors.size() > 0) {
dbids.add(nearestNeighbors.poll());
}
return dbids;
@@ -240,17 +235,17 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param neighborhood Neighbors
* @return Per-dimension variances.
*/
- private static double[] computePerDimensionVariances(Relation<? extends NumberVector<?>> relation, Vector center, DBIDs neighborhood) {
+ private static double[] computePerDimensionVariances(Relation<? extends NumberVector> relation, Vector center, DBIDs neighborhood) {
double[] c = center.getArrayRef();
double[] variances = new double[c.length];
- for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
- NumberVector<?> databaseObject = relation.get(iter);
- for (int d = 0; d < c.length; d++) {
+ for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ NumberVector databaseObject = relation.get(iter);
+ for(int d = 0; d < c.length; d++) {
final double deviation = databaseObject.doubleValue(d) - c[d];
variances[d] += deviation * deviation;
}
}
- for (int d = 0; d < variances.length; d++) {
+ for(int d = 0; d < variances.length; d++) {
variances[d] /= neighborhood.size();
}
return variances;
@@ -264,15 +259,15 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param weightVector Weight vector
* @return sod score
*/
- private double subspaceOutlierDegree(V queryObject, Vector center, BitSet weightVector) {
- final int card = weightVector.cardinality();
- if (card == 0) {
+ private double subspaceOutlierDegree(V queryObject, Vector center, long[] weightVector) {
+ final int card = BitsUtil.cardinality(weightVector);
+ if(card == 0) {
return 0;
}
final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector);
- double distance = df.distance(queryObject, center).doubleValue();
- distance /= card; // FIXME: defined as card, should be sqrt(card),
- // unfortunately
+ double distance = df.distance(queryObject, center);
+ distance /= card; // FIXME: defined and published as card, should be
+ // sqrt(card), unfortunately
return distance;
}
@@ -300,7 +295,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
/**
* Relevant dimensions.
*/
- private BitSet weightVector;
+ private long[] weightVector;
/**
* Initialize SOD Model
@@ -308,7 +303,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
* @param center Center vector
* @param weightVector Selected dimensions
*/
- public SODModel(Vector center, BitSet weightVector) {
+ public SODModel(Vector center, long[] weightVector) {
this.center = center;
this.weightVector = weightVector;
}
@@ -316,7 +311,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
@Override
public void writeToText(TextWriterStream out, String label) {
out.commentPrintLn(this.getClass().getSimpleName() + ":");
- out.commentPrintLn("relevant attributes (counting starts with 0): " + this.weightVector.toString());
+ out.commentPrintLn("relevant attributes (starting with 0): " + BitsUtil.toString(weightVector, ", ", 0));
out.commentPrintLn("center of neighborhood: " + out.normalizationRestore(center).toString());
out.commentPrintSeparator();
}
@@ -329,7 +324,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* Parameter to specify the number of shared nearest neighbors to be
* considered for learning the subspace properties., must be an integer
@@ -366,7 +361,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
/**
* The similarity function.
*/
- private SimilarityFunction<V, D> similarityFunction;
+ private SimilarityFunction<V> similarityFunction;
/**
* Track models.
@@ -376,31 +371,31 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
- if (config.grab(simP)) {
+ final ObjectParameter<SimilarityFunction<V>> simP = new ObjectParameter<>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
+ if(config.grab(simP)) {
similarityFunction = simP.instantiateClass(config);
}
final IntParameter knnP = new IntParameter(KNN_ID);
knnP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(knnP)) {
+ if(config.grab(knnP)) {
knn = knnP.getValue();
}
final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 1.1);
alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
- if (config.grab(alphaP)) {
+ if(config.grab(alphaP)) {
alpha = alphaP.doubleValue();
}
final Flag modelsF = new Flag(MODELS_ID);
- if (config.grab(modelsF)) {
+ if(config.grab(modelsF)) {
models = modelsF.isTrue();
}
}
@Override
- protected SOD<V, D> makeInstance() {
+ protected SOD<V> makeInstance() {
return new SOD<>(knn, alpha, similarityFunction, models);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
index c3951821..471d9b8d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/LibSVMOneClassOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/LibSVMOneClassOutlierDetection.java
new file mode 100644
index 00000000..25b9cb30
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/LibSVMOneClassOutlierDetection.java
@@ -0,0 +1,279 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.svm;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import libsvm.svm;
+import libsvm.svm_model;
+import libsvm.svm_node;
+import libsvm.svm_parameter;
+import libsvm.svm_print_interface;
+import libsvm.svm_problem;
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+
+/**
+ * Outlier-detection using one-class support vector machines.
+ *
+ * Important note: following the literature, the one-class SVM is trained as if
+ * the origin (0) were the only counterexample. Outliers will only be detected
+ * when they are close to the origin!
+ *
+ * <p>
+ * Reference:<br />
+ * B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C. Williamson<br />
+ * Estimating the support of a high-dimensional distribution<br />
+ * Neural computation 13.7
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> Vector type
+ */
+@Reference(authors = "B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C. Williamson", //
+title = "Estimating the support of a high-dimensional distribution", //
+booktitle = "Neural computation 13.7")
+public class LibSVMOneClassOutlierDetection<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Class logger.
+ */
+ private static final Logging LOG = Logging.getLogger(LibSVMOneClassOutlierDetection.class);
+
+ /**
+ * Kernel functions, exposed as an enum for convenience.
+ *
+ * @apiviz.exclude
+ */
+ public static enum SVMKernel { //
+ LINEAR, // Linear
+ QUADRATIC, // Quadratic
+ CUBIC, // Cubic
+ RBF, // Radial basis functions
+ SIGMOID, // Sigmoid
+ }
+
+ /**
+ * Kernel function in use.
+ */
+ protected SVMKernel kernel = SVMKernel.RBF;
+
+ /**
+ * Constructor.
+ *
+ * @param kernel Kernel to use with SVM.
+ */
+ public LibSVMOneClassOutlierDetection(SVMKernel kernel) {
+ super();
+ this.kernel = kernel;
+ }
+
+ /**
+ * Run one-class SVM.
+ *
+ * @param relation Data relation
+ * @return Outlier result.
+ */
+ public OutlierResult run(Relation<V> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
+ final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+
+ svm.svm_set_print_string_function(LOG_HELPER);
+
+ svm_parameter param = new svm_parameter();
+ param.svm_type = svm_parameter.ONE_CLASS;
+ param.kernel_type = svm_parameter.LINEAR;
+ param.degree = 3;
+ switch(kernel){
+ case LINEAR:
+ param.kernel_type = svm_parameter.LINEAR;
+ break;
+ case QUADRATIC:
+ param.kernel_type = svm_parameter.POLY;
+ param.degree = 2;
+ break;
+ case CUBIC:
+ param.kernel_type = svm_parameter.POLY;
+ param.degree = 3;
+ break;
+ case RBF:
+ param.kernel_type = svm_parameter.RBF;
+ break;
+ case SIGMOID:
+ param.kernel_type = svm_parameter.SIGMOID;
+ break;
+ default:
+ throw new AbortException("Invalid kernel parameter: " + kernel);
+ }
+ // TODO: expose additional parameters to the end user!
+ param.nu = 0.05;
+ param.coef0 = 0.;
+ param.cache_size = 100;
+ param.C = 1e2;
+ param.eps = 1e-4; // not used by one-class?
+ param.p = 0.1; // not used by one-class?
+ param.shrinking = 0;
+ param.probability = 0;
+ param.nr_weight = 0;
+ param.weight_label = new int[0];
+ param.weight = new double[0];
+ param.gamma = 1e-4 / dim;
+
+ // Transform data:
+ svm_problem prob = new svm_problem();
+ prob.l = relation.size();
+ prob.x = new svm_node[prob.l][];
+ prob.y = new double[prob.l];
+ {
+ DBIDIter iter = ids.iter();
+ for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
+ V vec = relation.get(iter);
+ // TODO: support compact sparse vectors, too!
+ svm_node[] x = new svm_node[dim];
+ for(int d = 0; d < dim; d++) {
+ x[d] = new svm_node();
+ x[d].index = d + 1;
+ x[d].value = vec.doubleValue(d);
+ }
+ prob.x[i] = x;
+ prob.y[i] = +1;
+ }
+ }
+
+ if(LOG.isVerbose()) {
+ LOG.verbose("Training one-class SVM...");
+ }
+ String err = svm.svm_check_parameter(prob, param);
+ if(err != null) {
+ LOG.warning("svm_check_parameter: " + err);
+ }
+ svm_model model = svm.svm_train(prob, param);
+
+ if(LOG.isVerbose()) {
+ LOG.verbose("Predicting...");
+ }
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
+ DoubleMinMax mm = new DoubleMinMax();
+ {
+ DBIDIter iter = ids.iter();
+ double[] buf = new double[svm.svm_get_nr_class(model)];
+ for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
+ V vec = relation.get(iter);
+ svm_node[] x = new svm_node[dim];
+ for(int d = 0; d < dim; d++) {
+ x[d] = new svm_node();
+ x[d].index = d + 1;
+ x[d].value = vec.doubleValue(d);
+ }
+ svm.svm_predict_values(model, x, buf);
+ double score = -buf[0] / param.gamma; // Heuristic rescaling, sorry.
+ // Unfortunately, libsvm one-class currently yields a binary decision.
+ scores.putDouble(iter, score);
+ mm.put(score);
+ }
+ }
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids);
+ OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Logging helper that forwards libsvm print output to the verbose log.
+ */
+ static final svm_print_interface LOG_HELPER = new svm_print_interface() {
+ @Override
+ public void print(String arg0) {
+ if(LOG.isVerbose()) {
+ LOG.verbose(arg0);
+ }
+ }
+ };
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> Vector type
+ */
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ /**
+ * Parameter for kernel function.
+ */
+ private static final OptionID KERNEL_ID = new OptionID("svm.kernel", "Kernel to use with SVM.");
+
+ /**
+ * Kernel in use.
+ */
+ protected SVMKernel kernel = SVMKernel.RBF;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ EnumParameter<SVMKernel> kernelP = new EnumParameter<>(KERNEL_ID, SVMKernel.class, SVMKernel.RBF);
+ if(config.grab(kernelP)) {
+ kernel = kernelP.getValue();
+ }
+ }
+
+ @Override
+ protected LibSVMOneClassOutlierDetection<V> makeInstance() {
+ return new LibSVMOneClassOutlierDetection<>(kernel);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/package-info.java
new file mode 100644
index 00000000..2afbbaf1
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/package-info.java
@@ -0,0 +1,29 @@
+/**
+ * Support-Vector-Machines for outlier detection.
+ *
+ * @author Erich Schubert
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.svm; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
index d10eaef8..f8b4eb3e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,7 +36,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -117,7 +118,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
final double score = (pattern.matcher(label).matches()) ? 1 : 0;
scores.putDouble(iditer, score);
}
- Relation<Double> scoreres = new MaterializedRelation<>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("By label outlier scores", "label-outlier", scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
index 44a7975f..c8920617 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -73,7 +74,7 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
scores.putDouble(iditer, 1.0);
}
- Relation<Double> scoreres = new MaterializedRelation<>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial all-outlier score", "all-outlier", scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
index 6f2f2f38..dbf338f1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,7 +32,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -72,13 +73,13 @@ public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierRe
* @param relation Relation
* @return Result
*/
- public OutlierResult run(Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Relation<? extends NumberVector> relation) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
DoubleMinMax minmax = new DoubleMinMax();
Mean m = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
m.reset();
- NumberVector<?> nv = relation.get(iditer);
+ NumberVector nv = relation.get(iditer);
for (int i = 0; i < nv.getDimensionality(); i++) {
m.put(nv.doubleValue(i));
}
@@ -86,7 +87,7 @@ public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierRe
scores.putDouble(iditer, score);
minmax.put(score);
}
- Relation<Double> scoreres = new MaterializedRelation<>("Trivial mean score", "mean-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial mean score", "mean-outlier", scores, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
index 2e952b5f..adaf9431 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -39,7 +39,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
@@ -99,7 +100,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
@Override
public OutlierResult run(Database database) {
- Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
+ Relation<NumberVector> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
Relation<Model> models = database.getRelation(new SimpleTypeInformation<>(Model.class));
// Prefer a true class label
try {
@@ -120,7 +121,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
* @param labels Label relation
* @return Outlier result
*/
- public OutlierResult run(Relation<Model> models, Relation<NumberVector<?>> vecs, Relation<?> labels) {
+ public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
HashSet<GeneratorSingleCluster> generators = new HashSet<>();
@@ -180,7 +181,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
}
scores.putDouble(iditer, score);
}
- Relation<Double> scoreres = new MaterializedRelation<>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
index ff3d0296..0a3e27b4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -73,7 +74,7 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
scores.putDouble(iditer, 0.0);
}
- Relation<Double> scoreres = new MaterializedRelation<>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial no-outlier score", "no-outlier", scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
return new OutlierResult(meta, scoreres);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
index c927cae4..a6ea3186 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java
@@ -7,7 +7,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team