diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier')
96 files changed, 5483 insertions, 2741 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java index 190d14fe..1d723443 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -38,14 +38,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; @@ -84,11 +85,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @author Erich Schubert * * @param <V> the type of NumberVector handled by this Algorithm - * @param <D> Distance type */ @Title("COP: Correlation Outlier Probability") @Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, 
Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)") -public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm { +public class COP<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -164,7 +164,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte /** * Holds the PCA runner. */ - private PCARunner<V> pca; + private PCARunner pca; /** * Expected amount of outliers. @@ -209,7 +209,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @param dist Distance distribution model (ChiSquared, Gamma) * @param models Report models */ - public COP(DistanceFunction<? super V, D> distanceFunction, int k, PCARunner<V> pca, double expect, DistanceDist dist, boolean models) { + public COP(DistanceFunction<? super V> distanceFunction, int k, PCARunner pca, double expect, DistanceDist dist, boolean models) { super(distanceFunction); this.k = k; this.pca = pca; @@ -226,7 +226,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte */ public OutlierResult run(Relation<V> relation) { final DBIDs ids = relation.getDBIDs(); - KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1); + KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1); final int dim = RelationUtil.dimensionality(relation); if(k <= dim + 1) { @@ -244,7 +244,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte FiniteProgress prog = LOG.isVerbose() ? 
new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null; for(DBIDIter id = ids.iter(); id.valid(); id.advance()) { - KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1); ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors); nids.remove(id); // Do not use query object @@ -324,16 +324,12 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte cop_dim.putInt(id, dim + 1 - vdim); } - if(prog != null) { - prog.incrementProcessed(LOG); - } - } - if(prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); // combine results. - Relation<Double> scoreResult = new MaterializedRelation<>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids); OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); if(models) { @@ -360,7 +356,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> { + public static class Parameterizer<V extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<V> { /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its COP_SCORE, must be an integer greater than @@ -415,7 +411,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte /** * Holds the object performing the dependency derivation. */ - PCARunner<V> pca; + PCARunner pca; /** * Distance distributution assumption. 
@@ -450,7 +446,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte if(config.grab(expectP)) { expect = expectP.doubleValue(); } - ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCARunner.class, PCARunner.class); + ObjectParameter<PCARunner> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCARunner.class, PCARunner.class); if(config.grab(pcaP)) { pca = pcaP.instantiateClass(config); } @@ -461,7 +457,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte } @Override - protected COP<V, D> makeInstance() { + protected COP<V> makeInstance() { return new COP<>(distanceFunction, k, pca, expect, dist, models); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java index ef782390..3d484562 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DWOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -35,18 +35,18 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -92,13 +92,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @author Omar Yousry
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> Distance type
*/
-
@Title("DWOF: Dynamic Window Outlier Factor")
@Description("Algorithm to compute dynamic-window outlier factors in a database based on the neighborhood size parameter 'k'")
-@Reference(authors = "R. Momtaz, N. Mohssen, M. A. Gowayyed", title = "DWOF: A Robust Density-Based OutlierDetection Approach", booktitle = "Pattern Recognition and Image Analysis, Proc. 6th Iberian Conference, IbPRIA 2013, Funchal, Madeira, Portugal, 2013.", url = "http://dx.doi.org/10.1007%2F978-3-642-38628-2_61")
-public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "R. Momtaz, N. Mohssen, M. A. Gowayyed", //
+title = "DWOF: A Robust Density-Based Outlier Detection Approach", //
+booktitle = "Pattern Recognition and Image Analysis, Proc. 6th Iberian Conference, IbPRIA 2013, Funchal, Madeira, Portugal, 2013.", url = "http://dx.doi.org/10.1007%2F978-3-642-38628-2_61")
+public class DWOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -122,7 +122,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @param k the value of k
* @param delta Radius increase factor
*/
- public DWOF(DistanceFunction<? super O, D> distanceFunction, int k, double delta) {
+ public DWOF(DistanceFunction<? super O> distanceFunction, int k, double delta) {
super(distanceFunction);
this.k = k + 1;
this.delta = delta;
@@ -138,10 +138,10 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas */
public OutlierResult run(Database database, Relation<O> relation) {
final DBIDs ids = relation.getDBIDs();
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
// Get k nearest neighbor and range query on the relation.
- KNNQuery<O, D> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
- RangeQuery<O, D> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
+ KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
+ RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
// DWOF output score storage.
@@ -160,9 +160,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas }
IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
while(countUnmerged > 0) {
- if(clusEvalProgress != null) {
- clusEvalProgress.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(clusEvalProgress);
// Increase radii
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
radii.putDouble(iter, radii.doubleValue(iter) * delta);
@@ -185,19 +183,15 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
}
}
- if(clusEvalProgress != null) {
- clusEvalProgress.setCompleted(LOG);
- }
- if(stepProg != null) {
- stepProg.setCompleted(LOG);
- }
+ LOG.setCompleted(clusEvalProgress);
+ LOG.setCompleted(stepProg);
// Build result representation.
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
minmax.put(dwofs.doubleValue(iter));
}
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
- Relation<Double> rel = new MaterializedRelation<>("Dynamic-Window Outlier Factors", "dwof-outlier", TypeUtil.DOUBLE, dwofs, ids);
+ DoubleRelation rel = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
return new OutlierResult(meta, rel);
}
@@ -213,7 +207,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @param knnq kNN search function
* @param radii WritableDoubleDataStore to store radii
*/
- private void initializeRadii(DBIDs ids, KNNQuery<O, D> knnq, DistanceQuery<O, D> distFunc, WritableDoubleDataStore radii) {
+ private void initializeRadii(DBIDs ids, KNNQuery<O> knnq, DistanceQuery<O> distFunc, WritableDoubleDataStore radii) {
FiniteProgress avgDistProgress = LOG.isVerbose() ? new FiniteProgress("Calculating average kNN distances-", ids.size(), LOG) : null;
double absoluteMinDist = Double.POSITIVE_INFINITY;
double minAvgDist = Double.POSITIVE_INFINITY;
@@ -221,7 +215,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas Mean mean = new Mean();
// Iterate over all objects
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- KNNList<D> iterNeighbors = knnq.getKNNForDBID(iter, k);
+ KNNList iterNeighbors = knnq.getKNNForDBID(iter, k);
// skip the point itself
mean.reset();
for(DBIDIter neighbor1 = iterNeighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
@@ -232,7 +226,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas if(DBIDUtil.equal(neighbor1, neighbor2) || DBIDUtil.equal(neighbor2, iter)) {
continue;
}
- double distance = distFunc.distance(neighbor1, neighbor2).doubleValue();
+ double distance = distFunc.distance(neighbor1, neighbor2);
mean.put(distance);
if(distance > 0. && distance < absoluteMinDist) {
absoluteMinDist = distance;
@@ -244,13 +238,9 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas if(currentMean < minAvgDist) {
minAvgDist = currentMean;
}
- if(avgDistProgress != null) {
- avgDistProgress.incrementProcessed(LOG);
- }
- }
- if(avgDistProgress != null) {
- avgDistProgress.ensureCompleted(LOG);
+ LOG.incrementProcessed(avgDistProgress);
}
+ LOG.ensureCompleted(avgDistProgress);
// Initializing the radii of all objects.
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -272,7 +262,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @param radii Radii to cluster accordingly
* @param labels Label storage.
*/
- private void clusterData(DBIDs ids, RangeQuery<O, D> rnnQuery, WritableDoubleDataStore radii, WritableDataStore<ModifiableDBIDs> labels) {
+ private void clusterData(DBIDs ids, RangeQuery<O> rnnQuery, WritableDoubleDataStore radii, WritableDataStore<ModifiableDBIDs> labels) {
FiniteProgress clustProg = LOG.isVerbose() ? new FiniteProgress("Density-Based Clustering", ids.size(), LOG) : null;
// Iterate over all objects
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -282,18 +272,16 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas ModifiableDBIDs newCluster = DBIDUtil.newArray();
newCluster.add(iter);
labels.put(iter, newCluster);
- if(clustProg != null) {
- clustProg.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(clustProg);
// container of the points to be added and their radii neighbors to the
// cluster
ModifiableDBIDs nChain = DBIDUtil.newArray();
nChain.add(iter);
// iterate over nChain
for(DBIDIter toGetNeighbors = nChain.iter(); toGetNeighbors.valid(); toGetNeighbors.advance()) {
- D range = rnnQuery.getDistanceFactory().fromDouble(radii.doubleValue(toGetNeighbors));
- DistanceDBIDList<D> nNeighbors = rnnQuery.getRangeForDBID(toGetNeighbors, range);
- for(DistanceDBIDListIter<D> iter2 = nNeighbors.iter(); iter2.valid(); iter2.advance()) {
+ double range = radii.doubleValue(toGetNeighbors);
+ DoubleDBIDList nNeighbors = rnnQuery.getRangeForDBID(toGetNeighbors, range);
+ for(DoubleDBIDListIter iter2 = nNeighbors.iter(); iter2.valid(); iter2.advance()) {
if(DBIDUtil.equal(toGetNeighbors, iter2)) {
continue;
}
@@ -301,9 +289,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas newCluster.add(iter2);
labels.put(iter2, newCluster);
nChain.add(iter2);
- if(clustProg != null) {
- clustProg.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(clustProg);
}
else if(labels.get(iter2) != newCluster) {
ModifiableDBIDs toBeDeleted = labels.get(iter2);
@@ -316,9 +302,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas }
}
}
- if(clustProg != null) {
- clustProg.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(clustProg);
}
/**
@@ -360,8 +344,10 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @author Omar Yousry
*
* @apiviz.exclude
+ *
+ * @param <O> Object type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for the number of neighbors.
*/
@@ -400,7 +386,7 @@ public class DWOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas }
@Override
- protected DWOF<O, D> makeInstance() {
+ protected DWOF<O> makeInstance() {
return new DWOF<>(distanceFunction, k, delta);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java index 3f8bb484..2383824e 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,7 +30,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -60,7 +61,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; */
@Title("Gaussian Model Outlier Detection")
@Description("Fit a multivariate gaussian model onto the data, and use the PDF to compute an outlier score.")
-public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianModel<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -136,7 +137,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm< else {
meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), 0.0, Double.POSITIVE_INFINITY);
}
- Relation<Double> res = new MaterializedRelation<>("Gaussian Model Outlier Score", "gaussian-model-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ DoubleRelation res = new MaterializedDoubleRelation("Gaussian Model Outlier Score", "gaussian-model-outlier", oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -157,7 +158,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm< *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
protected boolean invert = false;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java index e6659a8f..53e573e3 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -36,7 +36,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -78,7 +79,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @Title("Gaussian-Uniform Mixture Model Outlier Detection")
@Description("Fits a mixture model consisting of a Gaussian and a uniform distribution to the data.")
@Reference(prefix = "Generalization using the likelihood gain as outlier score of", authors = "Eskin, Eleazar", title = "Anomaly detection over noisy data using learned probability distributions", booktitle = "Proc. of the Seventeenth International Conference on Machine Learning (ICML-2000)")
-public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianUniformMixture<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -184,7 +185,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA }
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
- Relation<Double> res = new MaterializedRelation<>("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ DoubleRelation res = new MaterializedDoubleRelation("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -247,7 +248,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
protected double l = 1E-7;
protected double c = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java deleted file mode 100644 index 88603f09..00000000 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java +++ /dev/null @@ -1,189 +0,0 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures -
- Copyright (C) 2011
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team -
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version. -
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details. -
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
-import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-
-/**
- * Outlier Detection based on the accumulated distances of a point to its k
- * nearest neighbors.
- *
- * Based on: F. Angiulli, C. Pizzuti: Fast Outlier Detection in High Dimensional
- * Spaces. In: Proc. European Conference on Principles of Knowledge Discovery
- * and Data Mining (PKDD'02), Helsinki, Finland, 2002.
- *
- * @author Lisa Reichert
- *
- * @apiviz.has KNNQuery
- *
- * @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
- */
-@Title("KNNWeight outlier detection")
-@Description("Outlier Detection based on the distances of an object to its k nearest neighbors.")
-@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02), Helsinki, Finland, 2002", url = "http://dx.doi.org/10.1007/3-540-45681-3_2")
-public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
- /**
- * The logger for this class.
- */
- private static final Logging LOG = Logging.getLogger(KNNWeightOutlier.class);
-
- /**
- * Parameter to specify the k nearest neighbor
- */
- public static final OptionID K_ID = new OptionID("knnwod.k", "k nearest neighbor");
-
- /**
- * The kNN query used.
- */
- public static final OptionID KNNQUERY_ID = new OptionID("knnwod.knnquery", "kNN query to use");
-
- /**
- * Holds the value of {@link #K_ID}.
- */
- private int k;
-
- /**
- * Constructor with parameters.
- *
- * @param distanceFunction Distance function
- * @param k k Parameter
- */
- public KNNWeightOutlier(DistanceFunction<? super O, D> distanceFunction, int k) {
- super(distanceFunction);
- this.k = k;
- }
-
- /**
- * Runs the algorithm in the timed evaluation part.
- */
- public OutlierResult run(Database database, Relation<O> relation) {
- final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
-
- if(LOG.isVerbose()) {
- LOG.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
- }
- FiniteProgress progressKNNWeight = LOG.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), LOG) : null;
-
- DoubleMinMax minmax = new DoubleMinMax();
-
- // compute distance to the k nearest neighbor. n objects with the highest
- // distance are flagged as outliers
- WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- // compute sum of the distances to the k nearest neighbors
-
- final KNNList<D> knn = knnQuery.getKNNForDBID(iditer, k);
- double skn = 0;
- if(knn instanceof DoubleDistanceKNNList) {
- for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
- skn += neighbor.doubleDistance();
- }
- }
- else {
- for(DistanceDBIDListIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
- skn += neighbor.getDistance().doubleValue();
- }
- }
- knnw_score.putDouble(iditer, skn);
- minmax.put(skn);
-
- if(progressKNNWeight != null) {
- progressKNNWeight.incrementProcessed(LOG);
- }
- }
- if(progressKNNWeight != null) {
- progressKNNWeight.ensureCompleted(LOG);
- }
-
- Relation<Double> res = new MaterializedRelation<>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
- OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
- return new OutlierResult(meta, res);
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
- }
-
- @Override
- protected Logging getLogger() {
- return LOG;
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- protected int k = 0;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID);
- if(config.grab(kP)) {
- k = kP.getValue();
- }
- }
-
- @Override
- protected KNNWeightOutlier<O, D> makeInstance() {
- return new KNNWeightOutlier<>(distanceFunction, k);
- }
- }
-}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java index b1ffae63..61d11935 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -27,7 +27,7 @@ import java.util.ArrayList; import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.OPTICS;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.optics.OPTICS;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -38,15 +38,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -60,8 +60,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
- * OPTICSOF provides the Optics-of algorithm, an algorithm to find Local
- * Outliers in a database.
+ * Optics-OF outlier detection algorithm, an algorithm to find Local Outliers in
+ * a database based on ideas from {@link OPTICS} clustering.
* <p>
* Reference:<br>
* Markus M. Breunig, Hans-Peter Kriegel, Raymond T. N, Jörg Sander:<br />
@@ -79,8 +79,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; */
@Title("OPTICS-OF: Identifying Local Outliers")
@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'minpts'")
-@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "OPTICS-OF: Identifying Local Outliers", booktitle = "Proc. of the 3rd European Conference on Principles of Knowledge Discovery and Data Mining (PKDD), Prague, Czech Republic", url = "http://springerlink.metapress.com/content/76bx6413gqb4tvta/")
-public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", //
+title = "OPTICS-OF: Identifying Local Outliers", //
+booktitle = "Proc. of the 3rd European Conference on Principles of Knowledge Discovery and Data Mining (PKDD), Prague, Czech Republic", //
+url = "http://springerlink.metapress.com/content/76bx6413gqb4tvta/")
+public class OPTICSOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -97,7 +100,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc * @param distanceFunction distance function
* @param minpts minPts parameter
*/
- public OPTICSOF(DistanceFunction<? super O, D> distanceFunction, int minpts) {
+ public OPTICSOF(DistanceFunction<? super O> distanceFunction, int minpts) {
super(distanceFunction);
this.minpts = minpts;
}
@@ -110,13 +113,13 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc * @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distQuery, minpts);
- RangeQuery<O, D> rangeQuery = database.getRangeQuery(distQuery);
+ DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
+ RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
DBIDs ids = relation.getDBIDs();
// FIXME: implicit preprocessor.
- WritableDataStore<KNNList<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
+ WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
@@ -124,10 +127,10 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc // N_minpts(id) and core-distance(id)
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNList<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
- D d = minptsNeighbours.getKNNDistance();
+ KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
+ double d = minptsNeighbours.getKNNDistance();
nMinPts.put(iditer, minptsNeighbours);
- coreDistance.putDouble(iditer, d.doubleValue());
+ coreDistance.putDouble(iditer, d);
minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
}
@@ -138,9 +141,9 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc List<Double> core = new ArrayList<>();
double lrd = 0;
// TODO: optimize for double distances
- for(DistanceDBIDListIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ for(DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double coreDist = coreDistance.doubleValue(neighbor);
- double dist = distQuery.distance(iditer, neighbor).doubleValue();
+ double dist = distQuery.distance(iditer, neighbor);
double rd = Math.max(coreDist, dist);
lrd = rd + lrd;
core.add(rd);
@@ -166,7 +169,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc ofminmax.put(of);
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("OPTICS Outlier Scores", "optics-outlier", TypeUtil.DOUBLE, ofs, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -188,13 +191,13 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc *
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
protected int minpts = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter param = new IntParameter(OPTICS.MINPTS_ID);
+ final IntParameter param = new IntParameter(OPTICS.Parameterizer.MINPTS_ID);
param.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
if(config.grab(param)) {
minpts = param.getValue();
@@ -202,7 +205,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc }
@Override
- protected OPTICSOF<O, D> makeInstance() {
+ protected OPTICSOF<O> makeInstance() {
return new OPTICSOF<>(distanceFunction, minpts);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java index f3ef5ab5..4e0662a1 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java deleted file mode 100644 index d254c9a1..00000000 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java +++ /dev/null @@ -1,337 +0,0 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures -
- Copyright (C) 2011
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team -
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version. -
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details. -
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.Collection;
-import java.util.Iterator;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.math.Mean;
-import de.lmu.ifi.dbs.elki.result.ReferencePointsResult;
-import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.referencepoints.GridBasedReferencePoints;
-import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic;
-
-/**
- * <p>
- * provides the Reference-Based Outlier Detection algorithm, an algorithm that
- * computes kNN distances approximately, using reference points.
- * </p>
- * <p>
- * Reference:<br>
- * Y. Pei, O. R. Zaiane, Y. Gao: An Efficient Reference-Based Approach to
- * Outlier Detection in Large Datasets.</br> In: Proc. IEEE Int. Conf. on Data
- * Mining (ICDM'06), Hong Kong, China, 2006.
- * </p>
- *
- * @author Lisa Reichert
- * @author Erich Schubert
- *
- * @apiviz.composedOf ReferencePointsHeuristic
- *
- * @param <V> a type of {@link NumberVector} as a suitable data object for this
- * algorithm
- * @param <D> the distance type processed
- */
-@Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
-@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
-@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06), Hong Kong, China, 2006", url = "http://dx.doi.org/10.1109/ICDM.2006.17")
-public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
- /**
- * The logger for this class.
- */
- private static final Logging LOG = Logging.getLogger(ReferenceBasedOutlierDetection.class);
-
- /**
- * Parameter for the reference points heuristic.
- */
- public static final OptionID REFP_ID = new OptionID("refod.refp", "The heuristic for finding reference points.");
-
- /**
- * Parameter to specify the number of nearest neighbors of an object, to be
- * considered for computing its REFOD_SCORE, must be an integer greater than
- * 1.
- */
- public static final OptionID K_ID = new OptionID("refod.k", "The number of nearest neighbors");
-
- /**
- * Holds the value of {@link #K_ID}.
- */
- private int k;
-
- /**
- * Stores the reference point strategy
- */
- private ReferencePointsHeuristic<V> refp;
-
- /**
- * Distance function to use.
- */
- private DistanceFunction<V, D> distanceFunction;
-
- /**
- * Constructor with parameters.
- *
- * @param k k Parameter
- * @param distanceFunction distance function
- * @param refp Reference points heuristic
- */
- public ReferenceBasedOutlierDetection(int k, DistanceFunction<V, D> distanceFunction, ReferencePointsHeuristic<V> refp) {
- super();
- this.k = k;
- this.distanceFunction = distanceFunction;
- this.refp = refp;
- }
-
- /**
- * Run the algorithm on the given relation.
- *
- * @param database Database
- * @param relation Relation to process
- * @return Outlier result
- */
- public OutlierResult run(Database database, Relation<V> relation) {
- DistanceQuery<V, D> distFunc = database.getDistanceQuery(relation, distanceFunction);
- Collection<V> refPoints = refp.getReferencePoints(relation);
-
- DBIDs ids = relation.getDBIDs();
- // storage of distance/score values.
- WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT);
-
- // Compute density estimation:
- {
- // compute density for one reference point, to initialize the first
- // density
- // value for each object, then update
- final Iterator<V> iter = refPoints.iterator();
- if(!iter.hasNext()) {
- throw new AbortException("Cannot compute ROS without reference points!");
- }
- V firstRef = iter.next();
- // compute distance vector for the first reference point
- DistanceDBIDList<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
- for(int l = 0; l < firstReferenceDists.size(); l++) {
- double density = computeDensity(firstReferenceDists, l);
- // Initial value
- rbod_score.putDouble(firstReferenceDists.get(l), density);
- }
- // compute density values for all remaining reference points
- while(iter.hasNext()) {
- V refPoint = iter.next();
- DistanceDBIDList<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
- // compute density value for each object
- for(int l = 0; l < referenceDists.size(); l++) {
- double density = computeDensity(referenceDists, l);
- // Update minimum
- if(density < rbod_score.doubleValue(referenceDists.get(l))) {
- rbod_score.putDouble(referenceDists.get(l), density);
- }
- }
- }
- }
- // compute maximum density
- double maxDensity = 0.0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double dens = rbod_score.doubleValue(iditer);
- if(dens > maxDensity) {
- maxDensity = dens;
- }
- }
- // compute ROS
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- double score = 1 - (rbod_score.doubleValue(iditer) / maxDensity);
- rbod_score.putDouble(iditer, score);
- }
-
- // adds reference points to the result. header information for the
- // visualizer to find the reference points in the result
- ReferencePointsResult<V> refp = new ReferencePointsResult<>("Reference points", "reference-points", refPoints);
-
- Relation<Double> scoreResult = new MaterializedRelation<>("Reference-points Outlier Scores", "reference-outlier", TypeUtil.DOUBLE, rbod_score, relation.getDBIDs());
- OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(0.0, 1.0, 0.0, 1.0, 0.0);
- OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
- result.addChildResult(refp);
- return result;
- }
-
- /**
- * Computes for each object the distance to one reference point. (one
- * dimensional representation of the data set)
- *
- * @param refPoint Reference Point Feature Vector
- * @param database database to work on
- * @param distFunc Distance function to use
- * @return array containing the distance to one reference point for each
- * database object and the object id
- */
- protected DistanceDBIDList<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
- // TODO: optimize for double distances?
- GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<>(database.size());
- for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
- }
- referenceDists.sort();
- return referenceDists;
- }
-
- /**
- * Computes the density of an object. The density of an object is the
- * distances to the k nearest neighbors. Neighbors and distances are computed
- * approximately. (approximation for kNN distance: instead of a normal NN
- * search the NN of an object are those objects that have a similar distance
- * to a reference point. The k- nearest neighbors of an object are those
- * objects that lay close to the object in the reference distance vector)
- *
- * @param referenceDists vector of the reference distances,
- * @param index index of the current object
- * @return density for one object and reference point
- */
- protected double computeDensity(DistanceDBIDList<D> referenceDists, int index) {
- final DistanceDBIDPair<D> x = referenceDists.get(index);
- final double xDist = x.getDistance().doubleValue();
-
- int lef = index - 1;
- int rig = index + 1;
- Mean mean = new Mean();
- double lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- double rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- while(mean.getCount() < k) {
- if(lef >= 0 && rig < referenceDists.size()) {
- // Prefer n or m?
- if(Math.abs(lef_d - xDist) < Math.abs(rig_d - xDist)) {
- mean.put(Math.abs(lef_d - xDist));
- // Update n
- lef--;
- lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- else {
- mean.put(Math.abs(rig_d - xDist));
- // Update right
- rig++;
- rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- }
- else {
- if(lef >= 0) {
- // Choose left, since right is not available.
- mean.put(Math.abs(lef_d - xDist));
- // update left
- lef--;
- lef_d = (lef >= 0) ? referenceDists.get(lef).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- else if(rig < referenceDists.size()) {
- // Choose right, since left is not available
- mean.put(Math.abs(rig_d - xDist));
- // Update right
- rig++;
- rig_d = (rig < referenceDists.size()) ? referenceDists.get(rig).getDistance().doubleValue() : Double.NEGATIVE_INFINITY;
- }
- else {
- // Not enough objects in database?
- throw new IndexOutOfBoundsException();
- }
- }
- }
-
- return 1.0 / mean.getMean();
- }
-
- @Override
- public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
- }
-
- @Override
- protected Logging getLogger() {
- return LOG;
- }
-
- /**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
- */
- public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
- /**
- * Holds the value of {@link #K_ID}.
- */
- private int k;
-
- /**
- * Stores the reference point strategy
- */
- private ReferencePointsHeuristic<V> refp;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
- if(config.grab(pK)) {
- k = pK.getValue();
- }
- final ObjectParameter<ReferencePointsHeuristic<V>> refpP = new ObjectParameter<>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
- if(config.grab(refpP)) {
- refp = refpP.instantiateClass(config);
- }
- }
-
- @Override
- protected ReferenceBasedOutlierDetection<V, D> makeInstance() {
- return new ReferenceBasedOutlierDetection<>(k, distanceFunction, refp);
- }
- }
-}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java index 72a727a5..ef8f2192 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -40,13 +40,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -81,7 +82,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; */ @Title("Simple COP: Correlation Outlier Probability") @Reference(authors = "Arthur Zimek", title = "Correlation Clustering. 
PhD thesis, Chapter 18", booktitle = "") -public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm { +public class SimpleCOP<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -95,7 +96,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> /** * Holds the object performing the dependency derivation */ - private DependencyDerivator<V, D> dependencyDerivator; + private DependencyDerivator<V> dependencyDerivator; /** * Constructor. @@ -104,14 +105,14 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> * @param k k Parameter * @param pca PCA runner- */ - public SimpleCOP(DistanceFunction<? super V, D> distanceFunction, int k, PCAFilteredRunner<V> pca) { + public SimpleCOP(DistanceFunction<? super V> distanceFunction, int k, PCAFilteredRunner pca) { super(distanceFunction); this.k = k; this.dependencyDerivator = new DependencyDerivator<>(null, FormatUtil.NF, pca, 0, false); } public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException { - KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1); + KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1); DBIDs ids = data.getDBIDs(); @@ -124,7 +125,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> FiniteProgress progressLocalPCA = LOG.isVerbose() ? 
new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null; double sqrt2 = Math.sqrt(2.0); for(DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) { - KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1); ModifiableDBIDs nids = DBIDUtil.newArray(neighbors); nids.remove(id); @@ -147,16 +148,12 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> cop_sol.put(id, depsol); - if(progressLocalPCA != null) { - progressLocalPCA.incrementProcessed(LOG); - } - } - if(progressLocalPCA != null) { - progressLocalPCA.ensureCompleted(LOG); + LOG.incrementProcessed(progressLocalPCA); } + LOG.ensureCompleted(progressLocalPCA); } // combine results. - Relation<Double> scoreResult = new MaterializedRelation<>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", cop_score, ids); OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); // extra results @@ -184,7 +181,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> { + public static class Parameterizer<V extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<V> { /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its COP_SCORE, must be an integer greater than @@ -212,7 +209,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> /** * Holds the object performing the dependency derivation */ - protected PCAFilteredRunner<V> pca; + protected PCAFilteredRunner pca; 
@Override protected void makeOptions(Parameterization config) { @@ -222,14 +219,14 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> if(config.grab(kP)) { k = kP.intValue(); } - ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class); + ObjectParameter<PCAFilteredRunner> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class); if(config.grab(pcaP)) { pca = pcaP.instantiateClass(config); } } @Override - protected SimpleCOP<V, D> makeInstance() { + protected SimpleCOP<V> makeInstance() { return new SimpleCOP<>(distanceFunction, k, pca); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java index 65447713..35dfb1ee 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/ABOD.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */ import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; @@ -36,9 +37,9 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; 
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction; import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix; import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction; @@ -48,6 +49,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -63,9 +65,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * dimensional data sets. Exact version, which has cubic runtime (see also * {@link FastABOD} and {@link LBABOD} for faster versions). * - * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in - * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge - * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008. + * Reference: + * <p> + * H.-P. Kriegel, M. Schubert, and A. Zimek:<br /> + * Angle-Based Outlier Detection in High-dimensional Data.<br /> + * In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining + * (KDD '08), Las Vegas, NV, 2008. 
+ * </p> * * @author Matthias Schubert (Original Code) * @author Erich Schubert (ELKIfication) @@ -74,8 +80,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; */ @Title("ABOD: Angle-Based Outlier Detection") @Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.") -@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946") -public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "H.-P. Kriegel, M. Schubert, A. Zimek", // +title = "Angle-Based Outlier Detection in High-dimensional Data", // +booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", // +url = "http://dx.doi.org/10.1145/1401890.1401946") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.ABOD", "abod" }) +public class ABOD<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -84,14 +94,14 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe /** * Store the configured Kernel version. */ - protected SimilarityFunction<? super V, DoubleDistance> kernelFunction; + protected SimilarityFunction<? super V> kernelFunction; /** * Constructor for Angle-Based Outlier Detection (ABOD). * * @param kernelFunction kernel function to use */ - public ABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction) { + public ABOD(SimilarityFunction<? 
super V> kernelFunction) { super(); this.kernelFunction = kernelFunction; } @@ -105,21 +115,21 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe public OutlierResult run(Database db, Relation<V> relation) { DBIDs ids = relation.getDBIDs(); // Build a kernel matrix, to make O(n^3) slightly less bad. - SimilarityQuery<V, DoubleDistance> sq = db.getSimilarityQuery(relation, kernelFunction); + SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction); KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids); WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); DoubleMinMax minmaxabod = new DoubleMinMax(); MeanVariance s = new MeanVariance(); - for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) { + for(DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) { final double abof = computeABOF(relation, kernelMatrix, pA, s); minmaxabod.put(abof); abodvalues.putDouble(pA, abof); } // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<>("Angle-Based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY); return new OutlierResult(scoreMeta, scoreResult); } @@ -137,24 +147,24 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe s.reset(); // Reused double simAA = kernelMatrix.getSimilarity(pA, pA); - for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) { - if (DBIDUtil.equal(nB, pA)) { + for(DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) { + if(DBIDUtil.equal(nB, pA)) { continue; } double simBB = kernelMatrix.getSimilarity(nB, nB); double simAB = kernelMatrix.getSimilarity(pA, nB); double sqdAB = simAA + simBB - simAB - simAB; - if (!(sqdAB > 0.)) { + if(!(sqdAB > 0.)) { continue; } - for (DBIDIter nC = relation.iterDBIDs(); nC.valid(); nC.advance()) { - if (DBIDUtil.equal(nC, pA) || DBIDUtil.compare(nC, nB) < 0) { + for(DBIDIter nC = relation.iterDBIDs(); nC.valid(); nC.advance()) { + if(DBIDUtil.equal(nC, pA) || DBIDUtil.compare(nC, nB) < 0) { continue; } double simCC = kernelMatrix.getSimilarity(nC, nC); double simAC = kernelMatrix.getSimilarity(pA, nC); double sqdAC = simAA + simCC - simAC; - if (!(sqdAC > 0.)) { + if(!(sqdAC > 0.)) { continue; } // Exploit bilinearity of scalar product: @@ -190,7 +200,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { /** * Parameter for the kernel function. 
*/ @@ -199,13 +209,13 @@ public class ABOD<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe /** * Distance function. */ - protected SimilarityFunction<V, DoubleDistance> kernelFunction = null; + protected SimilarityFunction<V> kernelFunction = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final ObjectParameter<SimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, SimilarityFunction.class, PolynomialKernelFunction.class); - if (config.grab(param)) { + final ObjectParameter<SimilarityFunction<V>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, SimilarityFunction.class, PolynomialKernelFunction.class); + if(config.grab(param)) { kernelFunction = param.instantiateClass(config); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java index ee6bd434..56bedaac 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/FastABOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/FastABOD.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,9 +35,9 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import 
de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction; import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -46,6 +46,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; @@ -60,9 +61,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * Fast-ABOD (approximateABOF) version. * - * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in - * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge - * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008. + * Reference: + * <p> + * H.-P. Kriegel, M. Schubert, and A. Zimek:<br /> + * Angle-Based Outlier Detection in High-dimensional Data.<br /> + * In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining + * (KDD '08), Las Vegas, NV, 2008. + * </p> * * @author Matthias Schubert (Original Code) * @author Erich Schubert (ELKIfication) @@ -71,8 +76,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; */ @Title("Approximate ABOD: Angle-Based Outlier Detection") @Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.") -@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. 
on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946") -public class FastABOD<V extends NumberVector<?>> extends ABOD<V> { +@Reference(authors = "H.-P. Kriegel, M. Schubert, A. Zimek", // +title = "Angle-Based Outlier Detection in High-dimensional Data", // +booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", // +url = "http://dx.doi.org/10.1145/1401890.1401946") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.FastABOD", "fastabod" }) +public class FastABOD<V extends NumberVector> extends ABOD<V> { /** * The logger for this class. */ @@ -89,7 +98,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> { * @param kernelFunction kernel function to use * @param k Number of nearest neighbors */ - public FastABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction, int k) { + public FastABOD(SimilarityFunction<? super V> kernelFunction, int k) { super(kernelFunction); this.k = k; } @@ -104,51 +113,52 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> { public OutlierResult run(Database db, Relation<V> relation) { DBIDs ids = relation.getDBIDs(); // Build a kernel matrix, to make O(n^3) slightly less bad. 
- SimilarityQuery<V, DoubleDistance> sq = db.getSimilarityQuery(relation, kernelFunction); + SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction); KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids); WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); DoubleMinMax minmaxabod = new DoubleMinMax(); MeanVariance s = new MeanVariance(); - for (DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) { + for(DBIDIter pA = ids.iter(); pA.valid(); pA.advance()) { s.reset(); final double simAA = kernelMatrix.getSimilarity(pA, pA); // Choose the k-min nearest ComparableMaxHeap<DoubleDBIDPair> nn = new ComparableMaxHeap<>(k); - for (DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) { - if (DBIDUtil.equal(nB, pA)) { + for(DBIDIter nB = relation.iterDBIDs(); nB.valid(); nB.advance()) { + if(DBIDUtil.equal(nB, pA)) { continue; } double simBB = kernelMatrix.getSimilarity(nB, nB); double simAB = kernelMatrix.getSimilarity(pA, nB); double sqdAB = simAA + simBB - simAB - simAB; - if (!(sqdAB > 0.)) { + if(!(sqdAB > 0.)) { continue; } - if (nn.size() < k) { + if(nn.size() < k) { nn.add(DBIDUtil.newPair(sqdAB, nB)); - } else if (sqdAB < nn.peek().doubleValue()) { + } + else if(sqdAB < nn.peek().doubleValue()) { nn.replaceTopElement(DBIDUtil.newPair(sqdAB, nB)); } } - for (ObjectHeap.UnsortedIter<DoubleDBIDPair> iB = nn.unsortedIter(); iB.valid(); iB.advance()) { + for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iB = nn.unsortedIter(); iB.valid(); iB.advance()) { DoubleDBIDPair nB = iB.get(); double sqdAB = nB.doubleValue(); double simAB = kernelMatrix.getSimilarity(pA, nB); - if (!(sqdAB > 0.)) { + if(!(sqdAB > 0.)) { continue; } - for (ObjectHeap.UnsortedIter<DoubleDBIDPair> iC = nn.unsortedIter(); iC.valid(); iC.advance()) { + for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iC = nn.unsortedIter(); iC.valid(); iC.advance()) { DoubleDBIDPair nC = iC.get(); - if (DBIDUtil.compare(nC, nB) < 0) { + 
if(DBIDUtil.compare(nC, nB) < 0) { continue; } double sqdAC = nC.doubleValue(); double simAC = kernelMatrix.getSimilarity(pA, nC); - if (!(sqdAC > 0.)) { + if(!(sqdAC > 0.)) { continue; } // Exploit bilinearity of scalar product: @@ -169,7 +179,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> { } // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<>("Angle-Based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY); return new OutlierResult(scoreMeta, scoreResult); } @@ -191,7 +201,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> { * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>> extends ABOD.Parameterizer<V> { + public static class Parameterizer<V extends NumberVector> extends ABOD.Parameterizer<V> { /** * Parameter for the nearest neighbors. 
*/ @@ -206,7 +216,7 @@ public class FastABOD<V extends NumberVector<?>> extends ABOD<V> { protected void makeOptions(Parameterization config) { super.makeOptions(config); final IntParameter kP = new IntParameter(K_ID); - if (config.grab(kP)) { + if(config.grab(kP)) { k = kP.intValue(); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java index 37b4d050..0ef19a50 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LBABOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/LBABOD.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,9 +35,9 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction; import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import 
de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.DoubleMinHeap; @@ -70,9 +71,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * Outlier detection using variance analysis on angles, especially for high * dimensional data sets. * - * H.-P. Kriegel, M. Schubert, and A. Zimek: Angle-Based Outlier Detection in - * High-dimensional Data. In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge - * Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008. + * Reference: + * <p> + * H.-P. Kriegel, M. Schubert, and A. Zimek:<br /> + * Angle-Based Outlier Detection in High-dimensional Data.<br /> + * In: Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining + * (KDD '08), Las Vegas, NV, 2008. + * </p> * * @author Matthias Schubert (Original Code) * @author Erich Schubert (ELKIfication) @@ -81,8 +86,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; */ @Title("LB-ABOD: Lower Bounded Angle-Based Outlier Detection") @Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.") -@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946") -public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> { +@Reference(authors = "H.-P. Kriegel, M. Schubert, A. Zimek", // +title = "Angle-Based Outlier Detection in High-dimensional Data", // +booktitle = "Proc. 14th ACM SIGKDD Int. Conf. 
on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", // +url = "http://dx.doi.org/10.1145/1401890.1401946") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LBABOD", "lb-abod" }) +public class LBABOD<V extends NumberVector> extends FastABOD<V> { /** * The logger for this class. */ @@ -100,7 +109,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> { * @param k k parameter * @param l Number of outliers to find exact */ - public LBABOD(SimilarityFunction<? super V, DoubleDistance> kernelFunction, int k, int l) { + public LBABOD(SimilarityFunction<? super V> kernelFunction, int k, int l) { super(kernelFunction, k); this.l = l; } @@ -114,7 +123,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> { @Override public OutlierResult run(Database db, Relation<V> relation) { DBIDs ids = relation.getDBIDs(); - SimilarityQuery<V, DoubleDistance> sq = relation.getDatabase().getSimilarityQuery(relation, kernelFunction); + SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction); KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids); // Output storage. @@ -237,7 +246,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> { LOG.statistics(new LongStatistic("lb-abod.refinements", refinements)); } // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids); OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY); return new OutlierResult(scoreMeta, scoreResult); } @@ -259,7 +268,7 @@ public class LBABOD<V extends NumberVector<?>> extends FastABOD<V> { * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>> extends FastABOD.Parameterizer<V> { + public static class Parameterizer<V extends NumberVector> extends FastABOD.Parameterizer<V> { /** * Parameter to specify the number of outliers to compute exactly. */ diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/package-info.java new file mode 100644 index 00000000..f729559f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/anglebased/package-info.java @@ -0,0 +1,27 @@ +/** + * Angle-based outlier detection algorithms. + */ + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.algorithm.outlier.anglebased;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java index 76191cf2..5a02fb56 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/EMOutlier.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -24,10 +24,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.EM;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.em.EM;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.EMModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -35,7 +35,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -64,7 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz // TODO: re-use an existing EM when present?
@Title("EM Outlier: Outlier Detection based on the generic EM clustering")
@Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.")
-public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class EMOutlier<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -73,14 +74,14 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl /**
* Inner algorithm.
*/
- private EM<V> emClustering;
+ private EM<V, ?> emClustering;
/**
* Constructor with an existing em clustering algorithm.
*
* @param emClustering EM clustering algorithm to use.
*/
- public EMOutlier(EM<V> emClustering) {
+ public EMOutlier(EM<V, ?> emClustering) {
super();
this.emClustering = emClustering;
}
@@ -94,13 +95,13 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl */
public OutlierResult run(Database database, Relation<V> relation) {
emClustering.setSoft(true);
- Clustering<EMModel<V>> emresult = emClustering.run(database, relation);
+ Clustering<?> emresult = emClustering.run(database, relation);
Relation<double[]> soft = null;
- for (Iter<Result> iter = emresult.getHierarchy().iterChildren(emresult); iter.valid(); iter.advance()) {
- if (!(iter.get() instanceof Relation)) {
+ for(Iter<Result> iter = emresult.getHierarchy().iterChildren(emresult); iter.valid(); iter.advance()) {
+ if(!(iter.get() instanceof Relation)) {
continue;
}
- if (((Relation<?>) iter.get()).getDataTypeInformation() == EM.SOFT_TYPE) {
+ if(((Relation<?>) iter.get()).getDataTypeInformation() == EM.SOFT_TYPE) {
@SuppressWarnings("unchecked")
Relation<double[]> rel = (Relation<double[]>) iter.get();
soft = rel;
@@ -109,16 +110,16 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl double globmax = 0.0;
WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double maxProb = Double.POSITIVE_INFINITY;
double[] probs = soft.get(iditer);
- for (double prob : probs) {
+ for(double prob : probs) {
maxProb = Math.min(1. - prob, maxProb);
}
emo_score.putDouble(iditer, maxProb);
globmax = Math.max(maxProb, globmax);
}
- Relation<Double> scoreres = new MaterializedRelation<>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("EM outlier scores", "em-outlier", emo_score, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
// combine results.
OutlierResult result = new OutlierResult(meta, scoreres);
@@ -144,13 +145,16 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
- protected EM<V> em = null;
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+ /**
+ * EM clustering algorithm to run.
+ */
+ protected EM<V, ?> em;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- Class<EM<V>> cls = ClassGenericsUtil.uglyCastIntoSubclass(EM.class);
+ Class<EM<V, ?>> cls = ClassGenericsUtil.uglyCastIntoSubclass(EM.class);
em = config.tryInstantiate(cls);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java new file mode 100644 index 00000000..c6155527 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/KMeansOutlierDetection.java @@ -0,0 +1,178 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans; +import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansLloyd; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.model.ModelUtil; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Outlier detection by using k-means clustering. + * + * The scores are assigned by the objects distance to the nearest center. 
+ * + * We don't have a clear reference for this approach, but it seems to be a best + * practise in some areas to remove objects that have the largest distance from + * their center. If you need to cite this approach, please cite the ELKI version + * you used (use the <a href="http://elki.dbs.ifi.lmu.de/wiki/Publications">ELKI + * publication list</a> for citation information and BibTeX templates). + * + * @author Erich Schubert + * + * @apiviz.has KMeans + * + * @param <O> Object type + */ +public class KMeansOutlierDetection<O extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(KMeansOutlierDetection.class); + + /** + * Clustering algorithm to use + */ + KMeans<O, ?> clusterer; + + /** + * Constructor. + * + * @param clusterer Clustering algorithm + */ + public KMeansOutlierDetection(KMeans<O, ?> clusterer) { + super(); + this.clusterer = clusterer; + } + + /** + * Run the outlier detection algorithm. + * + * @param database Database + * @param relation Relation + * @return Outlier detection result + */ + public OutlierResult run(Database database, Relation<O> relation) { + DistanceFunction<? super O> df = clusterer.getDistanceFunction(); + DistanceQuery<O> dq = database.getDistanceQuery(relation, df); + + // TODO: improve ELKI api to ensure we're using the same DBIDs! + Clustering<?> c = clusterer.run(database, relation); + + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB); + DoubleMinMax mm = new DoubleMinMax(); + + @SuppressWarnings("unchecked") + NumberVector.Factory<O> factory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory(); + List<? extends Cluster<?>> clusters = c.getAllClusters(); + for(Cluster<?> cluster : clusters) { + // FIXME: use a primitive distance function on number vectors instead. 
+ O mean = factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation)); + for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { + double dist = dq.distance(mean, iter); + scores.put(iter, dist); + mm.put(dist); + } + } + + // Build result representation. + DoubleRelation scoreResult = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs()); + OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., Double.POSITIVE_INFINITY, 0.); + return new OutlierResult(scoreMeta, scoreResult); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(clusterer.getDistanceFunction().getInputTypeRestriction()); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterizer. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer { + /** + * Parameter for choosing the clustering algorithm. 
+ */ + public static final OptionID CLUSTERING_ID = new OptionID("kmeans.algorithm", // + "Clustering algorithm to use for detecting outliers."); + + /** + * Clustering algorithm to use + */ + KMeans<O, ?> clusterer; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + ObjectParameter<KMeans<O, ?>> clusterP = new ObjectParameter<>(CLUSTERING_ID, KMeans.class, KMeansLloyd.class); + if(config.grab(clusterP)) { + clusterer = clusterP.instantiateClass(config); + } + } + + @Override + protected KMeansOutlierDetection<O> makeInstance() { + return new KMeansOutlierDetection<>(clusterer); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java new file mode 100644 index 00000000..3bd9cf8b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/SilhouetteOutlierDetection.java @@ -0,0 +1,253 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.evaluation.clustering.internal.EvaluateSilhouette; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Outlier detection by using the Silhouette 
Coefficients. + * + * Silhouette values are computed as in: + * <p> + * P. J. Rousseeuw<br /> + * Silhouettes: A graphical aid to the interpretation and validation of cluster + * analysis<br /> + * In: Journal of Computational and Applied Mathematics Volume 20, November 1987 + * </p> + * + * but then used as outlier scores. To cite this outlier detection approach, + * please cite the ELKI version you used (use the <a + * href="http://elki.dbs.ifi.lmu.de/wiki/Publications">ELKI publication list</a> + * for citation information and BibTeX templates). + * + * @author Erich Schubert + * + * @apiviz.has ClusteringAlgorithm + * + * @param <O> Object type + */ +@Reference(authors = "P. J. Rousseeuw", // +title = "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis", // +booktitle = "Journal of Computational and Applied Mathematics, Volume 20", // +url = "http://dx.doi.org/10.1016%2F0377-0427%2887%2990125-7") +public class SilhouetteOutlierDetection<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(SilhouetteOutlierDetection.class); + + /** + * Clustering algorithm to use + */ + ClusteringAlgorithm<?> clusterer; + + /** + * Keep noise "clusters" merged, instead of breaking them into singletons. + */ + private boolean mergenoise = false; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param clusterer Clustering algorithm + * @param mergenoise Flag to keep "noise" clusters merged, instead of breaking + * them into singletons. + */ + public SilhouetteOutlierDetection(DistanceFunction<? 
super O> distanceFunction, ClusteringAlgorithm<?> clusterer, boolean mergenoise) { + super(distanceFunction); + this.clusterer = clusterer; + this.mergenoise = mergenoise; + } + + @Override + public OutlierResult run(Database database) { + Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction()); + DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction()); + + // TODO: improve ELKI api to ensure we're using the same DBIDs! + Clustering<?> c = clusterer.run(database); + + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB); + DoubleMinMax mm = new DoubleMinMax(); + + List<? extends Cluster<?>> clusters = c.getAllClusters(); + for(Cluster<?> cluster : clusters) { + if(cluster.size() <= 1 || (!mergenoise && cluster.isNoise())) { + // As suggested in Rousseeuw, we use 0 for singletons. + for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { + scores.put(iter, 0.); + } + mm.put(0.); + continue; + } + ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs()); + double[] as = new double[ids.size()]; // temporary storage. 
+ DBIDArrayIter it1 = ids.iter(), it2 = ids.iter(); + for(it1.seek(0); it1.valid(); it1.advance()) { + // a: In-cluster distances + double a = as[it1.getOffset()]; // Already computed distances + for(it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) { + final double dist = dq.distance(it1, it2); + a += dist; + as[it2.getOffset()] += dist; + } + a /= (ids.size() - 1); + // b: other clusters: + double min = Double.POSITIVE_INFINITY; + for(Cluster<?> ocluster : clusters) { + if(ocluster == /* yes, reference identity */cluster) { + continue; + } + if(!mergenoise && ocluster.isNoise()) { + // Treat noise cluster as singletons: + for(DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) { + double dist = dq.distance(it1, it3); + if(dist < min) { + min = dist; + } + } + continue; + } + final DBIDs oids = ocluster.getIDs(); + double b = 0.; + for(DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) { + b += dq.distance(it1, it3); + } + b /= oids.size(); + if(b < min) { + min = b; + } + } + final double score = (min - a) / Math.max(min, a); + scores.put(it1, score); + mm.put(score); + } + } + + // Build result representation. 
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs()); + OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5); + return new OutlierResult(scoreMeta, scoreResult); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + final TypeInformation dt = getDistanceFunction().getInputTypeRestriction(); + TypeInformation[] t = clusterer.getInputTypeRestriction(); + for(TypeInformation i : t) { + if(dt.isAssignableFromType(i)) { + return t; + } + } + // Prepend distance type: + TypeInformation[] t2 = new TypeInformation[t.length + 1]; + t2[0] = dt; + System.arraycopy(t, 0, t2, 1, t.length); + return t2; + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterizer. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * Parameter for choosing the clustering algorithm + */ + public static final OptionID CLUSTERING_ID = new OptionID("silhouette.clustering", // + "Clustering algorithm to use for the silhouette coefficients."); + + /** + * Clustering algorithm to use + */ + ClusteringAlgorithm<?> clusterer; + + /** + * Keep noise "clusters" merged, instead of breaking them into singletons. 
+ */ + private boolean mergenoise = false; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + ObjectParameter<ClusteringAlgorithm<?>> clusterP = new ObjectParameter<>(CLUSTERING_ID, ClusteringAlgorithm.class); + if(config.grab(clusterP)) { + clusterer = clusterP.instantiateClass(config); + } + + Flag noiseP = new Flag(EvaluateSilhouette.Parameterizer.MERGENOISE_ID); + if(config.grab(noiseP)) { + mergenoise = noiseP.isTrue(); + } + } + + @Override + protected SilhouetteOutlierDetection<O> makeInstance() { + return new SilhouetteOutlierDetection<>(distanceFunction, clusterer, mergenoise); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/package-info.java new file mode 100644 index 00000000..15ee771e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/clustering/package-info.java @@ -0,0 +1,27 @@ +/** + * Clustering based outlier detection. + */ + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.algorithm.outlier.clustering;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java index 5cafe04d..1fa43ff6 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/AbstractDBOutlier.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -24,52 +24,54 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
/**
* Simple distance based outlier detection algorithms.
*
+ * Reference:
* <p>
- * Reference: E.M. Knorr, R. T. Ng: Algorithms for Mining Distance-Based
- * Outliers in Large Datasets, In: Procs Int. Conf. on Very Large Databases
- * (VLDB'98), New York, USA, 1998.
+ * E.M. Knorr, R. T. Ng:<br />
+ * Algorithms for Mining Distance-Based Outliers in Large Datasets,<br />
+ * In: Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998.
+ * </p>
*
* @author Lisa Reichert
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
*/
-public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "E.M. Knorr, R. T. Ng", //
+title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", //
+booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
+public abstract class AbstractDBOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
- * Parameter to specify the size of the D-neighborhood
+ * Radius parameter d.
*/
- public static final OptionID D_ID = new OptionID("dbod.d", "size of the D-neighborhood");
-
- /**
- * Holds the value of {@link #D_ID}.
- */
- private D d;
+ private double d;
/**
* Constructor with actual parameters.
*
* @param distanceFunction distance function to use
- * @param d d value
+ * @param d radius d value
*/
- public AbstractDBOutlier(DistanceFunction<? super O, D> distanceFunction, D d) {
+ public AbstractDBOutlier(DistanceFunction<? super O> distanceFunction, double d) {
super(distanceFunction);
this.d = d;
}
@@ -86,7 +88,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra DoubleDataStore dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Density-Based Outlier Detection", "db-outlier", dbodscore, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -99,7 +101,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra * @param d distance
* @return computed scores
*/
- protected abstract DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d);
+ protected abstract DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d);
@Override
public TypeInformation[] getInputTypeRestriction() {
@@ -113,11 +115,16 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra *
* @apiviz.exclude
*/
- public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public abstract static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the size of the D-neighborhood
+ */
+ public static final OptionID D_ID = new OptionID("dbod.d", "size of the D-neighborhood");
+
/**
* Query radius
*/
- protected D d = null;
+ protected double d;
@Override
protected void makeOptions(Parameterization config) {
@@ -130,9 +137,9 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra *
* @param config Parameterization
*/
- protected void configD(Parameterization config, DistanceFunction<?, D> distanceFunction) {
- final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
- final DistanceParameter<D> param = new DistanceParameter<>(D_ID, distanceFactory);
+ protected void configD(Parameterization config, DistanceFunction<?> distanceFunction) {
+ final DoubleParameter param = new DoubleParameter(D_ID) //
+ .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if(config.grab(param)) {
d = param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java index 4f4d12bf..62e26830 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierDetection.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -29,15 +29,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -49,10 +49,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * Simple distance-based outlier detection algorithm. The user has to specify two
* parameters, d and p. An object is flagged as an outlier if at least a fraction p of all
* data objects have a distance above d from it.
+ *
+ * Reference:
* <p>
- * Reference: E.M. Knorr, R. T. Ng: Algorithms for Mining Distance-Based
- * Outliers in Large Datasets, In: Procs Int. Conf. on Very Large Databases
- * (VLDB'98), New York, USA, 1998.
+ * E.M. Knorr, R. T. Ng:<br />
+ * Algorithms for Mining Distance-Based Outliers in Large Datasets,<br />
+ * In: Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998.
+ * </p>
*
* This paper presents several Distance Based Outlier Detection algorithms.
* Implemented here is a simple index based algorithm as presented in section
@@ -63,25 +66,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * @apiviz.has KNNQuery
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
*/
@Title("DBOD: Distance Based Outlier Detection")
@Description("If the D-neighborhood of an object contains only very few objects (less than (1-p) percent of the data) this object is flagged as an outlier")
-@Reference(authors = "E.M. Knorr, R. T. Ng", title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
-public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutlier<O, D> {
+@Reference(authors = "E.M. Knorr, R. T. Ng", //
+title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", //
+booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierDetection" })
+public class DBOutlierDetection<O> extends AbstractDBOutlier<O> {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(DBOutlierDetection.class);
/**
- * Parameter to specify the minimum fraction of objects that must be outside
- * the D- neighborhood of an outlier
- */
- public static final OptionID P_ID = new OptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
-
- /**
- * Holds the value of {@link #P_ID}.
+ * Density threshold p, given as a fraction of the data set size.
*/
private double p;
@@ -92,15 +91,15 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl * @param d distance query radius
* @param p percentage parameter
*/
- public DBOutlierDetection(DistanceFunction<O, D> distanceFunction, D d, double p) {
+ public DBOutlierDetection(DistanceFunction<O> distanceFunction, double d, double p) {
super(distanceFunction, d);
this.p = p;
}
@Override
- protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double neighborhoodSize) {
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
// maximum number of objects in the D-neighborhood of an outlier
int m = (int) ((distFunc.getRelation().size()) * (1 - p));
@@ -115,13 +114,13 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl // if index exists, kNN query. if the distance to the mth nearest neighbor
// is more than d -> object is outlier
if(knnQuery != null) {
- for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) { + for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
- final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, m);
+ final KNNList knns = knnQuery.getKNNForDBID(iditer, m);
if(LOG.isDebugging()) {
LOG.debugFine("distance to mth nearest neighbour" + knns.toString());
}
- if(knns.get(Math.min(m, knns.size()) - 1).getDistance().compareTo(neighborhoodSize) <= 0) {
+ if(knns.get(Math.min(m, knns.size()) - 1).doubleValue() <= neighborhoodSize) {
// flag as outlier
scores.putDouble(iditer, 1.0);
}
@@ -136,12 +135,12 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl }
else {
// range query for each object. stop if m objects are found
- for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) { + for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
int count = 0;
- for (DBIDIter iterator = distFunc.getRelation().iterDBIDs(); iterator.valid() && count < m; iterator.advance()) {
- D currentDistance = distFunc.distance(iditer, iterator);
- if(currentDistance.compareTo(neighborhoodSize) <= 0) {
+ for(DBIDIter iterator = distFunc.getRelation().iterDBIDs(); iterator.valid() && count < m; iterator.advance()) {
+ double currentDistance = distFunc.distance(iditer, iterator);
+ if(currentDistance <= neighborhoodSize) {
count++;
}
}
@@ -152,9 +151,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl progressOFlags.setProcessed(counter, LOG);
}
}
- if(progressOFlags != null) {
- progressOFlags.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(progressOFlags);
return scores;
}
@@ -170,7 +167,16 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl *
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDBOutlier.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDBOutlier.Parameterizer<O> {
+ /**
+ * Parameter to specify the minimum fraction of objects that must be outside
+ * the D-neighborhood of an outlier
+ */
+ public static final OptionID P_ID = new OptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
+
+ /**
+ * Density threshold p.
+ */
protected double p = 0.0;
@Override
@@ -183,7 +189,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl }
@Override
- protected DBOutlierDetection<O, D> makeInstance() {
+ protected DBOutlierDetection<O> makeInstance() {
return new DBOutlierDetection<>(distanceFunction, d, p);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java index d6528682..ac097d75 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/DBOutlierScore.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -33,8 +33,8 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -46,17 +46,28 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * score thus eliminating this parameter and turning the method into a ranking
* method instead of a labelling one.
*
+ * Reference:
+ * <p>
+ * E.M. Knorr, R. T. Ng:<br />
+ * Algorithms for Mining Distance-Based Outliers in Large Datasets,<br />
+ * In: Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998.
+ * </p>
+ *
* @author Lisa Reichert
*
* @apiviz.has RangeQuery
*
* @param <O> Database object type
- * @param <D> Distance type
*/
@Title("Distance based outlier score")
-@Description("Generalization of the original DB-Outlier approach to a ranking method, by turning the fraction parameter into the output value.")
-@Reference(prefix = "Generalization of a method proposed in", authors = "E.M. Knorr, R. T. Ng", title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
-public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<O, D> {
+@Description("Generalization of the original DB-Outlier approach to a ranking method, "//
+ + "by turning the fraction parameter into the output value.")
+@Reference(prefix = "Generalization of a method proposed in", //
+authors = "E.M. Knorr, R. T. Ng", //
+title = "Algorithms for Mining Distance-Based Outliers in Large Datasets", //
+booktitle = "Procs Int. Conf. on Very Large Databases (VLDB'98), New York, USA, 1998")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.DBOutlierScore" })
+public class DBOutlierScore<O> extends AbstractDBOutlier<O> {
/**
* The logger for this class.
*/
@@ -68,19 +79,19 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier< * @param distanceFunction Distance function
* @param d distance radius parameter
*/
- public DBOutlierScore(DistanceFunction<O, D> distanceFunction, D d) {
+ public DBOutlierScore(DistanceFunction<O> distanceFunction, double d) {
super(distanceFunction, d);
}
@Override
- protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
final double size = distFunc.getRelation().size();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
// TODO: use bulk when implemented.
- for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) { + for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute percentage of neighbors in the given neighborhood with size d
double n = (rangeQuery.getRangeForDBID(iditer, d).size()) / size;
scores.putDouble(iditer, 1.0 - n);
@@ -100,9 +111,9 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier< *
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractDBOutlier.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDBOutlier.Parameterizer<O> {
@Override
- protected DBOutlierScore<O, D> makeInstance() {
+ protected DBOutlierScore<O> makeInstance() {
return new DBOutlierScore<>(distanceFunction, d);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java index e0cdd0c5..6eac9e95 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/HilOut.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -28,6 +28,7 @@ import java.util.HashSet; import java.util.Set;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -36,20 +37,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -57,9 +55,9 @@ import de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMaxHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMinHeap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -71,7 +69,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* Fast Outlier Detection in High Dimensional Spaces
@@ -96,8 +93,12 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; */
@Title("Fast Outlier Detection in High Dimensional Spaces")
@Description("Algorithm to compute outliers using Hilbert space filling curves")
-@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", url = "http://dx.doi.org/10.1145/375663.375668")
-public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "F. Angiulli, C. Pizzuti", //
+title = "Fast Outlier Detection in High Dimensional Spaces", //
+booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", //
+url = "http://dx.doi.org/10.1145/375663.375668")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.HilOut" })
+public class HilOut<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -131,7 +132,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo /**
* Distance query
*/
- private DistanceQuery<O, DoubleDistance> distq;
+ private DistanceQuery<O> distq;
/**
* Set sizes, total and current iteration
@@ -143,13 +144,6 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo */
private double omega_star;
- // public int distcomp = 1;
-
- /**
- * Comparator for sorting the heaps.
- */
- private static final Comparator<? super DistanceDBIDPair<?>> COMPARATOR = DistanceDBIDResultUtil.distanceComparator();
-
/**
* Type of output: all scores (upper bounds) or top n only
*
@@ -190,21 +184,19 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo double[] min;
double diameter = 0; // Actually "length of edge"
{
- Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
- min = new double[d];
- double[] max = new double[d];
- for (int i = 0; i < d; i++) {
- min[i] = hbbs.first.doubleValue(i);
- max[i] = hbbs.second.doubleValue(i);
+ double[][] hbbs = RelationUtil.computeMinMax(relation);
+ min = hbbs[0];
+ double[] max = hbbs[1];
+ for(int i = 0; i < d; i++) {
diameter = Math.max(diameter, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
- for (int i = 0; i < d; i++) {
+ for(int i = 0; i < d; i++) {
double diff = (diameter - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
}
}
@@ -216,7 +208,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
// Main part: 1. Phase max. d+1 loops
- for (int j = 0; j <= d && n_star < n; j++) {
+ for(int j = 0; j <= d && n_star < n; j++) {
// initialize (clear) out and wlb - not 100% clear in the paper
h.out.clear();
h.wlb.clear();
@@ -226,51 +218,49 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo scan(h, (int) (k * capital_n / (double) capital_n_star));
// determine the true outliers (n_star)
trueOutliers(h);
- if (progressTrueOut != null) {
+ if(progressTrueOut != null) {
progressTrueOut.setProcessed(n_star, LOG);
}
// Build the top Set as out + wlb
h.top.clear();
HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature entry = iter.get();
top_keys.add(entry.id);
h.top.add(entry);
}
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature entry = iter.get();
- if (!top_keys.contains(entry.id)) {
+ if(!top_keys.contains(entry.id)) {
// No need to update top_keys - discarded
h.top.add(entry);
}
}
- if (progressHilOut != null) {
- progressHilOut.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(progressHilOut);
}
// 2. Phase: Additional Scan if less than n true outliers determined
- if (n_star < n) {
+ if(n_star < n) {
h.out.clear();
h.wlb.clear();
// TODO: reinitialize shift to 0?
scan(h, capital_n);
}
- if (progressHilOut != null) {
+ if(progressHilOut != null) {
progressHilOut.setProcessed(d, LOG);
progressHilOut.ensureCompleted(LOG);
}
- if (progressTrueOut != null) {
+ if(progressTrueOut != null) {
progressTrueOut.setProcessed(n, LOG);
progressTrueOut.ensureCompleted(LOG);
}
DoubleMinMax minmax = new DoubleMinMax();
// Return weights in out
- if (tn == ScoreType.TopN) {
+ if(tn == ScoreType.TopN) {
minmax.put(0.0);
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
hilout_weight.putDouble(iditer, 0.0);
}
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature ent = iter.get();
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
@@ -278,12 +268,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo }
// Return all weights in pf
else {
- for (HilFeature ent : h.pf) {
+ for(HilFeature ent : h.pf) {
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
}
}
- Relation<Double> scoreResult = new MaterializedRelation<>("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("HilOut weight", "hilout-weight", hilout_weight, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -297,35 +287,37 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo */
private void scan(HilbertFeatures hf, int k0) {
final int mink0 = Math.min(2 * k0, capital_n - 1);
- if (LOG.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
}
- for (int i = 0; i < hf.pf.length; i++) {
- if (hf.pf[i].ubound < omega_star) {
+ for(int i = 0; i < hf.pf.length; i++) {
+ if(hf.pf[i].ubound < omega_star) {
continue;
}
- if (hf.pf[i].lbound < hf.pf[i].ubound) {
+ if(hf.pf[i].lbound < hf.pf[i].ubound) {
double omega = hf.fastUpperBound(i);
- if (omega < omega_star) {
+ if(omega < omega_star) {
hf.pf[i].ubound = omega;
- } else {
+ }
+ else {
int maxcount;
// capital_n-1 instead of capital_n: all, except self
- if (hf.top.contains(hf.pf[i])) {
+ if(hf.top.contains(hf.pf[i])) {
maxcount = capital_n - 1;
- } else {
+ }
+ else {
maxcount = mink0;
}
innerScan(hf, i, maxcount);
}
}
- if (hf.pf[i].ubound > 0) {
+ if(hf.pf[i].ubound > 0) {
hf.updateOUT(i);
}
- if (hf.pf[i].lbound > 0) {
+ if(hf.pf[i].lbound > 0) {
hf.updateWLB(i);
}
- if (hf.wlb.size() >= n) {
+ if(hf.wlb.size() >= n) {
omega_star = Math.max(omega_star, hf.wlb.peek().lbound);
}
}
@@ -344,40 +336,43 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo int a = i, b = i;
int level = h, levela = h, levelb = h;
// Explore up to "maxcount" neighbors in this pass
- for (int count = 0; count < maxcount; count++) {
+ for(int count = 0; count < maxcount; count++) {
final int c; // Neighbor to explore
- if (a == 0) { // At left end, explore right
+ if(a == 0) { // At left end, explore right
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
- } else if (b >= capital_n - 1) { // At right end, explore left
+ }
+ else if(b >= capital_n - 1) { // At right end, explore left
// assert (a > 0);
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- } else if (hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
+ }
+ else if(hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- } else {
+ }
+ else {
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
}
- if (!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
+ if(!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
// hf.distcomp ++;
- hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id).doubleValue(), k);
- if (hf.pf[i].nn.size() == k) {
- if (hf.pf[i].sum_nn < omega_star) {
+ hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id), k);
+ if(hf.pf[i].nn.size() == k) {
+ if(hf.pf[i].sum_nn < omega_star) {
break; // stop = true
}
final int mlevel = Math.max(levela, levelb);
- if (mlevel < level) {
+ if(mlevel < level) {
level = mlevel;
final double delta = hf.minDistLevel(hf.pf[i].id, level);
- if (delta >= hf.pf[i].nn.peek().doubleDistance()) {
+ if(delta >= hf.pf[i].nn.peek().doubleValue()) {
break; // stop = true
}
}
@@ -387,17 +382,17 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo double br = hf.boxRadius(i, a - 1, b + 1);
double newlb = 0.0;
double newub = 0.0;
- for (ObjectHeap.UnsortedIter<DoubleDistanceDBIDPair> iter = hf.pf[i].nn.unsortedIter(); iter.valid(); iter.advance()) {
- DoubleDistanceDBIDPair entry = iter.get();
- newub += entry.doubleDistance();
- if (entry.doubleDistance() <= br) {
- newlb += entry.doubleDistance();
+ for(ObjectHeap.UnsortedIter<DoubleDBIDPair> iter = hf.pf[i].nn.unsortedIter(); iter.valid(); iter.advance()) {
+ DoubleDBIDPair entry = iter.get();
+ newub += entry.doubleValue();
+ if(entry.doubleValue() <= br) {
+ newlb += entry.doubleValue();
}
}
- if (newlb > hf.pf[i].lbound) {
+ if(newlb > hf.pf[i].lbound) {
hf.pf[i].lbound = newlb;
}
- if (newub < hf.pf[i].ubound) {
+ if(newub < hf.pf[i].ubound) {
hf.pf[i].ubound = newub;
}
}
@@ -411,9 +406,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo private void trueOutliers(HilbertFeatures h) {
n_star = 0;
- for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ for(ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
HilFeature entry = iter.get();
- if (entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
+ if(entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
n_star++;
}
}
@@ -494,8 +489,8 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo this.pf = new HilFeature[relation.size()];
int pos = 0;
- for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new ComparatorMaxHeap<DoubleDistanceDBIDPair>(k, COMPARATOR));
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new ComparableMaxHeap<DoubleDBIDPair>(k));
}
this.out = new ComparatorMinHeap<>(n, new Comparator<HilFeature>() {
@Override
@@ -523,42 +518,45 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo // FIXME: 64 bit mode untested - sign bit is tricky to handle correctly
// with the rescaling. 63 bit should be fine. The sign bit probably needs
// to be handled differently, or at least needs careful testing of the API
- if (h >= 32) { // 32 to 63 bit
+ if(h >= 32) { // 32 to 63 bit
final long scale = Long.MAX_VALUE; // = 63 bits
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
long[] coord = new long[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- } else if (h >= 16) { // 16-31 bit
+ }
+ else if(h >= 16) { // 16-31 bit
final int scale = ~1 >>> 1;
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
int[] coord = new int[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- } else if (h >= 8) { // 8-15 bit
+ }
+ else if(h >= 8) { // 8-15 bit
final int scale = ~1 >>> 16;
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
short[] coord = new short[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 16);
}
- } else { // 1-7 bit
+ }
+ else { // 1-7 bit
final int scale = ~1 >>> 8;
- for (int i = 0; i < pf.length; i++) {
- NumberVector<?> obj = relation.get(pf[i].id);
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector obj = relation.get(pf[i].id);
byte[] coord = new byte[d];
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 24);
@@ -566,13 +564,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo }
java.util.Arrays.sort(pf);
// Update levels
- for (int i = 0; i < pf.length - 1; i++) {
+ for(int i = 0; i < pf.length - 1; i++) {
pf[i].level = minRegLevel(i, i + 1);
}
// Count candidates
capital_n_star = 0;
- for (int i = 0; i < pf.length; i++) {
- if (pf[i].ubound >= omega_star) {
+ for(int i = 0; i < pf.length; i++) {
+ if(pf[i].ubound >= omega_star) {
capital_n_star++;
}
}
@@ -584,11 +582,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param i position in pf of the feature to be inserted
*/
private void updateOUT(int i) {
- if (out.size() < n) {
+ if(out.size() < n) {
out.add(pf[i]);
- } else {
+ }
+ else {
HilFeature head = out.peek();
- if (pf[i].ubound > head.ubound) {
+ if(pf[i].ubound > head.ubound) {
// replace smallest
out.replaceTopElement(pf[i]);
}
@@ -601,11 +600,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param i position in pf of the feature to be inserted
*/
private void updateWLB(int i) {
- if (wlb.size() < n) {
+ if(wlb.size() < n) {
wlb.add(pf[i]);
- } else {
+ }
+ else {
HilFeature head = wlb.peek();
- if (pf[i].lbound > head.lbound) {
+ if(pf[i].lbound > head.lbound) {
// replace smallest
wlb.replaceTopElement(pf[i]);
}
@@ -622,12 +622,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo private double fastUpperBound(int i) {
int pre = i;
int post = i;
- while (post - pre < k) {
+ while(post - pre < k) {
int pre_level = (pre - 1 >= 0) ? pf[pre - 1].level : -2;
int post_level = (post < capital_n - 1) ? pf[post].level : -2;
- if (post_level >= pre_level) {
+ if(post_level >= pre_level) {
post++;
- } else {
+ }
+ else {
pre--;
}
}
@@ -642,12 +643,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param level Level of the corresponding r-region
*/
private double minDistLevel(DBID id, int level) {
- final NumberVector<?> obj = relation.get(id);
+ final NumberVector obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
// 2 ^ - (level - 1)
final double r = 1.0 / (1 << (level - 1));
double dist = Double.POSITIVE_INFINITY;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.min(dist, Math.min(p_m_r, r - p_m_r));
}
@@ -662,36 +663,39 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param level Level of the corresponding r-region
*/
private double maxDistLevel(DBID id, int level) {
- final NumberVector<?> obj = relation.get(id);
+ final NumberVector obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
final double r = 1.0 / (1 << (level - 1));
double dist;
- if (t == 1.0) {
+ if(t == 1.0) {
dist = 0.0;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
dist += Math.max(p_m_r, r - p_m_r);
}
- } else if (t == 2.0) {
+ }
+ else if(t == 2.0) {
dist = 0.0;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
double a = Math.max(p_m_r, r - p_m_r);
dist += a * a;
}
dist = Math.sqrt(dist);
- } else if (!Double.isInfinite(t)) {
+ }
+ else if(!Double.isInfinite(t)) {
dist = 0.0;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist += Math.pow(Math.max(p_m_r, r - p_m_r), t);
}
dist = Math.pow(dist, 1.0 / t);
- } else {
+ }
+ else {
dist = Double.NEGATIVE_INFINITY;
- for (int dim = 0; dim < d; dim++) {
+ for(int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.max(dist, Math.max(p_m_r, r - p_m_r));
}
@@ -707,9 +711,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @return Number of level shared
*/
private int numberSharedLevels(long[] a, long[] b) {
- for (int i = 0, j = a.length - 1; i < a.length; i++, j--) {
+ for(int i = 0, j = a.length - 1; i < a.length; i++, j--) {
final long diff = a[j] ^ b[j];
- if (diff != 0) {
+ if(diff != 0) {
// expected unused = available - used
final int expected = (a.length * Long.SIZE) - (d * h);
return ((BitsUtil.numberOfLeadingZeros(diff) + i * Long.SIZE) - expected) / d;
@@ -758,14 +762,16 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo private double boxRadius(int i, int a, int b) {
// level are inversely ordered to box sizes. min -> max
final int level;
- if (a < 0) {
- if (b >= pf.length) {
+ if(a < 0) {
+ if(b >= pf.length) {
return Double.POSITIVE_INFINITY;
}
level = maxRegLevel(i, b);
- } else if (b >= pf.length) {
+ }
+ else if(b >= pf.length) {
level = maxRegLevel(i, a);
- } else {
+ }
+ else {
level = Math.max(maxRegLevel(i, a), maxRegLevel(i, b));
}
return minDistLevel(pf[i].id, level);
@@ -778,7 +784,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param dim Dimension
* @return Projected and shifted position
*/
- private double getDimForObject(NumberVector<?> obj, int dim) {
+ private double getDimForObject(NumberVector obj, int dim) {
return (obj.doubleValue(dim) - min[dim]) / diameter + shift;
}
}
@@ -822,7 +828,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo /**
* Heap with the nearest known neighbors
*/
- public ObjectHeap<DoubleDistanceDBIDPair> nn;
+ public ObjectHeap<DoubleDBIDPair> nn;
/**
* Set representation of the nearest neighbors for faster lookups
@@ -840,7 +846,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param id Object ID
* @param nn Heap for neighbors
*/
- public HilFeature(DBID id, ObjectHeap<DoubleDistanceDBIDPair> nn) {
+ public HilFeature(DBID id, ObjectHeap<DoubleDBIDPair> nn) {
super();
this.id = id;
this.nn = nn;
@@ -861,21 +867,22 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo */
protected void insert(DBID id, double dt, int k) {
// assert (!nn_keys.contains(id));
- if (nn.size() < k) {
- DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ if(nn.size() < k) {
+ DoubleDBIDPair entry = DBIDUtil.newPair(dt, id);
nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
- } else {
- DoubleDistanceDBIDPair head = nn.peek();
- if (dt < head.doubleDistance()) {
+ }
+ else {
+ DoubleDBIDPair head = nn.peek();
+ if(dt < head.doubleValue()) {
head = nn.poll(); // Remove worst
- sum_nn -= head.doubleDistance();
+ sum_nn -= head.doubleValue();
nn_keys.remove(head);
// assert (nn.peek().doubleDistance() <= head.doubleDistance());
- DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ DoubleDBIDPair entry = DBIDUtil.newPair(dt, id);
nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
@@ -893,7 +900,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo *
* @param <O> Vector type
*/
- public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* Parameter to specify how many next neighbors should be used in the
* computation
@@ -951,27 +958,27 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID, 5);
- if (config.grab(kP)) {
+ if(config.grab(kP)) {
k = kP.getValue();
}
final IntParameter nP = new IntParameter(N_ID, 10);
- if (config.grab(nP)) {
+ if(config.grab(nP)) {
n = nP.getValue();
}
final IntParameter hP = new IntParameter(H_ID, 32);
- if (config.grab(hP)) {
+ if(config.grab(hP)) {
h = hP.getValue();
}
ObjectParameter<LPNormDistanceFunction> distP = AbstractDistanceBasedAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, LPNormDistanceFunction.class);
- if (config.grab(distP)) {
+ if(config.grab(distP)) {
distfunc = distP.instantiateClass(config);
}
final EnumParameter<ScoreType> tnP = new EnumParameter<>(TN_ID, ScoreType.class, ScoreType.TopN);
- if (config.grab(tnP)) {
+ if(config.grab(tnP)) {
tn = tnP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java index 503487c8..97970f0a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNOutlier.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -31,24 +32,25 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
@@ -56,11 +58,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * Outlier Detection based on the distance of an object to its k nearest
* neighbor.
*
+ * This implementation differs from the original pseudocode: there, the k
+ * nearest neighbors do not exclude the point that is currently evaluated,
+ * whereas here the query point is not counted. I.e. for k=1 the resulting
+ * score is the distance to the 1-nearest neighbor that is not the query point
+ * and therefore should match k=2 in the exact pseudocode - a value of k=1 in
+ * the original code does not make sense, as the 1NN distance will be 0 for
+ * every point in the database. If you for any reason want to use the original
+ * algorithm, subtract 1 from the k parameter.
+ *
+ * Reference:
* <p>
- * Reference:<br>
- * S. Ramaswamy, R. Rastogi, K. Shim: Efficient Algorithms for Mining Outliers
- * from Large Data Sets.</br> In: Proc. of the Int. Conf. on Management of Data,
- * Dallas, Texas, 2000.
+ * S. Ramaswamy, R. Rastogi, K. Shim:<br />
+ * Efficient Algorithms for Mining Outliers from Large Data Sets.<br />
+ * In: Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000.
* </p>
*
* @author Lisa Reichert
@@ -68,24 +78,22 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @apiviz.has KNNQuery
*
* @param <O> the type of DatabaseObjects handled by this Algorithm
- * @param <D> the type of Distance used by this Algorithm
*/
@Title("KNN outlier: Efficient Algorithms for Mining Outliers from Large Data Sets")
@Description("Outlier Detection based on the distance of an object to its k nearest neighbor.")
-@Reference(authors = "S. Ramaswamy, R. Rastogi, K. Shim", title = "Efficient Algorithms for Mining Outliers from Large Data Sets", booktitle = "Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000", url = "http://dx.doi.org/10.1145/342009.335437")
-public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "S. Ramaswamy, R. Rastogi, K. Shim", //
+title = "Efficient Algorithms for Mining Outliers from Large Data Sets", //
+booktitle = "Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000", //
+url = "http://dx.doi.org/10.1145/342009.335437")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.KNNOutlier", "knno" })
+public class KNNOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(KNNOutlier.class);
/**
- * Parameter to specify the k nearest neighbor
- */
- public static final OptionID K_ID = new OptionID("knno.k", "k nearest neighbor");
-
- /**
- * The parameter k
+ * The parameter k (excluding the query point; k + 1 neighbors are queried)
*/
private int k;
@@ -93,9 +101,9 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista * Constructor for a single kNN query.
*
* @param distanceFunction distance function to use
- * @param k Value of k
+ * @param k Value of k (excluding the query point!)
*/
- public KNNOutlier(DistanceFunction<? super O, D> distanceFunction, int k) {
+ public KNNOutlier(DistanceFunction<? super O> distanceFunction, int k) {
super(distanceFunction);
this.k = k;
}
@@ -104,39 +112,27 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista * Runs the algorithm in the timed evaluation part.
*/
public OutlierResult run(Database database, Relation<O> relation) {
- final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
+ final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
- if(LOG.isVerbose()) {
- LOG.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
- }
- FiniteProgress progressKNNDistance = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
- final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, k);
- final double dkn;
- if(knns instanceof DoubleDistanceKNNList) {
- dkn = ((DoubleDistanceKNNList) knns).doubleKNNDistance();
- }
- else {
- dkn = knns.getKNNDistance().doubleValue();
- }
+ // (assuming the query point is always included, with distance 0)
+ final KNNList knns = knnQuery.getKNNForDBID(iditer, k + 1);
+ final double dkn = knns.getKNNDistance();
knno_score.putDouble(iditer, dkn);
minmax.put(dkn);
- if(progressKNNDistance != null) {
- progressKNNDistance.incrementProcessed(LOG);
- }
- }
- if(progressKNNDistance != null) {
- progressKNNDistance.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
- Relation<Double> scoreres = new MaterializedRelation<>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
+ LOG.ensureCompleted(prog);
+ DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreres);
}
@@ -158,20 +154,31 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista *
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the k nearest neighbor
+ */
+ public static final OptionID K_ID = new OptionID("knno.k", //
+ "The k nearest neighbor, excluding the query point "//
+ + "(i.e. query point is the 0-nearest-neighbor)");
+
+ /**
+ * k parameter
+ */
protected int k = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID);
+ final IntParameter kP = new IntParameter(K_ID)//
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
}
@Override
- protected KNNOutlier<O, D> makeInstance() {
+ protected KNNOutlier<O> makeInstance() {
return new KNNOutlier<>(distanceFunction, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java new file mode 100644 index 00000000..b09f7480 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/KNNWeightOutlier.java @@ -0,0 +1,205 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Outlier Detection based on the accumulated distances of a point to its k
+ * nearest neighbors.
+ *
+ * As in the original publication (as far as we could tell from the pseudocode
+ * included), the current point is not included in the nearest neighbors (see
+ * figures in the publication). This matches the intuition common in nearest
+ * neighbor classification, where the evaluated instances are not part of the
+ * training set; but it contrasts to the pseudocode of the kNN outlier method
+ * and the database interpretation (which returns all objects stored in the
+ * database).
+ *
+ * Furthermore, we report the sum of the k distances (called "weight" in the
+ * original publication). Other implementations may return the average distance
+ * instead, and therefore yield different results.
+ *
+ * Reference:
+ * <p>
+ * F. Angiulli, C. Pizzuti:<br />
+ * Fast Outlier Detection in High Dimensional Spaces.<br />
+ * In: Proc. European Conference on Principles of Knowledge Discovery and Data
+ * Mining (PKDD'02), Helsinki, Finland, 2002.
+ * </p>
+ *
+ * @author Lisa Reichert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ */
+@Title("KNNWeight outlier detection")
+@Description("Outlier detection based on the sum of distances of an object to its k nearest neighbors.")
+@Reference(authors = "F. Angiulli, C. Pizzuti", //
+title = "Fast Outlier Detection in High Dimensional Spaces", //
+booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02), Helsinki, Finland, 2002", //
+url = "http://dx.doi.org/10.1007/3-540-45681-3_2")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.KNNWeightOutlier", "knnw" })
+public class KNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(KNNWeightOutlier.class);
+
+ /**
+ * Holds the number of nearest neighbors whose distances are summed (not
+ * including the query point; the kNN queries below request k + 1 results).
+ */
+ private int k;
+
+ /**
+ * Constructor with parameters.
+ *
+ * @param distanceFunction Distance function
+ * @param k k Parameter (not including query point!)
+ */
+ public KNNWeightOutlier(DistanceFunction<? super O> distanceFunction, int k) {
+ super(distanceFunction);
+ this.k = k;
+ }
+
+ /**
+ * Runs the algorithm in the timed evaluation part.
+ *
+ * For every object, the distances to its first k neighbors other than the
+ * object itself are summed up and stored as the outlier score. If fewer than
+ * k such neighbors are returned by the query, the score is set to positive
+ * infinity.
+ *
+ * @param database Database context
+ * @param relation Data relation
+ * @return Outlier result holding the summed kNN distances and their
+ * observed minimum and maximum
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1); // k + 1: leaves room for the query point itself
+
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute kNN weights.", relation.size(), LOG) : null;
+
+ DoubleMinMax minmax = new DoubleMinMax();
+ WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNList knn = knnQuery.getKNNForDBID(iditer, k + 1);
+ double skn = 0; // sum of the distances to the k nearest neighbors
+ int i = 0; // number of neighbors so far
+ for(DoubleDBIDListIter neighbor = knn.iter(); i < k && neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(iditer, neighbor)) {
+ continue; // skip the query object itself; it is not counted towards k
+ }
+ skn += neighbor.doubleValue();
+ ++i;
+ }
+ if(i < k) {
+ // Less than k neighbors found
+ // Approximative index, or k > data set size!
+ skn = Double.POSITIVE_INFINITY;
+ }
+ knnw_score.putDouble(iditer, skn);
+ minmax.put(skn);
+
+ LOG.incrementProcessed(prog);
+ }
+ LOG.ensureCompleted(prog);
+
+ DoubleRelation res = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", knnw_score, relation.getDBIDs());
+ OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
+ return new OutlierResult(meta, res);
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> the type of objects handled by the parameterized algorithm
+ */
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify the k nearest neighbor.
+ */
+ public static final OptionID K_ID = new OptionID("knnwod.k", //
+ "The k nearest neighbor, excluding the query point "//
+ + "(i.e. query point is the 0-nearest-neighbor)");
+
+ /**
+ * k parameter (excluding the query point); stays 0 unless the option is
+ * successfully read in makeOptions.
+ */
+ protected int k = 0;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter kP = new IntParameter(K_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); // presumably enforces k >= 1 - confirm against CommonConstraints
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ }
+
+ @Override
+ protected KNNWeightOutlier<O> makeInstance() {
+ return new KNNWeightOutlier<>(distanceFunction, k);
+ }
+ }
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java index a5b39146..67380335 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ODIN.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; along with this program. If not, see <http://www.gnu.org/licenses/>. */ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; @@ -32,17 +33,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; import de.lmu.ifi.dbs.elki.logging.Logging; import 
de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; @@ -60,16 +61,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * <p> * V. Hautamäki and I. Kärkkäinen and P Fränti<br /> * Outlier detection using k-nearest neighbour graph<br /> - * Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004 <br /> + * Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004 * </p> * * @author Erich Schubert * * @param <O> Object type - * @param <D> Distance type */ -@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", title = "Outlier detection using k-nearest neighbour graph", booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", url = "http://dx.doi.org/10.1109/ICPR.2004.1334558") -public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", // +title = "Outlier detection using k-nearest neighbour graph", // +booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", // +url = "http://dx.doi.org/10.1109/ICPR.2004.1334558") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.ODIN" }) +public class ODIN<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * Class logger. */ @@ -86,7 +90,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit * @param distanceFunction Distance function * @param k k parameter */ - public ODIN(DistanceFunction<? super O, D> distanceFunction, int k) { + public ODIN(DistanceFunction<? 
super O> distanceFunction, int k) { super(distanceFunction); this.k = k; } @@ -100,8 +104,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit */ public OutlierResult run(Database database, Relation<O> relation) { // Get the query functions: - DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction()); - KNNQuery<O, D> knnq = database.getKNNQuery(dq, k); + DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(dq, k); // Get the objects to process, and a data storage for counting and output: DBIDs ids = relation.getDBIDs(); @@ -112,7 +116,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit // Process all objects for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { // Find the nearest neighbors (using an index, if available!) - KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + DBIDs neighbors = knnq.getKNNForDBID(iter, k); // For each neighbor, except ourselves, increase the in-degree: for(DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) { if(DBIDUtil.equal(iter, nei)) { @@ -131,7 +135,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit // Wrap the result and add metadata. 
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., inc * (ids.size() - 1), 1); - Relation<Double> rel = new MaterializedRelation<>("ODIN In-Degree", "odin", TypeUtil.DOUBLE, scores, ids); + DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids); return new OutlierResult(meta, rel); } @@ -153,9 +157,8 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit * @apiviz.exclude * * @param <O> Object type - * @param <D> Distance type */ - public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Parameter for the number of nearest neighbors: * @@ -185,7 +188,7 @@ public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorit } @Override - protected ODIN<O, D> makeInstance() { + protected ODIN<O> makeInstance() { return new ODIN<>(distanceFunction, k); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java new file mode 100644 index 00000000..e1407679 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/ReferenceBasedOutlierDetection.java @@ -0,0 +1,325 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Collection;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.query.distance.PrimitiveDistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.ReferencePointsResult;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.referencepoints.GridBasedReferencePoints;
+import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic;
+
+/**
+ * Reference-Based Outlier Detection algorithm, an algorithm that computes kNN
+ * distances approximately, using reference points.
+ *
+ * kNN distances are approximated by the difference in distance from a reference
+ * point. For this approximation to be of high quality, triangle inequality is
+ * required; but the algorithm can also process non-metric distances.
+ *
+ * Reference:
+ * <p>
+ * Y. Pei, O. R. Zaiane, Y. Gao<br />
+ * An Efficient Reference-Based Approach to Outlier Detection in Large Datasets<br />
+ * In: Proc. IEEE Int. Conf. on Data Mining (ICDM'06), Hong Kong, China, 2006
+ * </p>
+ *
+ * @author Lisa Reichert
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf ReferencePointsHeuristic
+ */
+@Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
+@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
+@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", //
+title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", //
+booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06)", //
+url = "http://dx.doi.org/10.1109/ICDM.2006.17")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.ReferenceBasedOutlierDetection" })
+public class ReferenceBasedOutlierDetection extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(ReferenceBasedOutlierDetection.class);
+
+ /**
+ * Holds the number of neighbors to use for density estimation.
+ */
+ private int k;
+
+ /**
+ * Stores the reference point strategy.
+ */
+ private ReferencePointsHeuristic refp;
+
+ /**
+ * Constructor with parameters.
+ *
+ * @param k number of neighbors to use for density estimation
+ * @param distanceFunction distance function
+ * @param refp Reference points heuristic
+ */
+ public ReferenceBasedOutlierDetection(int k, PrimitiveDistanceFunction<? super NumberVector> distanceFunction, ReferencePointsHeuristic refp) {
+ super(distanceFunction);
+ this.k = k;
+ this.refp = refp;
+ }
+
+ /**
+ * Run the algorithm on the given relation.
+ *
+ * For every reference point, the distances of all objects to that point are
+ * computed and used to update a per-object density estimate. The densities
+ * are then divided by the maximum density and turned into the final score
+ * 1 - density / maximum. The reference points are attached to the result as
+ * a child result.
+ *
+ * @param database Database
+ * @param relation Relation to process
+ * @return Outlier result
+ * @throws AbortException if the heuristic yields no reference points, or if
+ * k is not smaller than the number of objects
+ */
+ public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
+ @SuppressWarnings("unchecked")
+ PrimitiveDistanceQuery<? super NumberVector> distq = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
+ Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
+ if(refPoints.size() < 1) {
+ throw new AbortException("Cannot compute ROS without reference points!");
+ }
+
+ DBIDs ids = relation.getDBIDs();
+ if(k >= ids.size()) { // NOTE(review): also rejects k == size, although the message only says "larger"
+ throw new AbortException("k must not be chosen larger than the database size!");
+ }
+ // storage of distance/score values; initialized to NaN so that the first
+ // reference point can be recognized in updateDensities.
+ WritableDoubleDataStore rbod_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);
+
+ // Compute density estimation, one pass per reference point:
+ for(NumberVector refPoint : refPoints) {
+ DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
+ updateDensities(rbod_score, referenceDists);
+ }
+ // compute maximum density
+ DoubleMinMax mm = new DoubleMinMax();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ mm.put(rbod_score.doubleValue(iditer));
+ }
+ // compute ROS: 1 - density / maximum density (the scale falls back to 1
+ // when the maximum is not positive, avoiding a division by zero)
+ double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.;
+ mm.reset(); // Reuse
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double score = 1 - (rbod_score.doubleValue(iditer) * scale);
+ mm.put(score);
+ rbod_score.putDouble(iditer, score);
+ }
+
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", rbod_score, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ // adds reference points to the result. header information for the
+ // visualizer to find the reference points in the result
+ result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
+ return result;
+ }
+
+ /**
+ * Computes for each object the distance to one reference point. (one
+ * dimensional representation of the data set)
+ *
+ * @param refPoint Reference Point Feature Vector
+ * @param database relation holding the objects to process
+ * @param distFunc Distance query used to compute the distance between an
+ * object and the reference point
+ * @return sorted list containing, for each object of the relation, its
+ * distance to the reference point together with the object id
+ */
+ protected DoubleDBIDList computeDistanceVector(NumberVector refPoint, Relation<? extends NumberVector> database, PrimitiveDistanceQuery<? super NumberVector> distFunc) {
+ ModifiableDoubleDBIDList referenceDists = DBIDUtil.newDistanceDBIDList(database.size());
+ for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
+ }
+ referenceDists.sort(); // presumably ascending by distance - confirm against ModifiableDoubleDBIDList
+ return referenceDists;
+ }
+
+ /**
+ * Update the density estimates for each object.
+ *
+ * @param rbod_score Density storage
+ * @param referenceDists Distances from current reference point
+ */
+ protected void updateDensities(WritableDoubleDataStore rbod_score, DoubleDBIDList referenceDists) {
+ DoubleDBIDListIter it = referenceDists.iter();
+ for(int l = 0; l < referenceDists.size(); l++) {
+ double density = computeDensity(referenceDists, it, l);
+ // computeDensity modified the iterator, reset:
+ it.seek(l);
+ // NaN indicates the first run.
+ if(!(density > rbod_score.doubleValue(it))) {
+ rbod_score.putDouble(it, density);
+ }
+ }
+ }
+
+ /**
+ * Computes the density of an object with respect to one reference point.
+ * The density is k divided by the sum of the k smallest differences between
+ * the reference distance of the object and the reference distances of the
+ * other objects. Neighbors and distances are thus computed approximately:
+ * instead of a normal NN search, the NN of an object are those objects that
+ * have a similar distance to the reference point, i.e. the objects that lie
+ * close to the object in the reference distance vector.
+ *
+ * The list is scanned outwards from the object in both directions; a side
+ * that has run out of entries is represented by an infinite gap.
+ *
+ * @param referenceDists vector of the reference distances; the gap
+ * computation (current value minus predecessor, successor minus current
+ * value) expects it to be in ascending order
+ * @param iter Iterator to this list (will be reused; this method changes its
+ * position via seek)
+ * @param index index of the current object
+ * @return density for one object and reference point; if the summed gaps
+ * are zero, the division does not yield a finite value
+ * @throws IndexOutOfBoundsException if both ends of the list are exhausted
+ * before k gaps have been collected
+ */
+ protected double computeDensity(DoubleDBIDList referenceDists, DoubleDBIDListIter iter, int index) {
+ final int size = referenceDists.size();
+ final double xDist = iter.seek(index).doubleValue();
+
+ int lef = index, rig = index;
+ double sum = 0.;
+ double lef_d = (--lef >= 0) ? xDist - iter.seek(lef).doubleValue() : Double.POSITIVE_INFINITY;
+ double rig_d = (++rig < size) ? iter.seek(rig).doubleValue() - xDist : Double.POSITIVE_INFINITY;
+ for(int i = 0; i < k; ++i) {
+ if(lef >= 0 && rig < size) {
+ // Both sides available: take the smaller gap (ties go to the right).
+ if(lef_d < rig_d) {
+ sum += lef_d;
+ // Advance the left cursor and recompute its gap.
+ lef_d = (--lef >= 0) ? xDist - iter.seek(lef).doubleValue() : Double.POSITIVE_INFINITY;
+ }
+ else {
+ sum += rig_d;
+ // Advance the right cursor and recompute its gap.
+ rig_d = (++rig < size) ? iter.seek(rig).doubleValue() - xDist : Double.POSITIVE_INFINITY;
+ }
+ }
+ else if(lef >= 0) {
+ // Choose left, since right is not available.
+ sum += lef_d;
+ // Advance the left cursor and recompute its gap.
+ lef_d = (--lef >= 0) ? xDist - iter.seek(lef).doubleValue() : Double.POSITIVE_INFINITY;
+ }
+ else if(rig < size) {
+ // Choose right, since left is not available.
+ sum += rig_d;
+ // Advance the right cursor and recompute its gap.
+ rig_d = (++rig < size) ? iter.seek(rig).doubleValue() - xDist : Double.POSITIVE_INFINITY;
+ }
+ else {
+ // Neither side has entries left: fewer than k other objects in the list.
+ throw new IndexOutOfBoundsException("Less than k objects?");
+ }
+ }
+ return k / sum;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(distanceFunction.getInputTypeRestriction()); // The only input restriction is the one imposed by the distance function.
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG; // Hands out the LOG reference, which is declared outside this method.
+ }
+
+ /**
+  * Parameterizer: reads the neighborhood size and the reference point
+  * heuristic from the configuration and builds the algorithm instance.
+  *
+  * @author Erich Schubert
+  *
+  * @apiviz.exclude
+  */
+ public static class Parameterizer extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector> {
+   /**
+    * Option selecting the heuristic that produces the reference points.
+    */
+   public static final OptionID REFP_ID = new OptionID("refod.refp", "The heuristic for finding reference points.");
+
+   /**
+    * Option giving the number of nearest neighbors of an object that are
+    * taken into account when computing its score; has to be an integer
+    * greater than 1.
+    */
+   public static final OptionID K_ID = new OptionID("refod.k", "The number of nearest neighbors");
+
+   /**
+    * Value read for {@link #K_ID}.
+    */
+   private int k;
+
+   /**
+    * Reference point strategy read for {@link #REFP_ID}.
+    */
+   private ReferencePointsHeuristic refp;
+
+   @Override
+   protected void makeOptions(Parameterization config) {
+     super.makeOptions(config);
+     // Neighborhood size, constrained to values greater than one.
+     final IntParameter kParam = new IntParameter(K_ID) //
+     .addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+     if(config.grab(kParam)) {
+       k = kParam.getValue();
+     }
+     // Reference point heuristic, instantiated from the same configuration.
+     final ObjectParameter<ReferencePointsHeuristic> heuristicParam = new ObjectParameter<>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
+     if(config.grab(heuristicParam)) {
+       refp = heuristicParam.instantiateClass(config);
+     }
+   }
+
+   @Override
+   protected ReferenceBasedOutlierDetection makeInstance() {
+     return new ReferenceBasedOutlierDetection(k, distanceFunction, refp);
+   }
+ }
+}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/package-info.java new file mode 100644 index 00000000..ac292a01 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/package-info.java @@ -0,0 +1,30 @@ +/** + * Distance-based outlier detection algorithms, such as DBOutlier and kNN. + * + * For methods based on <em>local</em> density, see package + * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.lof} instead. + */ + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java new file mode 100644 index 00000000..a26a7505 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java @@ -0,0 +1,118 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.parallel.Executor; +import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; + +/** + * Compute the kNN weight score, used by {@link ParallelKNNWeightOutlier}. 
+ * + * Needs the k nearest neighbors as input, for example from {@link KNNProcessor} + * + * @author Erich Schubert + */ +public class KNNWeightProcessor extends AbstractDoubleProcessor { + /** + * K parameter + */ + int k; + + /** + * Constructor. + * + * @param k K parameter + */ + public KNNWeightProcessor(int k) { + super(); + this.k = k; + } + + /** + * KNN query object + */ + SharedObject<? extends KNNList> input; + + /** + * Connect the input channel. + * + * @param input Input channel + */ + public void connectKNNInput(SharedObject<? extends KNNList> input) { + this.input = input; + } + + @Override + public Instance instantiate(Executor executor) { + return new Instance(k, executor.getInstance(input), executor.getInstance(output)); + } + + /** + * Instance for precomputing the kNN. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private static class Instance extends AbstractDoubleProcessor.Instance { + /** + * k Parameter + */ + int k; + + /** + * kNN query + */ + SharedObject.Instance<? extends KNNList> input; + + /** + * Constructor. + * + * @param k K parameter + * @param input kNN list input + * @param store Datastore to write to + */ + protected Instance(int k, SharedObject.Instance<? 
extends KNNList> input, SharedDouble.Instance store) { + super(store); + this.k = k; + this.input = input; + } + + @Override + public void map(DBIDRef id) { + final KNNList list = input.get(); + int i = 0; + double sum = 0; + for(DoubleDBIDListIter iter = list.iter(); iter.valid() && i < k; iter.advance(), ++i) { + sum += iter.doubleValue(); + } + output.set(sum); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java new file mode 100644 index 00000000..b7b43765 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java @@ -0,0 +1,181 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; +import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KDistanceProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Parallel implementation of KNN Outlier detection. + * + * Reference: + * <p> + * S. Ramaswamy, R. Rastogi, K. Shim:<br /> + * Efficient Algorithms for Mining Outliers from Large Data Sets.<br /> + * In: Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000. + * </p> + * + * This parallelized implementation is based on the easy-to-parallelize + * generalized pattern discussed in + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.composedOf KNNProcessor + * @apiviz.composedOf KDistanceProcessor + * + * @param <O> Object type + */ +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +public class ParallelKNNOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Parameter k + */ + private int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k K parameter + */ + public ParallelKNNOutlier(DistanceFunction<? 
super O> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Class logger + */ + private static final Logging LOG = Logging.getLogger(ParallelKNNOutlier.class); + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + public OutlierResult run(Database database, Relation<O> relation) { + DBIDs ids = relation.getDBIDs(); + WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1); + + // Compute the kNN + KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq); + SharedObject<KNNList> knnv = new SharedObject<>(); + knnm.connectKNNOutput(knnv); + // Extract the k-distance + KDistanceProcessor kdistm = new KDistanceProcessor(k + 1); + SharedDouble kdistv = new SharedDouble(); + kdistm.connectKNNInput(knnv); + kdistm.connectOutput(kdistv); + // Store in outlier scores + WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store); + storem.connectInput(kdistv); + // Gather statistics + DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor(); + mmm.connectInput(kdistv); + + ParallelExecutor.run(ids, knnm, kdistm, storem, mmm); + + DoubleMinMax minmax = mmm.getMinMax(); + DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", store, ids); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * K parameter + */ 
+ int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter kP = new IntParameter(KNNOutlier.Parameterizer.K_ID); + if(config.grab(kP)) { + k = kP.getValue(); + } + } + + @Override + protected ParallelKNNOutlier<O> makeInstance() { + return new ParallelKNNOutlier<>(distanceFunction, k); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java new file mode 100644 index 00000000..40639ec5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java @@ -0,0 +1,187 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; +import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Parallel implementation of KNN Weight Outlier 
detection. + * + * Reference: + * <p> + * F. Angiulli, C. Pizzuti:<br /> + * Fast Outlier Detection in High Dimensional Spaces.<br /> + * In: Proc. European Conference on Principles of Knowledge Discovery and Data + * Mining (PKDD'02), Helsinki, Finland, 2002. + * </p> + * + * This parallelized implementation is based on the easy-to-parallelize + * generalized pattern discussed in + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.composedOf KNNWeightProcessor + * + * @param <O> Object type + */ +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +public class ParallelKNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Parameter k + */ + private int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k K parameter + */ + public ParallelKNNWeightOutlier(DistanceFunction<? super O> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Class logger + */ + private static final Logging LOG = Logging.getLogger(ParallelKNNWeightOutlier.class); + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + /** + * Run the parallel kNN weight outlier detector. 
+ * + * @param database Database to process + * @param relation Relation to analyze + * @return Outlier detection result + */ + public OutlierResult run(Database database, Relation<O> relation) { + DBIDs ids = relation.getDBIDs(); + WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1); + + // Find kNN + KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq); + SharedObject<KNNList> knnv = new SharedObject<>(); + knnm.connectKNNOutput(knnv); + // Extract outlier score + KNNWeightProcessor kdistm = new KNNWeightProcessor(k + 1); + SharedDouble kdistv = new SharedDouble(); + kdistm.connectKNNInput(knnv); + kdistm.connectOutput(kdistv); + // Store in output result + WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store); + storem.connectInput(kdistv); + // And gather statistics for metadata + DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor(); + mmm.connectInput(kdistv); + + ParallelExecutor.run(ids, knnm, kdistm, storem, mmm); + + DoubleMinMax minmax = mmm.getMinMax(); + DoubleRelation scoreres = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", store, ids); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * K parameter + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter kP = new IntParameter(KNNWeightOutlier.Parameterizer.K_ID); + 
if(config.grab(kP)) { + k = kP.getValue(); + } + } + + @Override + protected ParallelKNNWeightOutlier<O> makeInstance() { + return new ParallelKNNWeightOutlier<>(distanceFunction, k); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java new file mode 100644 index 00000000..58090507 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java @@ -0,0 +1,27 @@ +/** + * Parallel implementations of distance-based outlier detectors. + */ + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java index f978365e..60e2ff00 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -40,21 +40,20 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -64,7 +63,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* Fast Outlier Detection Using the "approximate Local Correlation Integral".
@@ -85,12 +83,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * @apiviz.composedOf ALOCIQuadTree
*
* @param <O> Object type
- * @param <D> Distance type
*/
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
-public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ALOCI<O extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -119,7 +116,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex /**
* Distance function
*/
- private NumberVectorDistanceFunction<D> distFunc;
+ private NumberVectorDistanceFunction<?> distFunc;
/**
* Constructor.
@@ -130,7 +127,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex * @param g Number of grids to use
* @param rnd Random generator.
*/
- public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
+ public ALOCI(NumberVectorDistanceFunction<?> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
super();
this.distFunc = distanceFunction;
this.nmin = nmin;
@@ -147,13 +144,11 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex // Compute extend of dataset.
double[] min, max;
{
- Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
+ double[][] hbbs = RelationUtil.computeMinMax(relation);
+ min = hbbs[0];
+ max = hbbs[1];
double maxd = 0;
- min = new double[dim];
- max = new double[dim];
for(int i = 0; i < dim; i++) {
- min[i] = hbbs.first.doubleValue(i);
- max[i] = hbbs.second.doubleValue(i);
maxd = Math.max(maxd, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
@@ -169,9 +164,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex double[] nshift = new double[dim];
ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
qts.add(qt);
- if(progressPreproc != null) {
- progressPreproc.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(progressPreproc);
/*
* create the remaining g-1 shifted QuadTrees. This not clearly described in
* the paper and therefore implemented in a way that achieves good results
@@ -184,13 +177,9 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex }
qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
qts.add(qt);
- if(progressPreproc != null) {
- progressPreproc.incrementProcessed(LOG);
- }
- }
- if(progressPreproc != null) {
- progressPreproc.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressPreproc);
}
+ LOG.ensureCompleted(progressPreproc);
// aLOCI main loop: evaluate
FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
@@ -211,7 +200,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex continue;
}
// TODO: always use manhattan?
- if(ci == null || distFunc.distance(ci.getCenter(), obj).compareTo(distFunc.distance(ci2.getCenter(), obj)) > 0) {
+ if(ci == null || distFunc.distance(ci.getCenter(), obj) > distFunc.distance(ci2.getCenter(), obj)) {
ci = ci2;
}
}
@@ -229,7 +218,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex continue;
}
// TODO: always use manhattan?
- if(cj == null || distFunc.distance(cj.getCenter(), ci.getCenter()).compareTo(distFunc.distance(cj2.getCenter(), ci.getCenter())) > 0) {
+ if(cj == null || distFunc.distance(cj.getCenter(), ci.getCenter()) > distFunc.distance(cj2.getCenter(), ci.getCenter())) {
cj = cj2;
}
}
@@ -245,14 +234,10 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex // Store results
mdef_norm.putDouble(iditer, maxmdefnorm);
minmax.put(maxmdefnorm);
- if(progressLOCI != null) {
- progressLOCI.incrementProcessed(LOG);
- }
- }
- if(progressLOCI != null) {
- progressLOCI.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLOCI);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ LOG.ensureCompleted(progressLOCI);
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -336,7 +321,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex /**
* Relation indexed.
*/
- private Relation<? extends NumberVector<?>> relation;
+ private Relation<? extends NumberVector> relation;
/**
* Constructor.
@@ -347,7 +332,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex * @param nmin Maximum size for a page to split
* @param relation Relation to index
*/
- public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?>> relation) {
+ public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector> relation) {
super();
assert (min.length <= 32) : "Quadtrees are only supported for up to 32 dimensions";
this.shift = shift;
@@ -395,11 +380,11 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex if(dim == 0) {
DBIDArrayIter iter = ids.iter();
iter.seek(start);
- NumberVector<?> first = relation.get(iter);
+ NumberVector first = relation.get(iter);
iter.advance();
boolean degenerate = true;
loop: for(; iter.getOffset() < end; iter.advance()) {
- NumberVector<?> other = relation.get(iter);
+ NumberVector other = relation.get(iter);
for(int d = 0; d < lmin.length; d++) {
if(Math.abs(first.doubleValue(d) - other.doubleValue(d)) > 1E-15) {
degenerate = false;
@@ -481,7 +466,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex * @param level Level (controls scaling/wraping!)
* @return Shifted position
*/
- private double getShiftedDim(NumberVector<?> obj, int dim, int level) {
+ private double getShiftedDim(NumberVector obj, int dim, int level) {
double pos = obj.doubleValue(dim) + shift[dim];
pos = (pos - min[dim]) / width[dim] * (1 + level);
return pos - Math.floor(pos);
@@ -495,7 +480,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex * @param tlevel Target level
* @return Node
*/
- public Node findClosestNode(NumberVector<?> vec, int tlevel) {
+ public Node findClosestNode(NumberVector vec, int tlevel) {
Node cur = root;
for(int level = 0; level <= tlevel; level++) {
if(cur.children == null) {
@@ -650,7 +635,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex *
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* Parameter to specify the minimum neighborhood size
*/
@@ -694,13 +679,13 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex /**
* The distance function
*/
- private NumberVectorDistanceFunction<D> distanceFunction;
+ private NumberVectorDistanceFunction<?> distanceFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<NumberVectorDistanceFunction<D>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, NumberVectorDistanceFunction.class);
+ ObjectParameter<NumberVectorDistanceFunction<?>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, NumberVectorDistanceFunction.class);
if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
}
@@ -730,7 +715,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex }
@Override
- protected ALOCI<O, D> makeInstance() {
+ protected ALOCI<O> makeInstance() {
return new ALOCI<>(distanceFunction, nmin, alpha, g, rnd);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java new file mode 100644 index 00000000..09b5d8b8 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/COF.java @@ -0,0 +1,276 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Connectivity-based outlier factor (COF).
+ *
+ * Reference:
+ * <p>
+ * J. Tang, Z. Chen, A. W. C. Fu, D. W. Cheung<br />
+ * Enhancing effectiveness of outlier detections for low density patterns.<br />
+ * In Advances in Knowledge Discovery and Data Mining.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "J. Tang, Z. Chen, A. W. C. Fu, D. W. Cheung", //
+title = "Enhancing effectiveness of outlier detections for low density patterns", //
+booktitle = "In Advances in Knowledge Discovery and Data Mining", //
+url = "http://dx.doi.org/10.1007/3-540-47887-6_53")
+public class COF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(COF.class);
+
+ /**
+ * The number of neighbors to query (including the query point!)
+ */
+ protected int k;
+
+ /**
+ * Constructor.
+ *
+ * The neighborhood size is passed in without the query point, but it is
+ * stored incremented by one, because the {@link #k} field counts the query
+ * point as part of the neighborhood to query.
+ *
+ * @param k the number of neighbors to use for comparison (excluding the query
+ * point)
+ * @param distanceFunction the neighborhood distance function
+ */
+ public COF(int k, DistanceFunction<? super O> distanceFunction) {
+ super(distanceFunction);
+ this.k = k + 1; // + 1 for the query point itself
+ }
+
+  /**
+   * Execute COF on a relation: materialize the neighborhoods, derive the
+   * average chaining distance of every object, and turn these distances into
+   * connectivity-based outlier factors.
+   *
+   * @param database Database to query
+   * @param relation Data to process
+   * @return COF outlier result
+   */
+  public OutlierResult run(Database database, Relation<O> relation) {
+    final StepProgress steps = LOG.isVerbose() ? new StepProgress("COF", 3) : null;
+    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
+
+    // Step 1: neighborhoods of size k.
+    LOG.beginStep(steps, 1, "Materializing COF neighborhoods.");
+    final KNNQuery<O> knnQuery = DatabaseUtil.precomputedKNNQuery(database, relation, distQuery, k);
+    final DBIDs ids = relation.getDBIDs();
+
+    // Step 2: average chaining distances, kept in temporary storage.
+    LOG.beginStep(steps, 2, "Computing Average Chaining Distances.");
+    final int tempHints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP;
+    final WritableDoubleDataStore chainDists = DataStoreUtil.makeDoubleStorage(ids, tempHints);
+    computeAverageChainingDistances(knnQuery, distQuery, ids, chainDists);
+
+    // Step 3: the actual scores, together with their value range.
+    LOG.beginStep(steps, 3, "Computing Connectivity-based Outlier Factors.");
+    final int resultHints = DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB;
+    final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, resultHints);
+    final DoubleMinMax scoreRange = new DoubleMinMax();
+    computeCOFScores(knnQuery, ids, chainDists, scores, scoreRange);
+
+    LOG.setCompleted(steps);
+
+    // Wrap the scores into the result representation.
+    final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Connectivity-Based Outlier Factor", "cof-outlier", scores, ids);
+    final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+    return new OutlierResult(meta, scoreRelation);
+  }
+
+ /**
+ * Computes the average chaining distance, the average length of a path
+ * through the given set of points to each target. The authors of COF decided
+ * to approximate this value using a weighted mean that assumes every object
+ * is reached from the previous point (but actually every point could be best
+ * reachable from the first, in which case this does not make much sense.)
+ *
+ * For every object, the neighbors are chained greedily: each round picks the
+ * neighbor with the smallest current distance to the chain built so far,
+ * adds that distance multiplied by the round weight to the sum, and then
+ * lowers the remaining neighbors' distances using their distance to the
+ * neighbor just picked. The weighted sum divided by r * (r - 1) / 2, with r
+ * the size of the kNN result, is written to the storage.
+ *
+ * NOTE(review): the weights used in the loop add up to m * (m - 1) / 2 with
+ * m = min(r, k), whereas the divisor uses r. Both agree only when r <= k;
+ * confirm whether the kNN result can hold more than k entries. With r < 2
+ * the divisor is zero; confirm this cannot occur.
+ *
+ * TODO: can we accelerate this by using the kNN of the neighbors?
+ *
+ * @param knnq KNN query
+ * @param dq Distance query
+ * @param ids IDs to process
+ * @param acds Storage for average chaining distances
+ */
+ protected void computeAverageChainingDistances(KNNQuery<O> knnq, DistanceQuery<O> dq, DBIDs ids, WritableDoubleDataStore acds) {
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Computing average chaining distances", ids.size(), LOG) : null;
+
+ // Compute the chaining distances.
+ // We do <i>not</i> bother to materialize the chaining order.
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNList neighbors = knnq.getKNNForDBID(iter, k);
+ final int r = neighbors.size();
+ DoubleDBIDListIter it1 = neighbors.iter(), it2 = neighbors.iter();
+ // Current lowest distance from the chain to each neighbor. NaN marks the
+ // query point and, later on, neighbors that were already chained.
+ final double[] mindists = new double[r];
+ for(int i = 0; it1.valid(); it1.advance(), ++i) {
+ mindists[i] = DBIDUtil.equal(it1, iter) ? Double.NaN : it1.doubleValue();
+ }
+
+ double acsum = 0.;
+ for(int j = ((r < k) ? r : k) - 1; j > 0; --j) { // min(r, k) - 1 rounds, weight j decreasing to 1
+ // Find the minimum among the entries that are not NaN:
+ int minpos = -1;
+ double mindist = Double.NaN;
+ for(int i = 0; i < mindists.length; ++i) {
+ double curdist = mindists[i];
+ // Both values could be NaN, deliberately.
+ if(curdist == curdist && !(curdist > mindist)) { // curdist is not NaN, and mindist is NaN or >= curdist
+ minpos = i;
+ mindist = curdist;
+ }
+ }
+ acsum += mindist * j; // Weighted sum, decreasing weights
+ mindists[minpos] = Double.NaN; // NOTE(review): minpos is still -1 if no entry was usable; confirm this cannot happen
+ it1.seek(minpos); // it1 now refers to the neighbor just picked
+ // Update distances of the remaining neighbors
+ it2.seek(0);
+ for(int i = 0; it2.valid(); it2.advance(), ++i) {
+ final double curdist = mindists[i];
+ if(curdist != curdist) {
+ continue; // NaN = processed!
+ }
+ double newdist = dq.distance(it1, it2);
+ if(newdist < curdist) {
+ mindists[i] = newdist;
+ }
+ }
+ }
+ acds.putDouble(iter, acsum / (r * 0.5 * (r - 1.))); // divisor is r * (r - 1) / 2
+ LOG.incrementProcessed(lrdsProgress);
+ }
+ LOG.ensureCompleted(lrdsProgress);
+ }
+
+  /**
+   * Compute connectivity-based outlier factors.
+   *
+   * The COF of an object is its own average chaining distance divided by the
+   * mean average chaining distance of its neighbors, that is
+   * <code>|N(p)| * acd(p) / sum_{o in N(p)} acd(o)</code>. The query point is
+   * skipped when summing, so the factor is the number of neighbors actually
+   * aggregated. The field {@link #k} is not used as factor, because it also
+   * counts the query point and would inflate every score by (k + 1) / k.
+   *
+   * @param knnq KNN query
+   * @param ids IDs to process
+   * @param acds Average chaining distances
+   * @param cofs Connectivity outlier factor storage
+   * @param cofminmax Score minimum/maximum tracker
+   */
+  private void computeCOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore acds, WritableDoubleDataStore cofs, DoubleMinMax cofminmax) {
+    FiniteProgress progressCOFs = LOG.isVerbose() ? new FiniteProgress("COF for objects", ids.size(), LOG) : null;
+    for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+      final KNNList neighbors = knnq.getKNNForDBID(iter, k);
+      // Aggregate the average chaining distances of all neighbors:
+      double sum = 0.;
+      int count = 0;
+      for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+        // skip the point itself
+        if(DBIDUtil.equal(neighbor, iter)) {
+          continue;
+        }
+        sum += acds.doubleValue(neighbor);
+        ++count;
+      }
+      final double acd = acds.doubleValue(iter);
+      final double cof;
+      if(sum > 0.) {
+        // Ratio to the mean of the neighbors: acd / (sum / count).
+        cof = acd * count / sum;
+      }
+      else {
+        // No positive chaining distance among the neighbors: the object is
+        // either infinitely more isolated than them, or just like them.
+        cof = (acd > 0.) ? Double.POSITIVE_INFINITY : 1.;
+      }
+      cofs.putDouble(iter, cof);
+      // update minimum and maximum
+      cofminmax.put(cof);
+
+      LOG.incrementProcessed(progressCOFs);
+    }
+    LOG.ensureCompleted(progressCOFs);
+  }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); // the only input restriction is the one of the distance function
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG; // the static logger declared for this class
+ }
+
+  /**
+   * Parameterization class: reads the neighborhood size in addition to the
+   * options of the parent parameterizer, and builds the {@link COF} instance.
+   *
+   * @author Erich Schubert
+   *
+   * @apiviz.exclude
+   *
+   * @param <O> Object type
+   */
+  public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+    /**
+     * Option for the COF neighborhood size. The query object is not counted.
+     */
+    public static final OptionID K_ID = new OptionID("cof.k", "The number of neighbors (not including the query object) to use for computing the COF score.");
+
+    /**
+     * Neighborhood size; keeps its initial value unless the option is set.
+     */
+    protected int k = 2;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+
+      // The neighborhood size must be at least one.
+      final IntParameter kParam = new IntParameter(K_ID);
+      kParam.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+      if(config.grab(kParam)) {
+        this.k = kParam.intValue();
+      }
+    }
+
+    @Override
+    protected COF<O> makeInstance() {
+      return new COF<>(this.k, this.distanceFunction);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java index 2508b6b0..372bf68c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -38,20 +38,16 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import 
de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -59,6 +55,7 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -113,12 +110,14 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * @apiviz.has KNNQuery * * @param <O> the type of DatabaseObjects handled by this Algorithm - * @param <D> Distance type */ @Title("LOF: Local Outlier Factor") @Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'") -@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388") -public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander", // +title = "LOF: Identifying Density-Based Local Outliers", // +booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", // +url = "http://dx.doi.org/10.1145/342009.335388") +public class FlexibleLOF<O> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
*/ @@ -137,20 +136,12 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo /** * Neighborhood distance function. */ - protected DistanceFunction<? super O, D> referenceDistanceFunction; + protected DistanceFunction<? super O> referenceDistanceFunction; /** * Reachability distance function. */ - protected DistanceFunction<? super O, D> reachabilityDistanceFunction; - - /** - * Include object itself in kNN neighborhood. - * - * In the official LOF publication, the point itself is not considered to be - * part of its k nearest neighbors. - */ - private static boolean objectIsInKNN = false; + protected DistanceFunction<? super O> reachabilityDistanceFunction; /** * Constructor. @@ -160,10 +151,10 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * @param neighborhoodDistanceFunction the neighborhood distance function * @param reachabilityDistanceFunction the reachability distance function */ - public FlexibleLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) { + public FlexibleLOF(int krefer, int kreach, DistanceFunction<? super O> neighborhoodDistanceFunction, DistanceFunction<? super O> reachabilityDistanceFunction) { super(); - this.krefer = krefer + (objectIsInKNN ? 0 : 1); - this.kreach = kreach + (objectIsInKNN ? 0 : 1); + this.krefer = krefer + 1; + this.kreach = kreach + 1; this.referenceDistanceFunction = neighborhoodDistanceFunction; this.reachabilityDistanceFunction = reachabilityDistanceFunction; } @@ -178,9 +169,9 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo */ public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? 
new StepProgress("LOF", 3) : null; - Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog); - KNNQuery<O, D> kNNRefer = pair.getFirst(); - KNNQuery<O, D> kNNReach = pair.getSecond(); + Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog); + KNNQuery<O> kNNRefer = pair.getFirst(); + KNNQuery<O> kNNReach = pair.getSecond(); return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult(); } @@ -191,30 +182,29 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * @param stepprog the progress logger * @return the kNN queries for the algorithm */ - private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) { + private Pair<KNNQuery<O>, KNNQuery<O>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) { // "HEAVY" flag for knnReach since it is used more than once - KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); + KNNQuery<O> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); // No optimized kNN query - use a preprocessor! - if (!(knnReach instanceof PreprocessorKNNQuery)) { - if (stepprog != null) { - if (referenceDistanceFunction.equals(reachabilityDistanceFunction)) { + if(!(knnReach instanceof PreprocessorKNNQuery)) { + if(stepprog != null) { + if(referenceDistanceFunction.equals(reachabilityDistanceFunction)) { stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG); - } else { + } + else { stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. 
reachability distance function.", LOG); } } int kpreproc = (referenceDistanceFunction.equals(reachabilityDistanceFunction)) ? Math.max(kreach, krefer) : kreach; - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, reachabilityDistanceFunction, kpreproc); - database.addIndex(preproc); - DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction); - knnReach = preproc.getKNNQuery(rdq, kreach); + knnReach = DatabaseUtil.precomputedKNNQuery(database, relation, reachabilityDistanceFunction, kpreproc); } // knnReach is only used once - KNNQuery<O, D> knnRefer; - if (referenceDistanceFunction == reachabilityDistanceFunction || referenceDistanceFunction.equals(reachabilityDistanceFunction)) { + KNNQuery<O> knnRefer; + if(referenceDistanceFunction == reachabilityDistanceFunction || referenceDistanceFunction.equals(reachabilityDistanceFunction)) { knnRefer = knnReach; - } else { + } + else { // do not materialize the first neighborhood, since it is used only once knnRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer); } @@ -234,149 +224,118 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * @param stepprog Progress logger * @return LOF result */ - protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) { + protected LOFResult<O> doRunInTime(DBIDs ids, KNNQuery<O> kNNRefer, KNNQuery<O> kNNReach, StepProgress stepprog) { // Assert we got something - if (kNNRefer == null) { + if(kNNRefer == null) { throw new AbortException("No kNN queries supported by database for reference neighborhood distance function."); } - if (kNNReach == null) { + if(kNNReach == null) { throw new AbortException("No kNN queries supported by database for reachability distance function."); } // Compute LRDs - if (stepprog != null) { - stepprog.beginStep(2, "Computing LRDs.", LOG); - } - WritableDoubleDataStore lrds = 
computeLRDs(ids, kNNReach); + LOG.beginStep(stepprog, 2, "Computing LRDs."); + WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + computeLRDs(kNNReach, ids, lrds); // compute LOF_SCORE of each db object - if (stepprog != null) { - stepprog.beginStep(3, "Computing LOFs.", LOG); - } - Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer); - WritableDoubleDataStore lofs = lofsAndMax.getFirst(); + LOG.beginStep(stepprog, 3, "Computing LOFs."); + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); // track the maximum value for normalization. - DoubleMinMax lofminmax = lofsAndMax.getSecond(); + DoubleMinMax lofminmax = new DoubleMinMax(); + computeLOFs(kNNRefer, ids, lrds, lofs, lofminmax); - if (stepprog != null) { - stepprog.setCompleted(LOG); - } + LOG.setCompleted(stepprog); // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); - return new LOFResult<>(result, kNNRefer, kNNReach, lrds, lofs); } /** * Computes the local reachability density (LRD) of the specified objects. * - * @param ids the ids of the objects - * @param knnReach the precomputed neighborhood of the objects w.r.t. the + * @param knnq the precomputed neighborhood of the objects w.r.t. 
the * reachability distance - * @return the LRDs of the objects + * @param ids the ids of the objects + * @param lrds Reachability storage */ - protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) { - WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + protected void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) { FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null; - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - final KNNList<D> neighbors = knnReach.getKNNForDBID(iter, kreach); + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final KNNList neighbors = knnq.getKNNForDBID(iter, kreach); double sum = 0.0; int count = 0; - if (neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { - KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach); - final double nkdist; - if (neighborsNeighbors instanceof DoubleDistanceKNNList) { - nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance(); - } else { - nkdist = neighborsNeighbors.getKNNDistance().doubleValue(); - } - sum += Math.max(neighbor.doubleDistance(), nkdist); - count++; - } - } - } else { - for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { - KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach); - sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()); - count++; - } + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, 
iter)) { + continue; } + KNNList neighborsNeighbors = knnq.getKNNForDBID(neighbor, kreach); + sum += Math.max(neighbor.doubleValue(), neighborsNeighbors.getKNNDistance()); + count++; } // Avoid division by 0 final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY; lrds.putDouble(iter, lrd); - if (lrdsProgress != null) { - lrdsProgress.incrementProcessed(LOG); - } - } - if (lrdsProgress != null) { - lrdsProgress.ensureCompleted(LOG); + LOG.incrementProcessed(lrdsProgress); } - return lrds; + LOG.ensureCompleted(lrdsProgress); } /** * Computes the Local outlier factor (LOF) of the specified objects. * - * @param ids the ids of the objects - * @param lrds the LRDs of the objects - * @param knnRefer the precomputed neighborhood of the objects w.r.t. the + * @param knnq the precomputed neighborhood of the objects w.r.t. the * reference distance - * @return the LOFs of the objects and the maximum LOF + * @param ids IDs to process + * @param lrds Local reachability distances + * @param lofs Local outlier factor storage + * @param lofminmax Score minimum/maximum tracker */ - protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) { - WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); - // track the maximum value for normalization. - DoubleMinMax lofminmax = new DoubleMinMax(); - + protected void computeLOFs(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { FiniteProgress progressLOFs = LOG.isVerbose() ? 
new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null; - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - final double lrdp = lrds.doubleValue(iter); + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double lof; - if (lrdp > 0 && !Double.isInfinite(lrdp)) { - final KNNList<D> neighbors = knnRefer.getKNNForDBID(iter, krefer); - double sum = 0.0; + final double lrdp = lrds.doubleValue(iter); + final KNNList neighbors = knnq.getKNNForDBID(iter, krefer); + if(!Double.isInfinite(lrdp)) { + double sum = 0.; int count = 0; - for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself - if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { - sum += lrds.doubleValue(neighbor); - count++; + if(DBIDUtil.equal(neighbor, iter)) { + continue; + } + final double val = lrds.doubleValue(neighbor); + sum += val; + count++; + if(Double.isInfinite(val)) { + break; } } - lof = sum / (count * lrdp); - } else { + lof = sum / (lrdp * count); + } + else { lof = 1.0; } lofs.putDouble(iter, lof); // update minimum and maximum - if (!Double.isInfinite(lof)) { - lofminmax.put(lof); - } + lofminmax.put(lof); - if (progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } + LOG.incrementProcessed(progressLOFs); } - if (progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); - } - return new Pair<>(lofs, lofminmax); + LOG.ensureCompleted(progressLOFs); } @Override public TypeInformation[] getInputTypeRestriction() { final TypeInformation type; - if (reachabilityDistanceFunction.equals(referenceDistanceFunction)) { + if(reachabilityDistanceFunction.equals(referenceDistanceFunction)) { type = reachabilityDistanceFunction.getInputTypeRestriction(); - } else { + } + else { type = new CombinedTypeInformation(referenceDistanceFunction.getInputTypeRestriction(), 
reachabilityDistanceFunction.getInputTypeRestriction()); } return TypeUtil.array(type); @@ -393,7 +352,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @author Elke Achtert */ - public static class LOFResult<O, D extends NumberDistance<D, ?>> { + public static class LOFResult<O> { /** * The result of the run of the {@link FlexibleLOF} algorithm. */ @@ -402,22 +361,22 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo /** * The kNN query w.r.t. the reference neighborhood distance. */ - private final KNNQuery<O, D> kNNRefer; + private final KNNQuery<O> kNNRefer; /** * The kNN query w.r.t. the reachability distance. */ - private final KNNQuery<O, D> kNNReach; + private final KNNQuery<O> kNNReach; /** * The RkNN query w.r.t. the reference neighborhood distance. */ - private RKNNQuery<O, D> rkNNRefer; + private RKNNQuery<O> rkNNRefer; /** * The rkNN query w.r.t. the reachability distance. */ - private RKNNQuery<O, D> rkNNReach; + private RKNNQuery<O> rkNNReach; /** * The LRD values of the objects. @@ -439,7 +398,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * @param lrds the LRD values of the objects * @param lofs the LOF values of the objects */ - public LOFResult(OutlierResult result, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, WritableDoubleDataStore lrds, WritableDoubleDataStore lofs) { + public LOFResult(OutlierResult result, KNNQuery<O> kNNRefer, KNNQuery<O> kNNReach, WritableDoubleDataStore lrds, WritableDoubleDataStore lofs) { this.result = result; this.kNNRefer = kNNRefer; this.kNNReach = kNNReach; @@ -452,7 +411,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @return the kNN query w.r.t. 
the reference neighborhood distance */ - public KNNQuery<O, D> getKNNRefer() { + public KNNQuery<O> getKNNRefer() { return kNNRefer; } @@ -461,7 +420,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @return the kNN query w.r.t. the reachability distance */ - public KNNQuery<O, D> getKNNReach() { + public KNNQuery<O> getKNNReach() { return kNNReach; } @@ -497,7 +456,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @param rkNNRefer the query to set */ - public void setRkNNRefer(RKNNQuery<O, D> rkNNRefer) { + public void setRkNNRefer(RKNNQuery<O> rkNNRefer) { this.rkNNRefer = rkNNRefer; } @@ -506,7 +465,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @return the RkNN query w.r.t. the reference neighborhood distance */ - public RKNNQuery<O, D> getRkNNRefer() { + public RKNNQuery<O> getRkNNRefer() { return rkNNRefer; } @@ -515,7 +474,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @return the RkNN query w.r.t. the reachability distance */ - public RKNNQuery<O, D> getRkNNReach() { + public RKNNQuery<O> getRkNNReach() { return rkNNReach; } @@ -524,7 +483,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @param rkNNReach the query to set */ - public void setRkNNReach(RKNNQuery<O, D> rkNNReach) { + public void setRkNNReach(RKNNQuery<O> rkNNReach) { this.rkNNReach = rkNNReach; } } @@ -536,7 +495,7 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo * * @apiviz.exclude */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * The distance function to determine the reachability distance between * database objects. 
@@ -545,16 +504,16 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo /** * Parameter to specify the number of nearest neighbors of an object to be - * considered for computing its LOF_SCORE, must be an integer greater than - * 1. + * considered for computing its LOF score, must be an integer greater or + * equal to 1. */ - public static final OptionID KREF_ID = new OptionID("lof.krefer", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + public static final OptionID KREF_ID = new OptionID("lof.krefer", "The number of nearest neighbors of an object to be considered for computing its LOF score."); /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its reachability distance. */ - public static final OptionID KREACH_ID = new OptionID("lof.kreach", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + public static final OptionID KREACH_ID = new OptionID("lof.kreach", "The number of nearest neighbors of an object to be considered for computing its LOF score."); /** * The reference set size to use. @@ -569,43 +528,45 @@ public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgo /** * Neighborhood distance function. */ - protected DistanceFunction<O, D> neighborhoodDistanceFunction = null; + protected DistanceFunction<O> neighborhoodDistanceFunction = null; /** * Reachability distance function. 
*/ - protected DistanceFunction<O, D> reachabilityDistanceFunction = null; + protected DistanceFunction<O> reachabilityDistanceFunction = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); final IntParameter pK = new IntParameter(KREF_ID); - pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); - if (config.grab(pK)) { + pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); + if(config.grab(pK)) { krefer = pK.intValue(); } final IntParameter pK2 = new IntParameter(KREACH_ID); pK2.setOptional(true); - pK2.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); - if (config.grab(pK2)) { + pK2.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); + if(config.grab(pK2)) { kreach = pK2.intValue(); - } else { + } + else { kreach = krefer; } - final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class); + final ObjectParameter<DistanceFunction<O>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class); reachDistP.setOptional(true); - if (config.grab(reachDistP)) { + if(config.grab(reachDistP)) { reachabilityDistanceFunction = reachDistP.instantiateClass(config); - } else { + } + else { reachabilityDistanceFunction = distanceFunction; } } @Override - protected FlexibleLOF<O, D> makeInstance() { + protected FlexibleLOF<O> makeInstance() { return new FlexibleLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java index 28fcf01b..611701ab 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -34,15 +34,16 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -59,18 +60,19 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
- * INFLO provides the Mining Algorithms (Two-way Search Method) for Influence
- * Outliers using Symmetric Relationship
- * <p>
+ * Influence Outliers using Symmetric Relationship (INFLO), with two-way search:
+ * an outlier detection method based on LOF that also uses the reverse kNN.
+ *
* Reference: <br>
* <p>
- * Jin, W., Tung, A., Han, J., and Wang, W. 2006<br/>
- * Ranking outliers using symmetric neighborhood relationship<br/>
- * In Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD),
- * Singapore
+ * W. Jin, A. Tung, J. Han, and W. Wang<br />
+ * Ranking outliers using symmetric neighborhood relationship<br />
+ * Proc. 10th Pacific-Asia conference on Advances in Knowledge Discovery and
+ * Data Mining, 2006.
* </p>
*
* @author Ahmed Hettab
+ * @author Erich Schubert
*
* @apiviz.has KNNQuery
*
@@ -78,35 +80,23 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; */
@Title("INFLO: Influenced Outlierness Factor")
@Description("Ranking Outliers Using Symmetric Neigborhood Relationship")
-@Reference(authors = "Jin, W., Tung, A., Han, J., and Wang, W", title = "Ranking outliers using symmetric neighborhood relationship", booktitle = "Proc. Pacific-Asia Conf. on Knowledge Discovery and Data Mining (PAKDD), Singapore, 2006", url = "http://dx.doi.org/10.1007/11731139_68")
-public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "W. Jin, A. Tung, J. Han, and W. Wang", //
+title = "Ranking outliers using symmetric neighborhood relationship", //
+booktitle = "Proc. 10th Pacific-Asia conference on Advances in Knowledge Discovery and Data Mining", //
+url = "http://dx.doi.org/10.1007/11731139_68")
+public class INFLO<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(INFLO.class);
/**
- * Parameter to specify if any object is a Core Object must be a double
- * greater than 0.0
- * <p>
- * see paper "Two-way search method" 3.2
- */
- public static final OptionID M_ID = new OptionID("inflo.m", "The threshold");
-
- /**
- * Holds the value of {@link #M_ID}.
+ * Pruning threshold m.
*/
private double m;
/**
- * Parameter to specify the number of nearest neighbors of an object to be
- * considered for computing its INFLO_SCORE. must be an integer greater than
- * 1.
- */
- public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
-
- /**
- * Holds the value of {@link #K_ID}.
+ * Number of neighbors to use.
*/
private int k;
@@ -117,7 +107,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa * @param m m Parameter
* @param k k Parameter
*/
- public INFLO(DistanceFunction<? super O, D> distanceFunction, double m, int k) {
+ public INFLO(DistanceFunction<? super O> distanceFunction, double m, int k) {
super(distanceFunction);
this.m = m;
this.k = k;
@@ -131,9 +121,9 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa * @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k + 1, DatabaseQuery.HINT_HEAVY_USE);
- ModifiableDBIDs processedIDs = DBIDUtil.newHashSet(relation.size());
ModifiableDBIDs pruned = DBIDUtil.newHashSet();
// KNNS
WritableDataStore<ModifiableDBIDs> knns = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, ModifiableDBIDs.class);
@@ -147,72 +137,112 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa rnns.put(iditer, DBIDUtil.newArray());
}
- // TODO: use kNN preprocessor?
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
+ computeNeighborhoods(relation, knnQuery, pruned, knns, rnns, density);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- // if not visited count=0
- int count = rnns.get(id).size();
- if(!processedIDs.contains(id)) {
- // TODO: use exactly k neighbors?
- KNNList<D> list = knnQuery.getKNNForDBID(id, k);
- knns.get(id).addDBIDs(list);
- processedIDs.add(id);
- density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
+ // Calculate INFLO for any Object
+ DoubleMinMax inflominmax = new DoubleMinMax();
+ WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ // Note: this modifies knns, by adding rknns!
+ computeINFLO(relation, pruned, knns, rnns, density, inflos, inflominmax);
- }
- ModifiableDBIDs s = knns.get(id);
- for(DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
- if(!processedIDs.contains(q)) {
- // TODO: use exactly k neighbors?
- KNNList<D> listQ = knnQuery.getKNNForDBID(q, k);
- knns.get(q).addDBIDs(listQ);
- density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
- processedIDs.add(q);
- }
+ // Build result representation.
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Influence Outlier Score", "inflo-outlier", inflos, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
+ return new OutlierResult(scoreMeta, scoreResult);
+ }
- if(knns.get(q).contains(id)) {
- rnns.get(q).add(id);
- rnns.get(id).add(q);
+ /**
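+ * Compute the kNN and reverse kNN neighborhoods, and collect the pruned objects.
+ *
+ * @param relation Data relation
+ * @param knnQuery kNN query function
+ * @param pruned Output set of pruned objects
+ * @param knns kNN storage
+ * @param rnns reverse kNN storage
+ * @param density Density storage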
+ */
+ protected void computeNeighborhoods(Relation<O> relation, KNNQuery<O> knnQuery, ModifiableDBIDs pruned, WritableDataStore<ModifiableDBIDs> knns, WritableDataStore<ModifiableDBIDs> rnns, WritableDoubleDataStore density) {
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ // if not visited count=0
+ int count = rnns.get(iter).size();
+ DBIDs knn = getKNN(iter, knnQuery, knns, density);
+ for(DBIDIter niter = knn.iter(); niter.valid(); niter.advance()) {
+ // Ignore the query point itself.
+ if(DBIDUtil.equal(iter, niter)) {
+ continue;
+ }
+ if(getKNN(niter, knnQuery, knns, density).contains(iter)) {
+ rnns.get(niter).add(iter);
+ rnns.get(iter).add(niter);
count++;
}
}
- if(count >= s.size() * m) {
- pruned.add(id);
+ if(count >= knn.size() * m) {
+ pruned.add(iter);
}
}
+ }
- // Calculate INFLO for any Object
- // IF Object is pruned INFLO=1.0
- DoubleMinMax inflominmax = new DoubleMinMax();
- WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- if(!pruned.contains(id)) {
- ModifiableDBIDs knn = knns.get(id);
- ModifiableDBIDs rnn = rnns.get(id);
-
- double denP = density.doubleValue(id);
- knn.addDBIDs(rnn);
- Mean mean = new Mean();
- for(DBIDIter iter = knn.iter(); iter.valid(); iter.advance()) {
- mean.put(density.doubleValue(iter));
+ /**
+ * Compute the final INFLO scores.
+ *
+ * @param relation Data relation
+ * @param pruned Pruned objects
+ * @param knns kNN storage
+ * @param rnns reverse kNN storage
+ * @param density Density estimation
+ * @param inflos Inflo score storage
+ * @param inflominmax Output of minimum and maximum
+ */
+ protected void computeINFLO(Relation<O> relation, ModifiableDBIDs pruned, WritableDataStore<ModifiableDBIDs> knns, WritableDataStore<ModifiableDBIDs> rnns, WritableDoubleDataStore density, WritableDoubleDataStore inflos, DoubleMinMax inflominmax) {
+ Mean mean = new Mean();
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if(pruned.contains(iter)) {
+ inflos.putDouble(iter, 1.);
+ inflominmax.put(1.);
+ continue;
+ }
+ ModifiableDBIDs knn = knns.get(iter), rnn = rnns.get(iter);
+ knn.addDBIDs(rnn);
+ // Compute mean density of NN \cup RNN
+ mean.reset();
+ for(DBIDIter niter = knn.iter(); niter.valid(); niter.advance()) {
+ if(DBIDUtil.equal(iter, niter)) {
+ continue;
}
- double den = mean.getMean() / denP;
- inflos.putDouble(id, den);
- // update minimum and maximum
- inflominmax.put(den);
-
+ mean.put(density.doubleValue(niter));
}
- if(pruned.contains(id)) {
- inflos.putDouble(id, 1.0);
- inflominmax.put(1.0);
+ double denP = density.doubleValue(iter);
+ double den;
+ if(denP > 0.) {
+ den = mean.getMean() / denP;
}
+ else {
+ den = mean.getMean() == 0 ? 1. : Double.POSITIVE_INFINITY;
+ }
+ inflos.putDouble(iter, den);
+ // update minimum and maximum
+ inflominmax.put(den);
}
+ }
- // Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
- return new OutlierResult(scoreMeta, scoreResult);
+ /**
+ * Get the (forward only) kNN of an object, including the query point
+ *
+ * @param q Query point
+ * @param knnQuery Query function
+ * @param knns kNN storage
+ * @param density Density storage
+ * @return Neighbor list
+ */
+ protected DBIDs getKNN(DBIDIter q, KNNQuery<O> knnQuery, WritableDataStore<ModifiableDBIDs> knns, WritableDoubleDataStore density) {
+ ModifiableDBIDs s = knns.get(q);
+ if(s.size() == 0) {
+ KNNList listQ = knnQuery.getKNNForDBID(q, k + 1);
+ s.addDBIDs(listQ);
+ density.putDouble(q, 1. / listQ.getKNNDistance());
+ }
+ return s;
}
@Override
@@ -232,29 +262,49 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa *
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * Parameter to specify whether an object is a core object (the pruning
+ * threshold); must be a double greater than 0.0.
+ *
+ * See the paper, "Two-way search method", 3.2.
+ */
+ public static final OptionID M_ID = new OptionID("inflo.m", "The pruning threshold");
+
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its INFLO score.
+ */
+ public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO score.");
+
+ /**
+ * Pruning threshold m.
+ */
protected double m = 1.0;
+ /**
+ * Number of neighbors to use.
+ */
protected int k = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
- mP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
+ final DoubleParameter mP = new DoubleParameter(M_ID, 1.0)//
+ .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE);
if(config.grab(mP)) {
m = mP.doubleValue();
}
- final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ final IntParameter kP = new IntParameter(K_ID) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.intValue();
}
}
@Override
- protected INFLO<O, D> makeInstance() {
+ protected INFLO<O> makeInstance() {
return new INFLO<>(distanceFunction, m, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java new file mode 100644 index 00000000..2183872f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/KDEOS.java @@ -0,0 +1,445 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.FeatureVector; +import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.MathUtil; +import de.lmu.ifi.dbs.elki.math.MeanVariance; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; 
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualGlobalConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Generalized Outlier Detection with Flexible Kernel Density Estimates. + * + * This is an outlier detection inspired by LOF, but using kernel density + * estimation (KDE) from statistics. Unfortunately, for higher dimensional data, + * kernel density estimation itself becomes difficult. At this point, the + * <tt>kdeos.idim</tt> parameter can become useful, which allows to either + * disable dimensionality adjustment completely (<tt>0</tt>) or to set it to a + * lower dimensionality than the data representation. This may sound like a hack + * at first, but real data is often of lower intrinsic dimensionality, and + * embedded into a higher data representation. Adjusting the kernel to account + * for the representation seems to yield worse results than using a lower, + * intrinsic, dimensionality. + * + * If your data set has many duplicates, the <tt>kdeos.kernel.minbw</tt> + * parameter sets a minimum kernel bandwidth, which may improve results in these + * cases, as it prevents kernels from degenerating to single points. 
+ * + * Reference: + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Generalized Outlier Detection with Flexible Kernel Density Estimates<br /> + * In Proceedings of the 14th SIAM International Conference on Data Mining + * (SDM), Philadelphia, PA, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has KNNQuery + * @apiviz.has KernelDensityFunction + * + * @param <O> Object type + */ +@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", // +title = "Generalized Outlier Detection with Flexible Kernel Density Estimates", // +booktitle = "Proc. 14th SIAM International Conference on Data Mining (SDM), Philadelphia, PA, 2014", // +url = "http://dx.doi.org/10.1137/1.9781611973440.63") +public class KDEOS<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(KDEOS.class); + + /** + * Kernel function to use for density estimation. + */ + KernelDensityFunction kernel; + + /** + * Minimum and maximum number of neighbors to use. + */ + int kmin, kmax; + + /** + * Kernel scaling parameter. + */ + double scale; + + /** + * Kernel minimum bandwidth. + */ + double minBandwidth = 1e-6; + + /** + * Intrinsic dimensionality. + */ + int idim = -1; + + /** + * Significance cutoff when computing kernel density. + */ + final static double CUTOFF = 1e-20; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param kmin Minimum number of neighbors + * @param kmax Maximum number of neighbors + * @param kernel Kernel function + * @param minBandwidth Kernel minimum bandwidth + * @param scale Kernel scaling parameter + * @param idim Intrinsic dimensionality (use a negative value such as -1 to + * use the real data dimensionality) + */ + public KDEOS(DistanceFunction<? 
super O> distanceFunction, int kmin, int kmax, KernelDensityFunction kernel, double minBandwidth, double scale, int idim) { + super(distanceFunction); + this.kmin = kmin; + this.kmax = kmax; + this.kernel = kernel; + this.minBandwidth = minBandwidth; + this.scale = scale; + this.idim = idim; + } + + /** + * Run the KDEOS outlier detection algorithm. + * + * @param database Database to query + * @param rel Relation to process + * @return Outlier detection result + */ + public OutlierResult run(Database database, Relation<O> rel) { + final DBIDs ids = rel.getDBIDs(); + + LOG.verbose("Running kNN preprocessor."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, rel, getDistanceFunction(), kmax + 1); + + // Initialize store for densities + WritableDataStore<double[]> densities = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, double[].class); + estimateDensities(rel, knnq, ids, densities); + + // Compute scores: + WritableDoubleDataStore kofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DoubleMinMax minmax = new DoubleMinMax(); + computeOutlierScores(knnq, ids, densities, kofs, minmax); + + DoubleRelation scoreres = new MaterializedDoubleRelation("Kernel Density Estimation Outlier Scores", "kdeos-outlier", kofs, ids); + OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax()); + return new OutlierResult(meta, scoreres); + } + + /** + * Perform the kernel density estimation step. 
+ * + * @param rel Relation to query + * @param knnq kNN query + * @param ids IDs to process + * @param densities Density storage + */ + protected void estimateDensities(Relation<O> rel, KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities) { + final int dim = dimensionality(rel); + final int knum = kmax + 1 - kmin; + // Initialize storage: + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + densities.put(iter, new double[knum]); + } + // Distribute densities: + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing densities.", ids.size(), LOG) : null; + double iminbw = (minBandwidth > 0.) ? 1. / (minBandwidth * scale) : Double.POSITIVE_INFINITY; + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + KNNList neighbors = knnq.getKNNForDBID(iter, kmax + 1); + int k = 1, idx = 0; + double sum = 0.; + for(DoubleDBIDListIter kneighbor = neighbors.iter(); k <= kmax && kneighbor.valid(); kneighbor.advance(), k++) { + sum += kneighbor.doubleValue(); + if(k < kmin) { + continue; + } + final double ibw = Math.min(k / (sum * scale), iminbw); + final double sca = MathUtil.powi(ibw, dim); + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + final double dens; + if(sca < Double.POSITIVE_INFINITY) { // NaNs with duplicate points! + dens = sca * kernel.density(neighbor.doubleValue() * ibw); + } + else { + dens = neighbor.doubleValue() == 0. ? 1. : 0.; + } + densities.get(neighbor)[idx] += dens; + if(dens < CUTOFF) { + break; + } + } + ++idx; // Only if k >= kmin + } + LOG.incrementProcessed(prog); + } + LOG.ensureCompleted(prog); + } + + /** + * Ugly hack to allow using this implementation without having a well-defined + * dimensionality. + * + * @param rel Data relation + * @return Dimensionality + */ + private int dimensionality(Relation<O> rel) { + // Explicit: + if(idim >= 0) { + return idim; + } + // Cast to vector field relation. 
+ @SuppressWarnings("unchecked") + final Relation<FeatureVector<?>> frel = (Relation<FeatureVector<?>>) rel; + int dim = RelationUtil.dimensionality(frel); + if(dim < 0) { + throw new AbortException("When using KDEOS with non-vectorspace data, the intrinsic dimensionality parameter must be set!"); + } + return dim; + } + + /** + * Compute the final KDEOS scores. + * + * @param knnq kNN query + * @param ids IDs to process + * @param densities Density estimates + * @param kdeos Score outputs + * @param minmax Minimum and maximum scores + */ + protected void computeOutlierScores(KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities, WritableDoubleDataStore kdeos, DoubleMinMax minmax) { + final int knum = kmax + 1 - kmin; + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing KDEOS scores.", ids.size(), LOG) : null; + + double[][] scratch = new double[knum][kmax + 5]; + MeanVariance mv = new MeanVariance(); + + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + double[] dens = densities.get(iter); + KNNList neighbors = knnq.getKNNForDBID(iter, kmax); + if(scratch[0].length < neighbors.size()) { + // Resize scratch. Add some extra margin again. + scratch = new double[knum][neighbors.size() + 5]; + } + { // Store density matrix of neighbors + int i = 0; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance(), i++) { + double[] ndens = densities.get(neighbor); + for(int k = 0; k < knum; k++) { + scratch[k][i] = ndens[k]; + } + } + assert (i == neighbors.size()); + } + // Compute means and stddevs for each k + double score = 0.; + for(int i = 0; i < knum; i++) { + mv.reset(); + for(int j = 0; j < neighbors.size(); j++) { + mv.put(scratch[i][j]); + } + final double mean = mv.getMean(), stddev = mv.getSampleStddev(); + if(stddev > 0.) 
{ + score += (mean - dens[i]) / stddev; + } + } + score /= knum; // average + score = NormalDistribution.standardNormalCDF(score); + minmax.put(score); + kdeos.put(iter, score); + LOG.incrementProcessed(prog); + } + LOG.ensureCompleted(prog); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + TypeInformation res = getDistanceFunction().getInputTypeRestriction(); + if(idim < 0) { + res = new CombinedTypeInformation(TypeUtil.NUMBER_VECTOR_FIELD, res); + } + return TypeUtil.array(res); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * Parameter to specify the kernel density function. + */ + private static final OptionID KERNEL_ID = new OptionID("kdeos.kernel", "Kernel density function to use."); + + /** + * Parameter to specify the minimum bandwidth. + */ + private static final OptionID KERNEL_MIN_ID = new OptionID("kdeos.kernel.minbw", "Minimum bandwidth for kernel density estimation."); + + /** + * Parameter to specify the kernel scaling factor. + */ + private static final OptionID KERNEL_SCALE_ID = new OptionID("kdeos.kernel.scale", "Scaling factor for the kernel function."); + + /** + * Minimum value of k to analyze. + */ + private static final OptionID KMIN_ID = new OptionID("kdeos.k.min", "Minimum value of k to analyze."); + + /** + * Maximum value of k to analyze. + */ + private static final OptionID KMAX_ID = new OptionID("kdeos.k.max", "Maximum value of k to analyze."); + + /** + * Intrinsic dimensionality. + */ + private static final OptionID IDIM_ID = new OptionID("kdeos.idim", "Intrinsic dimensionality of this data set. Use -1 for using the true data dimensionality, but values such as 0-2 often offer better performance."); + + /** + * Kernel function to use for density estimation. 
+ */ + KernelDensityFunction kernel; + + /** + * Minimum and maximum number of neighbors to use. + */ + int kmin; + + /** + * Minimum and maximum number of neighbors to use. + */ + int kmax; + + /** + * Kernel scaling parameter. + */ + double scale; + + /** + * Kernel minimum bandwidth. + */ + double minBandwidth = 0.; + + /** + * Intrinsic dimensionality. + */ + int idim = -1; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class); + if(config.grab(kernelP)) { + kernel = kernelP.instantiateClass(config); + } + + IntParameter kminP = new IntParameter(KMIN_ID) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); + if(config.grab(kminP)) { + kmin = kminP.intValue(); + } + + IntParameter kmaxP = new IntParameter(KMAX_ID) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); + if(config.grab(kmaxP)) { + kmax = kmaxP.intValue(); + } + config.checkConstraint(new LessEqualGlobalConstraint<>(kminP, kmaxP)); + + DoubleParameter scaleP = new DoubleParameter(KERNEL_SCALE_ID)// + .addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE) // + .setDefaultValue(.5); + if(config.grab(scaleP)) { + // For simpler parameterization, scale kernels by their canonical + // bandwidth, when the kernel is configured. + scale = scaleP.doubleValue() * ((kernel != null) ? 
kernel.canonicalBandwidth() : 1.); + } + DoubleParameter minbwP = new DoubleParameter(KERNEL_MIN_ID) // + .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE) // + .setOptional(true); + if(config.grab(minbwP)) { + minBandwidth = minbwP.doubleValue(); + } + IntParameter idimP = new IntParameter(IDIM_ID, -1); + if(config.grab(idimP)) { + idim = idimP.intValue(); + } + } + + @Override + protected KDEOS<O> makeInstance() { + return new KDEOS<>(distanceFunction, kmin, kmax, kernel, minBandwidth, scale, idim); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java index e5049877..c2e29f54 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,27 +30,20 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import 
de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -61,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; @@ -88,10 +82,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @apiviz.has KernelDensityFunction * * @param <O> the type of objects handled by this Algorithm - * @param <D> Distance type */ -@Reference(authors = "L. J. Latecki, A. Lazarevic, D. 
Pokrajac", title = "Outlier Detection with Kernel Density Functions", booktitle = "Machine Learning and Data Mining in Pattern Recognition", url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6") -public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", // +title = "Outlier Detection with Kernel Density Functions", // +booktitle = "Machine Learning and Data Mining in Pattern Recognition", // +url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6") +public class LDF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -125,7 +121,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @param h Kernel bandwidth scaling * @param c Score scaling parameter */ - public LDF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel, double h, double c) { + public LDF(int k, DistanceFunction<? super O> distance, KernelDensityFunction kernel, double h, double c) { super(distance); this.k = k + 1; this.kernel = kernel; @@ -142,84 +138,42 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte */ public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null; - final int dim = RelationUtil.dimensionality(relation); - DBIDs ids = relation.getDBIDs(); - // "HEAVY" flag for KNN Query since it is used more than once - KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! 
- if(!(knnq instanceof PreprocessorKNNQuery)) { - if(stepprog != null) { - stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); - database.addIndex(preproc); - DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); - knnq = preproc.getKNNQuery(rdq, k); - } + LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k); // Compute LDEs - if(stepprog != null) { - stepprog.beginStep(2, "Computing LDEs.", LOG); - } + LOG.beginStep(stepprog, 2, "Computing LDEs."); WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; - if(neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, it)) { - continue; - } - final double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance(); - if(nkdist > 0.) 
{ - final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist); - sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim); - count++; - } - else { - sum = Double.POSITIVE_INFINITY; - count++; - break; - } + // Fast version for double distances + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, it)) { + continue; } - } - else { - for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, it)) { - continue; - } - final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue(); - if(nkdist > 0.) { - final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist); - sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim); - count++; - } - else { - sum = Double.POSITIVE_INFINITY; - count++; - break; - } + final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance(); + if(!(nkdist > 0.)) { + sum = Double.POSITIVE_INFINITY; + count++; + break; } + final double v = Math.max(nkdist, neighbor.doubleValue()) / (h * nkdist); + sum += kernel.density(v) / MathUtil.powi(h * nkdist, dim); + count++; } ldes.putDouble(it, sum / count); - if(densProgress != null) { - densProgress.incrementProcessed(LOG); - } - } - if(densProgress != null) { - densProgress.ensureCompleted(LOG); + LOG.incrementProcessed(densProgress); } + LOG.ensureCompleted(densProgress); // Compute local density factors. - if(stepprog != null) { - stepprog.beginStep(3, "Computing LDFs.", LOG); - } + LOG.beginStep(stepprog, 3, "Computing LDFs."); WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); // track the maximum value for normalization. DoubleMinMax lofminmax = new DoubleMinMax(); @@ -227,7 +181,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte FiniteProgress progressLOFs = LOG.isVerbose() ? 
new FiniteProgress("Local Density Factors", ids.size(), LOG) : null; for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { final double lrdp = ldes.doubleValue(it); - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { @@ -245,20 +199,14 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte // update minimum and maximum lofminmax.put(ldf); - if(progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } - } - if(progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); + LOG.incrementProcessed(progressLOFs); } + LOG.ensureCompleted(progressLOFs); - if(stepprog != null) { - stepprog.setCompleted(LOG); - } + LOG.setCompleted(stepprog); // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Density Factor", "ldf-outlier", ldfs, ids); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c)); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); @@ -283,9 +231,8 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @apiviz.exclude * * @param <O> vector type - * @param <D> distance type */ - public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Option ID for kernel. 
*/ @@ -353,7 +300,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte } @Override - protected LDF<O, D> makeInstance() { + protected LDF<O> makeInstance() { return new LDF<>(k, distanceFunction, kernel, h, c); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java index 36c70b48..479b0bab 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -33,14 +33,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -79,9 +79,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; */
@Title("LDOF: Local Distance-Based Outlier Factor")
@Description("Local outlier detection appraoch suitable for scattered data by averaging the kNN distance over all k nearest neighbors")
-@Reference(authors = "K. Zhang, M. Hutter, H. Jin", title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
+@Reference(authors = "K. Zhang, M. Hutter, H. Jin", //
+title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", //
+booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", //
+url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF" })
-public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+public class LDOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -110,7 +113,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @param distanceFunction distance function
* @param k k Parameter
*/
- public LDOF(DistanceFunction<? super O, D> distanceFunction, int k) {
+ public LDOF(DistanceFunction<? super O> distanceFunction, int k) { // k is stored as given; run() requests k + 1 neighbors and skips the query point itself
super(distanceFunction);
this.k = k;
}
@@ -123,8 +126,8 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
- DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
- KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k);
+ DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k + 1);
// track the maximum value for normalization
DoubleMinMax ldofminmax = new DoubleMinMax();
@@ -135,23 +138,26 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas if(LOG.isVerbose()) {
LOG.verbose("Computing LDOFs");
}
- FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), LOG) : null;
+ FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF for objects", relation.size(), LOG) : null;
Mean dxp = new Mean(), Dxp = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
- // skip the point itself
+ KNNList neighbors = knnQuery.getKNNForDBID(iditer, k + 1);
dxp.reset();
Dxp.reset();
- // TODO: optimize for double distances
- for(DistanceDBIDListIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
- if(!DBIDUtil.equal(neighbor1, iditer)) {
- dxp.put(neighbor1.getDistance().doubleValue());
- for(DistanceDBIDListIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
- if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
- Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
- }
+ DoubleDBIDListIter neighbor1 = neighbors.iter(), neighbor2 = neighbors.iter();
+ for(; neighbor1.valid(); neighbor1.advance()) {
+ // skip the point itself
+ if(DBIDUtil.equal(neighbor1, iditer)) {
+ continue;
+ }
+ dxp.put(neighbor1.doubleValue());
+ for(neighbor2.seek(neighbor1.getOffset() + 1); neighbor2.valid(); neighbor2.advance()) {
+ // skip the point itself
+ if(DBIDUtil.equal(neighbor2, iditer)) {
+ continue;
}
+ Dxp.put(distFunc.distance(neighbor1, neighbor2));
}
}
double ldof = dxp.getMean() / Dxp.getMean();
@@ -162,16 +168,12 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas // update maximum
ldofminmax.put(ldof);
- if(progressLDOFs != null) {
- progressLDOFs.incrementProcessed(LOG);
- }
- }
- if(progressLDOFs != null) {
- progressLDOFs.ensureCompleted(LOG);
+ LOG.incrementProcessed(progressLDOFs);
}
+ LOG.ensureCompleted(progressLDOFs);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("LDOF Outlier Score", "ldof-outlier", TypeUtil.DOUBLE, ldofs, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -193,21 +195,21 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas *
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
protected int k = 0;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID);
- kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT);
+ kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
if(config.grab(kP)) {
k = kP.getValue();
}
}
@Override
- protected LDOF<O, D> makeInstance() {
+ protected LDOF<O> makeInstance() {
return new LDOF<>(distanceFunction, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java index e76c6034..8d371d4c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,9 +23,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.Arrays; import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; @@ -37,15 +35,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import 
de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -54,23 +52,24 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.DoubleIntegerArrayQuickSort; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; /** * Fast Outlier Detection Using the "Local Correlation Integral". * - * Exact implementation only, not aLOCI. See {@link ALOCI} + * Exact implementation only, not aLOCI. See {@link ALOCI}. * * Outlier detection using multiple epsilon neighborhoods. * + * This implementation has O(n<sup>3</sup> log n) runtime complexity! + * * Based on: S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos: * LOCI: Fast Outlier Detection Using the Local Correlation Integral. In: Proc. * 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003. 
@@ -80,13 +79,12 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; * @apiviz.has RangeQuery * * @param <O> Object type - * @param <D> Distance type */ @Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral") @Description("Algorithm to compute outliers based on the Local Correlation Integral") @Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802") -@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI"}) -public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI" }) +public class LOCI<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -111,7 +109,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas /** * Holds the value of {@link #RMAX_ID}. */ - private D rmax; + private double rmax; /** * Holds the value of {@link #NMIN_ID}. @@ -131,7 +129,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @param nmin Minimum neighborhood size * @param alpha Alpha value */ - public LOCI(DistanceFunction<? super O, D> distanceFunction, D rmax, int nmin, double alpha) { + public LOCI(DistanceFunction<? 
super O> distanceFunction, double rmax, int nmin, double alpha) { super(distanceFunction); this.rmax = rmax; this.nmin = nmin; @@ -146,96 +144,62 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @return Outlier result */ public OutlierResult run(Database database, Relation<O> relation) { - DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction()); - RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc); + DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction()); + RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc); + DBIDs ids = relation.getDBIDs(); - FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), LOG) : null; // LOCI preprocessing step - WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class); - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax); - // build list of critical distances - ArrayList<DoubleIntPair> cdist = new ArrayList<>(neighbors.size() << 1); - { - for(int i = 0; i < neighbors.size(); i++) { - DistanceDBIDPair<D> r = neighbors.get(i); - if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) { - continue; - } - cdist.add(new DoubleIntPair(r.getDistance().doubleValue(), i)); - final double ri = r.getDistance().doubleValue() / alpha; - if(ri <= rmax.doubleValue()) { - cdist.add(new DoubleIntPair(ri, Integer.MIN_VALUE)); - } - } - } - Collections.sort(cdist); - // fill the gaps to have fast lookups of number of neighbors at a given - // distance. 
- int lastk = 0; - for(DoubleIntPair c : cdist) { - if(c.second == Integer.MIN_VALUE) { - c.second = lastk; - } - else { - lastk = c.second; - } - } - - interestingDistances.put(iditer, cdist); - if(progressPreproc != null) { - progressPreproc.incrementProcessed(LOG); - } - } - if(progressPreproc != null) { - progressPreproc.ensureCompleted(LOG); - } + WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class); + precomputeInterestingRadii(ids, rangeQuery, interestingDistances); // LOCI main step FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null; WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - final List<DoubleIntPair> cdist = interestingDistances.get(iditer); - final double maxdist = cdist.get(cdist.size() - 1).first; - final int maxneig = cdist.get(cdist.size() - 1).second; + // Shared instance, to save allocations. + MeanVariance mv_n_r_alpha = new MeanVariance(); + + for(DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) { + final DoubleIntArrayList cdist = interestingDistances.get(iditer); + final double maxdist = cdist.getDouble(cdist.size() - 1); + final int maxneig = cdist.getInt(cdist.size() - 1); double maxmdefnorm = 0.0; double maxnormr = 0; if(maxneig >= nmin) { - D range = distFunc.getDistanceFactory().fromDouble(maxdist); // Compute the largest neighborhood we will need. - DistanceDBIDList<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range); - // TODO: Ensure the set is sorted. Should be a no-op with most indexes. 
+ DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist); + // TODO: Ensure the result is sorted. This is currently implied. + // For any critical distance, compute the normalized MDEF score. - for(DoubleIntPair c : cdist) { + for(int i = 0, size = cdist.size(); i < size; i++) { // Only start when minimum size is fulfilled - if (c.second < nmin) { + if(cdist.getInt(i) < nmin) { continue; } - final double r = c.first; + final double r = cdist.getDouble(i); final double alpha_r = alpha * r; - // compute n(p_i, \alpha * r) from list (note: alpha_r is different from c!) - final int n_alphar = elementsAtRadius(cdist, alpha_r); + // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!) + final int n_alphar = cdist.getInt(cdist.find(alpha_r)); // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF} - MeanVariance mv_n_r_alpha = new MeanVariance(); - // TODO: optimize for double distances - for (DistanceDBIDListIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) { + mv_n_r_alpha.reset(); + for(DoubleDBIDListIter neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) { // Stop at radius r - if(neighbor.getDistance().doubleValue() > r) { + if(neighbor.doubleValue() > r) { break; } - int rn_alphar = elementsAtRadius(interestingDistances.get(neighbor), alpha_r); + DoubleIntArrayList cdist2 = interestingDistances.get(neighbor); + int rn_alphar = cdist2.getInt(cdist2.find(alpha_r)); mv_n_r_alpha.put(rn_alphar); } // We only use the average and standard deviation final double nhat_r_alpha = mv_n_r_alpha.getMean(); final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev(); - // Redundant divisions removed. - final double mdef = (nhat_r_alpha - n_alphar); // / nhat_r_alpha; - final double sigmamdef = sigma_nhat_r_alpha; // / nhat_r_alpha; + // Redundant divisions by nhat_r_alpha removed. 
+ final double mdef = nhat_r_alpha - n_alphar; + final double sigmamdef = sigma_nhat_r_alpha; final double mdefnorm = mdef / sigmamdef; if(mdefnorm > maxmdefnorm) { @@ -246,46 +210,194 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas } else { // FIXME: when nmin was not fulfilled - what is the proper value then? - maxmdefnorm = 1.0; + maxmdefnorm = Double.POSITIVE_INFINITY; maxnormr = maxdist; } mdef_norm.putDouble(iditer, maxmdefnorm); mdef_radius.putDouble(iditer, maxnormr); minmax.put(maxmdefnorm); - if(progressLOCI != null) { - progressLOCI.incrementProcessed(LOG); - } - } - if(progressLOCI != null) { - progressLOCI.ensureCompleted(LOG); + LOG.incrementProcessed(progressLOCI); } - Relation<Double> scoreResult = new MaterializedRelation<>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs()); + LOG.ensureCompleted(progressLOCI); + DoubleRelation scoreResult = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); - result.addChildResult(new MaterializedRelation<>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs())); + result.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs())); return result; } /** - * Get the number of objects for a given radius, from the list of critical - * distances, storing (radius, count) pairs. + * Preprocessing step: determine the radii of interest for each point. 
* - * @param criticalDistances - * @param radius - * @return Number of elements at the given radius + * @param ids IDs to process + * @param rangeQuery Range query + * @param interestingDistances Distances of interest */ - protected int elementsAtRadius(List<DoubleIntPair> criticalDistances, final double radius) { - int n_r = 0; - for(DoubleIntPair c2 : criticalDistances) { - if(c2.first > radius) { - break; + protected void precomputeInterestingRadii(DBIDs ids, RangeQuery<O> rangeQuery, WritableDataStore<DoubleIntArrayList> interestingDistances) { + FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null; + for(DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) { + DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax); + // build list of critical distances + DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1); + { + int i = 0; + DoubleDBIDListIter ni = neighbors.iter(); + while(ni.valid()) { + final double curdist = ni.doubleValue(); + ++i; + ni.advance(); + // Skip, if tied to the next object: + if(ni.valid() && curdist == ni.doubleValue()) { + continue; + } + cdist.append(curdist, i); + // Scale radius, and reinsert + if(alpha != 1.) { + final double ri = curdist / alpha; + if(ri <= rmax) { + cdist.append(ri, Integer.MIN_VALUE); + } + } + } } - if(c2.second != Integer.MIN_VALUE) { - // Update - n_r = c2.second; + cdist.sort(); + + // fill the gaps to have fast lookups of number of neighbors at a given + // distance. + int lastk = 0; + for(int i = 0, size = cdist.size(); i < size; i++) { + final int k = cdist.getInt(i); + if(k == Integer.MIN_VALUE) { + cdist.setValue(i, lastk); + } + else { + lastk = k; + } } + // TODO: shrink the list, removing duplicate radii? + + interestingDistances.put(iditer, cdist); + LOG.incrementProcessed(progressPreproc); + } + LOG.ensureCompleted(progressPreproc); + } + + /** + * Array of double-int values. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + protected static class DoubleIntArrayList { + /** + * Double keys + */ + double[] keys; + + /** + * Integer values + */ + int[] vals; + + /** + * Used size + */ + int size = 0; + + /** + * Constructor. + * + * @param alloc Initial allocation. + */ + public DoubleIntArrayList(int alloc) { + keys = new double[alloc]; + vals = new int[alloc]; + size = 0; + } + + /** + * Collection size. + * + * @return Size + */ + public int size() { + return size; + } + + /** + * Get the key at the given position. + * + * @param i Position + * @return Key + */ + public double getDouble(int i) { + return keys[i]; + } + + /** + * Get the value at the given position. + * + * @param i Position + * @return Value + */ + public int getInt(int i) { + return vals[i]; + } + + /** + * Get the value at the given position. + * + * @param i Position + * @param val New value + */ + public void setValue(int i, int val) { + vals[i] = val; + } + + /** + * Append a key-value pair. + * + * @param key Key to append + * @param val Value to append. + */ + public void append(double key, int val) { + if(size == keys.length) { + keys = Arrays.copyOf(keys, size << 1); + vals = Arrays.copyOf(vals, size << 1); + } + keys[size] = key; + vals[size] = val; + ++size; + } + + /** + * Find the last position with a smaller or equal key. + * + * @param search Key + * @return Position + */ + public int find(final double search) { + int a = 0, b = size - 1; + while(a <= b) { + final int mid = (a + b) >>> 1; + final double cur = keys[mid]; + if(cur > search) { + b = mid - 1; + } + else { // less or equal! + a = mid + 1; + } + } + return b; + } + + /** + * Sort the array list. 
+ */ + public void sort() { + DoubleIntegerArrayQuickSort.sort(keys, vals, size); } - return n_r; } @Override @@ -304,9 +416,11 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas * @author Erich Schubert * * @apiviz.exclude + * + * @param <O> Object type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { - protected D rmax = null; + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + protected double rmax; protected int nmin = 0; @@ -315,15 +429,14 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null; - final DistanceParameter<D> rmaxP = new DistanceParameter<>(RMAX_ID, distanceFactory); + final DoubleParameter rmaxP = new DoubleParameter(RMAX_ID); if(config.grab(rmaxP)) { - rmax = rmaxP.getValue(); + rmax = rmaxP.doubleValue(); } final IntParameter nminP = new IntParameter(NMIN_ID, 20); if(config.grab(nminP)) { - nmin = nminP.getValue(); + nmin = nminP.intValue(); } final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.5); @@ -333,7 +446,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas } @Override - protected LOCI<O, D> makeInstance() { + protected LOCI<O> makeInstance() { return new LOCI<>(distanceFunction, rmax, nmin, alpha); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java index 28166c75..ff5529f5 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications 
Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,19 +35,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -56,6 +50,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; 
import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -75,10 +70,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * within ELKI we have renamed this parameter to "k". * </p> * + * Reference: * <p> - * Reference: <br> - * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying - * Density-Based Local Outliers. <br> + * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander:<br /> + * LOF: Identifying Density-Based Local Outliers.<br /> * In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00), * Dallas, TX, 2000. * </p> @@ -88,37 +83,40 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * * @apiviz.has KNNQuery * - * @param <O> the type of DatabaseObjects handled by this Algorithm - * @param <D> Distance type + * @param <O> the type of data objects handled by this algorithm */ @Title("LOF: Local Outlier Factor") @Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'") -@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388") -@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "outlier.LOF", "LOF" }) -public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander",// +title = "LOF: Identifying Density-Based Local Outliers", // +booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. 
on Management of Data (SIGMOD '00), Dallas, TX, 2000", // +url = "http://dx.doi.org/10.1145/342009.335388") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "LOF" }) +public class LOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ private static final Logging LOG = Logging.getLogger(LOF.class); /** - * Holds the value of {@link Parameterizer#K_ID}. + * The number of neighbors to query (including the query point!) */ protected int k = 2; /** * Constructor. * - * @param k the value of k + * @param k the number of neighbors to use for comparison (excluding the query + * point) * @param distanceFunction the neighborhood distance function */ - public LOF(int k, DistanceFunction<? super O, D> distanceFunction) { + public LOF(int k, DistanceFunction<? super O> distanceFunction) { super(distanceFunction); this.k = k + 1; } /** - * Performs the Generalized LOF_SCORE algorithm on the given database. + * Runs the LOF algorithm on the given database. * * @param database Database to query * @param relation Data to process @@ -126,42 +124,27 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase */ public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null; - DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction()); - // "HEAVY" flag for knn query since it is used more than once - KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! 
- if(!(knnq instanceof PreprocessorKNNQuery)) { - if(stepprog != null) { - stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); - knnq = preproc.getKNNQuery(dq, k); - } DBIDs ids = relation.getDBIDs(); + LOG.beginStep(stepprog, 1, "Materializing LOF neighborhoods."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k); + // Compute LRDs - if(stepprog != null) { - stepprog.beginStep(2, "Computing LRDs.", LOG); - } + LOG.beginStep(stepprog, 2, "Computing LRDs."); WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); computeLRDs(knnq, ids, lrds); // compute LOF_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(3, "Computing LOFs.", LOG); - } - DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + LOG.beginStep(stepprog, 3, "Computing LOFs."); WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB); // track the maximum value for normalization. DoubleMinMax lofminmax = new DoubleMinMax(); computeLOFScores(knnq, ids, lrds, lofs, lofminmax); - if(stepprog != null) { - stepprog.setCompleted(LOG); - } + LOG.setCompleted(stepprog); // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); return new OutlierResult(scoreMeta, scoreResult); } @@ -173,50 +156,26 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase * @param ids IDs to process * @param lrds Reachability storage */ - private void computeLRDs(KNNQuery<O, D> knnq, DBIDs ids, WritableDoubleDataStore lrds) { + private void computeLRDs(KNNQuery<O> knnq, DBIDs ids, WritableDoubleDataStore lrds) { FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + final KNNList neighbors = knnq.getKNNForDBID(iter, k); double sum = 0.0; int count = 0; - if(neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, iter)) { - continue; - } - KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); - final double nkdist; - if(neighborsNeighbors instanceof DoubleDistanceKNNList) { - nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance(); - } - else { - nkdist = neighborsNeighbors.getKNNDistance().doubleValue(); - } - sum += Math.max(neighbor.doubleDistance(), nkdist); - count++; - } - } - else { - for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, iter)) { - continue; - } - KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); - sum += 
Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()); - count++; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, iter)) { + continue; } + KNNList neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); + sum += Math.max(neighbor.doubleValue(), neighborsNeighbors.getKNNDistance()); + count++; } // Avoid division by 0 final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY; lrds.putDouble(iter, lrd); - if(lrdsProgress != null) { - lrdsProgress.incrementProcessed(LOG); - } - } - if(lrdsProgress != null) { - lrdsProgress.ensureCompleted(LOG); + LOG.incrementProcessed(lrdsProgress); } + LOG.ensureCompleted(lrdsProgress); } /** @@ -228,14 +187,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase * @param lofs Local outlier factor storage * @param lofminmax Score minimum/maximum tracker */ - private void computeLOFScores(KNNQuery<O, D> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { + private void computeLOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { FiniteProgress progressLOFs = LOG.isVerbose() ? 
new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null; for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double lof; final double lrdp = lrds.doubleValue(iter); - final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + final KNNList neighbors = knnq.getKNNForDBID(iter, k); if(!Double.isInfinite(lrdp)) { - double sum = 0.0; + double sum = 0.; int count = 0; for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself @@ -258,13 +217,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase // update minimum and maximum lofminmax.put(lof); - if(progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } - } - if(progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); + LOG.incrementProcessed(progressLOFs); } + LOG.ensureCompleted(progressLOFs); } @Override @@ -283,14 +238,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase * @author Erich Schubert * * @apiviz.exclude + * + * @param <O> Object type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Parameter to specify the number of nearest neighbors of an object to be - * considered for computing its LOF_SCORE, must be an integer greater than - * 1. + * considered for computing its LOF score, must be an integer greater than + * or equal to 1. */ - public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF score."); /** * The neighborhood size to use. 
@@ -302,14 +259,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBase super.makeOptions(config); final IntParameter pK = new IntParameter(K_ID); - pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(pK)) { - k = pK.getValue(); + k = pK.intValue(); } } @Override - protected LOF<O, D> makeInstance() { + protected LOF<O> makeInstance() { return new LOF<>(k, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java index 525d45f2..6278880f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -35,29 +35,26 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import 
de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.Mean; -import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -77,82 +74,65 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * Distance/density based algorithm similar to LOF to detect outliers, but with * statistical methods to achieve better result stability. 
* + * Reference: + * <p> + * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek:<br /> + * LoOP: Local Outlier Probabilities< br /> + * In Proceedings of the 18th International Conference on Information and + * Knowledge Management (CIKM), Hong Kong, China, 2009 + * </p> + * + * Implementation notes: + * <ul> + * <li>The lambda parameter was removed from the pdist term, because it cancels + * out.</li> + * <li>In ELKI 0.7.0, the {@code k} parameters have changed by 1 to make them + * similar to other methods and more intuitive.</li> + * </ul> + * * @author Erich Schubert * * @apiviz.has KNNQuery * * @param <O> type of objects handled by this algorithm - * @param <D> type of distances used */ @Title("LoOP: Local Outlier Probabilities") @Description("Variant of the LOF algorithm normalized using statistical values.") -@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "LoOP: Local Outlier Probabilities", booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", url = "http://dx.doi.org/10.1145/1645953.1646195") -@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP", "outlier.LoOP" }) -public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", // +title = "LoOP: Local Outlier Probabilities", // +booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", // +url = "http://dx.doi.org/10.1145/1645953.1646195") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP" }) +public class LoOP<O> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
*/ private static final Logging LOG = Logging.getLogger(LoOP.class); /** - * The distance function to determine the reachability distance between - * database objects. - */ - public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object."); - - /** - * The distance function to determine the reachability distance between - * database objects. - */ - public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object."); - - /** - * Parameter to specify the number of nearest neighbors of an object to be - * considered for computing its LOOP_SCORE, must be an integer greater than 1. - */ - public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value."); - - /** - * Parameter to specify the number of nearest neighbors of an object to be - * considered for computing its LOOP_SCORE, must be an integer greater than 1. - */ - public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE."); - - /** - * Parameter to specify the number of nearest neighbors of an object to be - * considered for computing its LOOP_SCORE, must be an integer greater than 1. - */ - public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation."); - - /** - * Holds the value of {@link #KREACH_ID}. + * Reachability neighborhood size. */ int kreach; /** - * Holds the value of {@link #KCOMP_ID}. + * Comparison neighborhood size. */ int kcomp; /** - * Hold the value of {@link #LAMBDA_ID}. + * Lambda parameter. */ double lambda; /** - * Preprocessor Step 1. + * Distance function for reachability. */ - protected DistanceFunction<? 
super O, D> reachabilityDistanceFunction; + protected DistanceFunction<? super O> reachabilityDistanceFunction; /** - * Preprocessor Step 2. + * Distance function for comparison set. */ - protected DistanceFunction<? super O, D> comparisonDistanceFunction; - - /** - * Include object itself in kNN neighborhood. - */ - static boolean objectIsInKNN = false; + protected DistanceFunction<? super O> comparisonDistanceFunction; /** * Constructor with parameters. @@ -163,7 +143,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O * @param comparisonDistanceFunction distance function for comparison * @param lambda Lambda parameter */ - public LoOP(int kreach, int kcomp, DistanceFunction<? super O, D> reachabilityDistanceFunction, DistanceFunction<? super O, D> comparisonDistanceFunction, double lambda) { + public LoOP(int kreach, int kcomp, DistanceFunction<? super O> reachabilityDistanceFunction, DistanceFunction<? super O> comparisonDistanceFunction, double lambda) { super(); this.kreach = kreach; this.kcomp = kcomp; @@ -180,35 +160,17 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O * @param stepprog Progress logger, may be {@code null} * @return result */ - protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) { - KNNQuery<O, D> knnComp; - KNNQuery<O, D> knnReach; + protected Pair<KNNQuery<O>, KNNQuery<O>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) { + KNNQuery<O> knnComp, knnReach; if(comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) { - // We need each neighborhood twice - use "HEAVY" flag. 
- knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! - if(knnComp == null) { - if(stepprog != null) { - stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, comparisonDistanceFunction, kcomp); - database.addIndex(preproc); - DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction); - knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE); - } - else { - if(stepprog != null) { - stepprog.beginStep(1, "Optimized neighborhoods provided by database.", LOG); - } - } + LOG.beginStep(stepprog, 1, "Materializing neighborhoods with respect to reference neighborhood distance function."); + knnComp = DatabaseUtil.precomputedKNNQuery(database, relation, comparisonDistanceFunction, kcomp + 1); knnReach = knnComp; } else { - if(stepprog != null) { - stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", LOG); - } - knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach); - knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp); + LOG.beginStep(stepprog, 1, "Not materializing distance functions, since we request each DBID once only."); + knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach + 1); + knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp + 1); } return new Pair<>(knnComp, knnReach); } @@ -221,13 +183,11 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O * @return Outlier result */ public OutlierResult run(Database database, Relation<O> relation) { - final double sqrt2 = Math.sqrt(2.0); - StepProgress 
stepprog = LOG.isVerbose() ? new StepProgress(5) : null; - Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog); - KNNQuery<O, D> knnComp = pair.getFirst(); - KNNQuery<O, D> knnReach = pair.getSecond(); + Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog); + KNNQuery<O> knnComp = pair.getFirst(); + KNNQuery<O> knnReach = pair.getSecond(); // Assert we got something if(knnComp == null) { @@ -237,118 +197,111 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O throw new AbortException("No kNN queries supported by database for density estimation distance function."); } + // FIXME: tie handling! + // Probabilistic distances - WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - Mean mean = new Mean(); - {// computing PRDs - if(stepprog != null) { - stepprog.beginStep(3, "Computing pdists", LOG); - } - FiniteProgress prdsProgress = LOG.isVerbose() ? 
new FiniteProgress("pdists", relation.size(), LOG) : null; - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - final KNNList<D> neighbors = knnReach.getKNNForDBID(iditer, kreach); - mean.reset(); - // use first kref neighbors as reference set - int ks = 0; - // TODO: optimize for double distances - if(neighbors instanceof DoubleDistanceKNNList) { - for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { - final double d = neighbor.doubleDistance(); - mean.put(d * d); - ks++; - if(ks >= kreach) { - break; - } - } - } - } - else { - for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { - double d = neighbor.getDistance().doubleValue(); - mean.put(d * d); - ks++; - if(ks >= kreach) { - break; - } - } - } - } - double pdist = lambda * Math.sqrt(mean.getMean()); - pdists.putDouble(iditer, pdist); - if(prdsProgress != null) { - prdsProgress.incrementProcessed(LOG); - } - } - } + WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB); + LOG.beginStep(stepprog, 3, "Computing pdists"); + computePDists(relation, knnReach, pdists); // Compute PLOF values. WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - MeanVariance mvplof = new MeanVariance(); + LOG.beginStep(stepprog, 4, "Computing PLOF"); + double nplof = computePLOFs(relation, knnComp, pdists, plofs); + + // Normalize the outlier scores. 
+ DoubleMinMax mm = new DoubleMinMax(); {// compute LOOP_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(4, "Computing PLOF", LOG); - } + LOG.beginStep(stepprog, 5, "Computing LoOP scores"); - FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null; - MeanVariance mv = new MeanVariance(); + FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null; + final double norm = 1. / (nplof * MathUtil.SQRT2); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - final KNNList<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp); - mv.reset(); - // use first kref neighbors as comparison set. - int ks = 0; - for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if(objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { - mv.put(pdists.doubleValue(neighbor)); - ks++; - if(ks >= kcomp) { - break; - } - } - } - double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0); - if(Double.isNaN(plof) || Double.isInfinite(plof)) { - plof = 1.0; - } - plofs.putDouble(iditer, plof); - mvplof.put((plof - 1.0) * (plof - 1.0)); - - if(progressPLOFs != null) { - progressPLOFs.incrementProcessed(LOG); - } + double loop = NormalDistribution.erf((plofs.doubleValue(iditer) - 1.) * norm); + plofs.putDouble(iditer, loop); + mm.put(loop); + LOG.incrementProcessed(progressLOOPs); } + LOG.ensureCompleted(progressLOOPs); } - double nplof = lambda * Math.sqrt(mvplof.getMean()); - if(LOG.isDebugging()) { - LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev()); - } - - // Compute final LoOP values. 
- WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); - {// compute LOOP_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(5, "Computing LoOP scores", LOG); - } + LOG.setCompleted(stepprog); - FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null; - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2))); + // Build result representation. + DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", plofs, relation.getDBIDs()); + OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(mm.getMin(), mm.getMax(), 0.); + return new OutlierResult(scoreMeta, scoreResult); + } - if(progressLOOPs != null) { - progressLOOPs.incrementProcessed(LOG); + /** + * Compute the probabilistic distances used by LoOP. + * + * @param relation Data relation + * @param knn kNN query + * @param pdists Storage for distances + */ + protected void computePDists(Relation<O> relation, KNNQuery<O> knn, WritableDoubleDataStore pdists) { + // computing PRDs + FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null; + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + final KNNList neighbors = knn.getKNNForDBID(iditer, kreach + 1); + // use first kref neighbors as reference set + int ks = 0; + double ssum = 0.; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid() && ks < kreach; neighbor.advance()) { + if(DBIDUtil.equal(neighbor, iditer)) { + continue; } + final double d = neighbor.doubleValue(); + ssum += d * d; + ks++; } + double pdist = ks > 0 ? 
Math.sqrt(ssum / ks) : 0.; + pdists.putDouble(iditer, pdist); + LOG.incrementProcessed(prdsProgress); } + LOG.ensureCompleted(prdsProgress); + } + + /** + * Compute the LOF values, using the pdist distances. + * + * @param relation Data relation + * @param knn kNN query + * @param pdists Precomputed distances + * @param plofs Storage for PLOFs. + * @return Normalization factor. + */ + protected double computePLOFs(Relation<O> relation, KNNQuery<O> knn, WritableDoubleDataStore pdists, WritableDoubleDataStore plofs) { + FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null; + Mean mvplof = new Mean(); + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + final KNNList neighbors = knn.getKNNForDBID(iditer, kcomp + 1); + // use first kref neighbors as comparison set. + int ks = 0; + double sum = 0.; + for(DBIDIter neighbor = neighbors.iter(); neighbor.valid() && ks < kcomp; neighbor.advance()) { + if(DBIDUtil.equal(neighbor, iditer)) { + continue; + } + sum += pdists.doubleValue(neighbor); + ks++; + } + double plof = Math.max(pdists.doubleValue(iditer) * ks / sum, 1.0); + if(Double.isNaN(plof) || Double.isInfinite(plof)) { + plof = 1.0; + } + plofs.putDouble(iditer, plof); + mvplof.put((plof - 1.0) * (plof - 1.0)); - if(stepprog != null) { - stepprog.setCompleted(LOG); + LOG.incrementProcessed(progressPLOFs); } + LOG.ensureCompleted(progressPLOFs); - // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs()); - OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); - return new OutlierResult(scoreMeta, scoreResult); + double nplof = lambda * Math.sqrt(mvplof.getMean()); + if(LOG.isDebugging()) { + LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean()); + } + return nplof; } @Override @@ -374,8 +327,43 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O * @author Erich Schubert * * @apiviz.exclude + * + * @param <O> Object type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractParameterizer { + public static class Parameterizer<O> extends AbstractParameterizer { + /** + * The distance function to determine the reachability distance between + * database objects. + */ + public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object."); + + /** + * The distance function to determine the reachability distance between + * database objects. + */ + public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object."); + + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its LOOP_SCORE, must be an integer greater than + * 1. + */ + public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value."); + + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its LOOP_SCORE, must be an integer greater than + * 1. 
+ */ + public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE."); + + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its LOOP_SCORE, must be an integer greater than + * 1. + */ + public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation."); + /** * Holds the value of {@link #KREACH_ID}. */ @@ -394,29 +382,29 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O /** * Preprocessor Step 1. */ - protected DistanceFunction<O, D> reachabilityDistanceFunction = null; + protected DistanceFunction<O> reachabilityDistanceFunction = null; /** * Preprocessor Step 2. */ - protected DistanceFunction<O, D> comparisonDistanceFunction = null; + protected DistanceFunction<O> comparisonDistanceFunction = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); final IntParameter kcompP = new IntParameter(KCOMP_ID); - kcompP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + kcompP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(kcompP)) { kcomp = kcompP.intValue(); } - final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); + final ObjectParameter<DistanceFunction<O>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); if(config.grab(compDistP)) { comparisonDistanceFunction = compDistP.instantiateClass(config); } final IntParameter kreachP = new IntParameter(KREACH_ID); - kreachP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + kreachP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); kreachP.setOptional(true); if(config.grab(kreachP)) { kreach 
= kreachP.intValue(); @@ -425,7 +413,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O kreach = kcomp; } - final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); + final ObjectParameter<DistanceFunction<O>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); if(config.grab(reachDistP)) { reachabilityDistanceFunction = reachDistP.instantiateClass(config); } @@ -439,8 +427,8 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O } @Override - protected LoOP<O, D> makeInstance() { - DistanceFunction<O, D> realreach = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : comparisonDistanceFunction; + protected LoOP<O> makeInstance() { + DistanceFunction<O> realreach = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : comparisonDistanceFunction; return new LoOP<>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java index c01c914f..6033ae3e 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -27,15 +27,16 @@ import java.util.List; import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -43,7 +44,6 @@ import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNListener;
@@ -66,7 +66,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * @apiviz.has FlexibleLOF.LOFResult oneway - - updates
*/
// TODO: related to publication?
-public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, D> {
+public class OnlineLOF<O> extends FlexibleLOF<O> {
/**
* The logger for this class.
*/
@@ -80,7 +80,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, * @param neighborhoodDistanceFunction the neighborhood distance function
* @param reachabilityDistanceFunction the reachability distance function
*/
- public OnlineLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ public OnlineLOF(int krefer, int kreach, DistanceFunction<? super O> neighborhoodDistanceFunction, DistanceFunction<? super O> reachabilityDistanceFunction) {
super(krefer, kreach, neighborhoodDistanceFunction, reachabilityDistanceFunction);
}
@@ -93,20 +93,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
- Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
- KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
- KNNQuery<O, D> kNNReach = queries.getFirst().getSecond();
- RKNNQuery<O, D> rkNNRefer = queries.getSecond().getFirst();
- RKNNQuery<O, D> rkNNReach = queries.getSecond().getSecond();
+ Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
+ KNNQuery<O> kNNRefer = queries.getFirst().getFirst();
+ KNNQuery<O> kNNReach = queries.getFirst().getSecond();
+ RKNNQuery<O> rkNNRefer = queries.getSecond().getFirst();
+ RKNNQuery<O> rkNNReach = queries.getSecond().getSecond();
- LOFResult<O, D> lofResult = super.doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog);
+ LOFResult<O> lofResult = super.doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog);
lofResult.setRkNNRefer(rkNNRefer);
lofResult.setRkNNReach(rkNNReach);
// add listener
KNNListener l = new LOFKNNListener(lofResult);
- ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
- ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O>) ((PreprocessorKNNQuery<O>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O>) ((PreprocessorKNNQuery<O>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
return lofResult.getResult();
}
@@ -118,47 +118,48 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, * @param stepprog Progress logger
* @return the kNN and rkNN queries
*/
- private Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
+ private Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
// Use "HEAVY" flag, since this is an online algorithm
- KNNQuery<O, D> kNNRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- RKNNQuery<O, D> rkNNRefer = QueryUtil.getRKNNQuery(relation, referenceDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O> kNNRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O> rkNNRefer = QueryUtil.getRKNNQuery(relation, referenceDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query or RkNN query - use a preprocessor!
- if (kNNRefer == null || rkNNRefer == null) {
- if (stepprog != null) {
+ if(kNNRefer == null || rkNNRefer == null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
- DistanceQuery<O, D> ndq = database.getDistanceQuery(relation, referenceDistanceFunction);
+ MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
+ DistanceQuery<O> ndq = database.getDistanceQuery(relation, referenceDistanceFunction);
kNNRefer = preproc.getKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
rkNNRefer = preproc.getRKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
// add as index
- relation.getDatabase().addIndex(preproc);
- } else {
- if (stepprog != null) {
+ database.addIndex(preproc);
+ }
+ else {
+ if(stepprog != null) {
stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
- KNNQuery<O, D> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- if (kNNReach == null || rkNNReach == null) {
- if (stepprog != null) {
+ KNNQuery<O> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ if(kNNReach == null || rkNNReach == null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, kreach);
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
- DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
+ MaterializeKNNAndRKNNPreprocessor<O> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
+ DistanceQuery<O> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
kNNReach = preproc.getKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
rkNNReach = preproc.getRKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
// add as index
relation.getDatabase().addIndex(preproc);
}
- Pair<KNNQuery<O, D>, KNNQuery<O, D>> kNNPair = new Pair<>(kNNRefer, kNNReach);
- Pair<RKNNQuery<O, D>, RKNNQuery<O, D>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
+ Pair<KNNQuery<O>, KNNQuery<O>> kNNPair = new Pair<>(kNNRefer, kNNReach);
+ Pair<RKNNQuery<O>, RKNNQuery<O>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
return new Pair<>(kNNPair, rkNNPair);
}
@@ -182,36 +183,40 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, /**
* Holds the result of a former run of the LOF algorithm.
*/
- private LOFResult<O, D> lofResult;
+ private LOFResult<O> lofResult;
/**
* Constructs a listener for the LOF algorithm.
*
* @param lofResult the result of a former run of the LOF algorithm
*/
- public LOFKNNListener(LOFResult<O, D> lofResult) {
+ public LOFKNNListener(LOFResult<O> lofResult) {
this.lofResult = lofResult;
}
@Override
public void kNNsChanged(KNNChangeEvent e) {
- AbstractMaterializeKNNPreprocessor<O, D, ?> p1 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNRefer()).getPreprocessor();
- AbstractMaterializeKNNPreprocessor<O, D, ?> p2 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNReach()).getPreprocessor();
+ AbstractMaterializeKNNPreprocessor<O> p1 = ((PreprocessorKNNQuery<O>) lofResult.getKNNRefer()).getPreprocessor();
+ AbstractMaterializeKNNPreprocessor<O> p2 = ((PreprocessorKNNQuery<O>) lofResult.getKNNReach()).getPreprocessor();
- if (firstEventReceived == null) {
- if (e.getSource().equals(p1) && e.getSource().equals(p2)) {
+ if(firstEventReceived == null) {
+ if(e.getSource().equals(p1) && e.getSource().equals(p2)) {
kNNsChanged(e, e);
- } else {
+ }
+ else {
firstEventReceived = e;
}
- } else {
- if (e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
+ }
+ else {
+ if(e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
kNNsChanged(e, firstEventReceived);
firstEventReceived = null;
- } else if (e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
+ }
+ else if(e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
kNNsChanged(firstEventReceived, e);
firstEventReceived = null;
- } else {
+ }
+ else {
throw new UnsupportedOperationException("Event sources do not fit!");
}
}
@@ -225,18 +230,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, * @param e2 the change event of the second preprocessor
*/
private void kNNsChanged(KNNChangeEvent e1, KNNChangeEvent e2) {
- if (!e1.getType().equals(e2.getType())) {
+ if(!e1.getType().equals(e2.getType())) {
throw new UnsupportedOperationException("Event types do not fit: " + e1.getType() + " != " + e2.getType());
}
- if (!e1.getObjects().equals(e2.getObjects())) {
+ if(!e1.getObjects().equals(e2.getObjects())) {
throw new UnsupportedOperationException("Objects do not fit: " + e1.getObjects() + " != " + e2.getObjects());
}
- if (e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
+ if(e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
kNNsRemoved(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- } else if (e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
+ }
+ else if(e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
kNNsInserted(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- } else {
+ }
+ else {
throw new UnsupportedOperationException("Unsupported event type: " + e1.getType());
}
}
@@ -251,44 +258,43 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, * reachability distance function
* @param lofResult the result of the former LOF run
*/
- private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
+ private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O> lofResult) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// recompute lrds
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
- List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
+ List<? extends DoubleDBIDList> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
- WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
- for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ WritableDoubleDataStore new_lrds = DataStoreUtil.makeDoubleStorage(affected_lrd_id_candidates, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ computeLRDs(lofResult.getKNNReach(), affected_lrd_id_candidates, new_lrds);
+ for(DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if (Double.isNaN(old_lrd) || old_lrd != new_lrd) {
+ if(Double.isNaN(old_lrd) || old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
+ List<? extends DoubleDBIDList> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
}
/**
@@ -301,53 +307,52 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, * reachability distance function
* @param lofResult the result of the former LOF run
*/
- private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
+ private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O> lofResult) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
// delete lrds and lofs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
}
- for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
lofResult.getLrds().delete(iter);
lofResult.getLofs().delete(iter);
}
// recompute lrds
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(2, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
- List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
+ List<? extends DoubleDBIDList> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
- WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
- for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ WritableDoubleDataStore new_lrds = DataStoreUtil.makeDoubleStorage(affected_lrd_id_candidates, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ computeLRDs(lofResult.getKNNReach(), affected_lrd_id_candidates, new_lrds);
+ for(DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if (old_lrd != new_lrd) {
+ if(old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(3, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
+ List<? extends DoubleDBIDList> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if (stepprog != null) {
+ if(stepprog != null) {
stepprog.beginStep(4, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
}
/**
@@ -358,12 +363,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, * @return a set containing the ids of the query result and the specified
* ids
*/
- private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDList<D>> queryResults, DBIDs... ids) {
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DoubleDBIDList> queryResults, DBIDs... ids) {
ModifiableDBIDs result = DBIDUtil.newHashSet();
- for (DBIDs dbids : ids) {
+ for(DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
- for (DistanceDBIDList<D> queryResult : queryResults) {
+ for(DoubleDBIDList queryResult : queryResults) {
result.addDBIDs(queryResult);
}
return DBIDUtil.newArray(result);
@@ -375,24 +380,23 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, * @param ids the ids of the lofs to be recomputed
* @param lofResult the result of the former LOF run
*/
- private void recomputeLOFs(DBIDs ids, LOFResult<O, D> lofResult) {
- Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lofResult.getLrds(), lofResult.getKNNRefer());
- WritableDoubleDataStore new_lofs = lofsAndMax.getFirst();
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ private void recomputeLOFs(DBIDs ids, LOFResult<O> lofResult) {
+ WritableDoubleDataStore new_lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ DoubleMinMax new_lofminmax = new DoubleMinMax();
+ computeLOFs(lofResult.getKNNRefer(), ids, lofResult.getLrds(), new_lofs, new_lofminmax);
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
lofResult.getLofs().putDouble(iter, new_lofs.doubleValue(iter));
}
- // track the maximum value for normalization.
- DoubleMinMax new_lofminmax = lofsAndMax.getSecond();
-
// Actualize meta info
- if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
- BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
- scoreMeta.setActualMaximum(new_lofminmax.getMax());
- }
-
- if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
- BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
- scoreMeta.setActualMinimum(new_lofminmax.getMin());
+ if(new_lofminmax.isValid()) {
+ if(lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
+ BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
+ scoreMeta.setActualMaximum(new_lofminmax.getMax());
+ }
+ if(lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
+ BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
+ scoreMeta.setActualMinimum(new_lofminmax.getMin());
+ }
}
}
}
@@ -409,9 +413,9 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, *
* @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends FlexibleLOF.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends FlexibleLOF.Parameterizer<O> {
@Override
- protected OnlineLOF<O, D> makeInstance() {
+ protected OnlineLOF<O> makeInstance() {
return new OnlineLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java index b990ef35..3ba56b16 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -30,27 +30,20 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; -import 
de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -61,6 +54,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -77,9 +71,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @apiviz.has KernelDensityFunction * * @param <O> the type of objects handled by this Algorithm - * @param <D> Distance type */ -public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +public class SimpleKernelDensityLOF<O extends NumberVector> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
*/ @@ -101,7 +94,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD * @param k the value of k * @param kernel Kernel function */ - public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) { + public SimpleKernelDensityLOF(int k, DistanceFunction<? super O> distance, KernelDensityFunction kernel) { super(distance); this.k = k + 1; this.kernel = kernel; @@ -116,112 +109,75 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD */ public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null; - final int dim = RelationUtil.dimensionality(relation); - DBIDs ids = relation.getDBIDs(); - // "HEAVY" flag for KNN Query since it is used more than once - KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! - if (!(knnq instanceof PreprocessorKNNQuery)) { - if (stepprog != null) { - stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); - database.addIndex(preproc); - DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); - knnq = preproc.getKNNQuery(rdq, k); - } + LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. 
distance function."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k); // Compute LRDs - if (stepprog != null) { - stepprog.beginStep(2, "Computing densities.", LOG); - } + LOG.beginStep(stepprog, 2, "Computing densities."); WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; - for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final KNNList neighbors = knnq.getKNNForDBID(it, k); int count = 0; double sum = 0.0; - if (neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if (DBIDUtil.equal(neighbor, it)) { - continue; - } - double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance(); - final double v = neighbor.doubleDistance() / max; - sum += kernel.density(v) / MathUtil.powi(max, dim); - count++; - } - } else { - for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if (DBIDUtil.equal(neighbor, it)) { - continue; - } - double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue(); - final double v = neighbor.getDistance().doubleValue() / max; - sum += kernel.density(v) / MathUtil.powi(max, dim); - count++; + // Fast version for double distances + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, it)) { + continue; } + double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance(); + final double v = neighbor.doubleValue() / max; + sum += kernel.density(v) / MathUtil.powi(max, dim); + count++; } 
final double density = sum / count; dens.putDouble(it, density); - if (densProgress != null) { - densProgress.incrementProcessed(LOG); - } - } - if (densProgress != null) { - densProgress.ensureCompleted(LOG); + LOG.incrementProcessed(densProgress); } + LOG.ensureCompleted(densProgress); // compute LOF_SCORE of each db object - if (stepprog != null) { - stepprog.beginStep(3, "Computing KLOFs.", LOG); - } + LOG.beginStep(stepprog, 3, "Computing KLOFs."); WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); // track the maximum value for normalization. DoubleMinMax lofminmax = new DoubleMinMax(); FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null; - for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { final double lrdp = dens.doubleValue(it); final double lof; - if (lrdp > 0) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + if(lrdp > 0) { + final KNNList neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; - for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself - if (DBIDUtil.equal(neighbor, it)) { + if(DBIDUtil.equal(neighbor, it)) { continue; } sum += dens.doubleValue(neighbor); count++; } lof = sum / (count * lrdp); - } else { + } + else { lof = 1.0; } lofs.putDouble(it, lof); // update minimum and maximum lofminmax.put(lof); - if (progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } - } - if (progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); + LOG.incrementProcessed(progressLOFs); } + LOG.ensureCompleted(progressLOFs); - if (stepprog != null) { - stepprog.setCompleted(LOG); - } + LOG.setCompleted(stepprog); // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); @@ -246,9 +202,8 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD * @apiviz.exclude * * @param <O> vector type - * @param <D> distance type */ - public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * Option ID for kernel density LOF kernel. 
*/ @@ -270,18 +225,18 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID); pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); - if (config.grab(pK)) { + if(config.grab(pK)) { k = pK.getValue(); } ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class); - if (config.grab(kernelP)) { + if(config.grab(kernelP)) { kernel = kernelP.instantiateClass(config); } } @Override - protected SimpleKernelDensityLOF<O, D> makeInstance() { + protected SimpleKernelDensityLOF<O> makeInstance() { return new SimpleKernelDensityLOF<>(k, distanceFunction, kernel); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java index d54b053f..8fce6503 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,26 +28,19 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; -import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import 
de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; -import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; -import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; @@ -56,6 +49,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; @@ -68,28 +62,30 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * Reference: * <p> * Erich Schubert, Arthur Zimek, 
Hans-Peter Kriegel<br /> - * Local outlier detection reconsidered: a generalized view on locality with - * applications to spatial, video, and network outlier detection<br /> - * In: Data Mining and Knowledge Discovery + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. * </p> * * @author Erich Schubert * * @apiviz.has KNNQuery * - * @param <O> the type of DatabaseObjects handled by this Algorithm - * @param <D> Distance type + * @param <O> the type of data objects handled by this algorithm */ -@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title = "Local outlier detection reconsidered: a generalized view on locality with applications to spatial, video, and network outlier detection", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1007/s10618-012-0300-z") -@Alias({ "SimpleLOF", "outlier.SimpleLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF" }) -public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +@Alias({ "SimplifiedLOF", "outlier.SimplifiedLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimplifiedLOF" }) +public class SimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ private static final Logging LOG = Logging.getLogger(SimplifiedLOF.class); /** - * Parameter k. 
+ * The number of neighbors to query, excluding the query point. */ protected int k; @@ -98,7 +94,7 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi * * @param k the value of k */ - public SimplifiedLOF(int k, DistanceFunction<? super O, D> distance) { + public SimplifiedLOF(int k, DistanceFunction<? super O> distance) { super(distance); this.k = k + 1; } @@ -111,114 +107,103 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi * @return LOF outlier result */ public OutlierResult run(Database database, Relation<O> relation) { - StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null; - + StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Simplified LOF", 3) : null; DBIDs ids = relation.getDBIDs(); - // "HEAVY" flag for KNN Query since it is used more than once - KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); - // No optimized kNN query - use a preprocessor! - if(!(knnq instanceof PreprocessorKNNQuery)) { - if(stepprog != null) { - stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); - } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); - database.addIndex(preproc); - DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); - knnq = preproc.getKNNQuery(rdq, k); - } + LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. 
distance function."); + KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k); // Compute LRDs - if(stepprog != null) { - stepprog.beginStep(2, "Computing densities.", LOG); - } + LOG.beginStep(stepprog, 2, "Computing densities."); WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; - for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); + computeSimplifiedLRDs(ids, knnq, dens); + + // compute LOF_SCORE of each db object + LOG.beginStep(stepprog, 3, "Computing SLOFs."); + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + DoubleMinMax lofminmax = new DoubleMinMax(); + computeSimplifiedLOFs(ids, knnq, dens, lofs, lofminmax); + + LOG.setCompleted(stepprog); + + // Build result representation. + DoubleRelation scoreResult = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids); + OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + + return result; + } + + /** + * Compute the simplified reachability densities. + * + * @param ids IDs to process + * @param knnq kNN query class + * @param lrds Density output + */ + private void computeSimplifiedLRDs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore lrds) { + FiniteProgress lrdsProgress = LOG.isVerbose() ? 
new FiniteProgress("Densities", ids.size(), LOG) : null; + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final KNNList neighbors = knnq.getKNNForDBID(iter, k); double sum = 0.0; int count = 0; - if(neighbors instanceof DoubleDistanceKNNList) { - // Fast version for double distances - for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, it)) { - continue; - } - sum += neighbor.doubleDistance(); - count++; - } - } - else { - for(DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - if(DBIDUtil.equal(neighbor, it)) { - continue; - } - sum += neighbor.getDistance().doubleValue(); - count++; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(neighbor, iter)) { + continue; } + sum += neighbor.doubleValue(); + count++; } // Avoid division by 0 - final double lrd = (sum > 0) ? (count / sum) : 0; - dens.putDouble(it, lrd); - if(densProgress != null) { - densProgress.incrementProcessed(LOG); - } - } - if(densProgress != null) { - densProgress.ensureCompleted(LOG); - } - - // compute LOF_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(3, "Computing SLOFs.", LOG); + final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY; + lrds.putDouble(iter, lrd); + LOG.incrementProcessed(lrdsProgress); } - WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); - // track the maximum value for normalization. - DoubleMinMax lofminmax = new DoubleMinMax(); + LOG.ensureCompleted(lrdsProgress); + } - FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null; - for(DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final double lrdp = dens.doubleValue(it); + /** + * Compute the simplified LOF factors. 
+ * + * @param ids IDs to compute for + * @param knnq kNN query class + * @param slrds Object densities + * @param lofs SLOF output storage + * @param lofminmax Minimum and maximum scores + */ + private void computeSimplifiedLOFs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore slrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) { + FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simplified LOF scores.", ids.size(), LOG) : null; + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double lof; - if(lrdp > 0) { - final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); - double sum = 0.0; + final double lrdp = slrds.doubleValue(iter); + final KNNList neighbors = knnq.getKNNForDBID(iter, k); + if(!Double.isInfinite(lrdp)) { + double sum = 0.; int count = 0; for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself - if(DBIDUtil.equal(neighbor, it)) { + if(DBIDUtil.equal(neighbor, iter)) { continue; } - sum += dens.doubleValue(neighbor); + final double val = slrds.doubleValue(neighbor); + sum += val; count++; + if(Double.isInfinite(val)) { + break; + } } - lof = sum / (count * lrdp); + lof = sum / (lrdp * count); } else { lof = 1.0; } - lofs.putDouble(it, lof); + lofs.putDouble(iter, lof); // update minimum and maximum lofminmax.put(lof); - if(progressLOFs != null) { - progressLOFs.incrementProcessed(LOG); - } - } - if(progressLOFs != null) { - progressLOFs.ensureCompleted(LOG); - } - - if(stepprog != null) { - stepprog.setCompleted(LOG); + LOG.incrementProcessed(progressLOFs); } - - // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids); - OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); - OutlierResult result = new OutlierResult(scoreMeta, scoreResult); - - return result; + LOG.ensureCompleted(progressLOFs); } @Override @@ -238,10 +223,9 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi * * @apiviz.exclude * - * @param <O> vector type - * @param <D> distance type + * @param <O> Object type */ - public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { /** * The neighborhood size to use. */ @@ -252,14 +236,14 @@ public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDi super.makeOptions(config); final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID); - pK.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + pK.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); if(config.grab(pK)) { k = pK.getValue(); } } @Override - protected SimplifiedLOF<O, D> makeInstance() { + protected SimplifiedLOF<O> makeInstance() { return new SimplifiedLOF<>(k, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java index 48d4b16a..090e89da 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team 
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java new file mode 100644 index 00000000..3c0bf4c8 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LOFProcessor.java @@ -0,0 +1,119 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.database.datastore.DataStore; +import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.parallel.Executor; +import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; + +/** + * Processor for computing the LOF. 
+ * + * @author Erich Schubert + * + * @apiviz.has Instance + */ +public class LOFProcessor extends AbstractDoubleProcessor { + /** + * KNN store + */ + private DataStore<? extends KNNList> knns; + + /** + * LRD store + */ + private DoubleDataStore lrds; + + /** + * Exclude object itself from computation. + */ + private boolean noself; + + /** + * Constructor. + * + * @param knns k nearest neighbors + * @param lrds Local reachability distances + * @param noself Exclude self from neighbors + */ + public LOFProcessor(DataStore<? extends KNNList> knns, DoubleDataStore lrds, boolean noself) { + super(); + this.knns = knns; + this.lrds = lrds; + this.noself = noself; + } + + @Override + public Instance instantiate(Executor master) { + return new Instance(master.getInstance(output)); + } + + /** + * Instance + * + * @author Erich Schubert + */ + private class Instance extends AbstractDoubleProcessor.Instance { + /** + * Constructor. + * + * @param output Output variable + */ + protected Instance(SharedDouble.Instance output) { + super(output); + } + + @Override + public void map(DBIDRef id) { + // Own density + final double lrdp = lrds.doubleValue(id); + if (Double.isInfinite(lrdp)) { + output.set(1.0); + return; + } + // Compute average neighbor density: + KNNList knn = knns.get(id); + double avlrd = 0.0; + int cnt = 0; + for (DBIDIter n = knn.iter(); n.valid(); n.advance()) { + if (noself && DBIDUtil.equal(n, id)) { + continue; + } + avlrd += lrds.doubleValue(n); + cnt++; + if (Double.isInfinite(avlrd)) { + break; + } + } + avlrd = (cnt > 0) ? 
(avlrd / cnt) : 0; + output.set(avlrd / lrdp); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java new file mode 100644 index 00000000..1b62320a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/LRDProcessor.java @@ -0,0 +1,103 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.database.datastore.DataStore; +import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.parallel.Executor; +import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; + +/** + * Processor for the "local reachability density" of LOF. 
+ * + * @author Erich Schubert + * + * @apiviz.has Instance + */ +public class LRDProcessor extends AbstractDoubleProcessor { + /** + * KNN store + */ + private DataStore<? extends KNNList> knns; + + /** + * k-distance store + */ + private DoubleDataStore kdists; + + /** + * Constructor. + * + * @param knns k nearest neighbors + * @param kdists k distances + */ + public LRDProcessor(DataStore<? extends KNNList> knns, DoubleDataStore kdists) { + super(); + this.knns = knns; + this.kdists = kdists; + } + + @Override + public Instance instantiate(Executor master) { + return new Instance(master.getInstance(output)); + } + + /** + * Instance + * + * @author Erich Schubert + */ + private class Instance extends AbstractDoubleProcessor.Instance { + /** + * Constructor. + * + * @param output Output variable + */ + protected Instance(SharedDouble.Instance output) { + super(output); + } + + @Override + public void map(DBIDRef id) { + KNNList knn = knns.get(id); + double lrd = 0.0; + int size = 0; + for(DoubleDBIDListIter n = knn.iter(); n.valid(); n.advance()) { + // Do not include the query object + if(DBIDUtil.equal(n, id)) { + continue; + } + lrd += Math.max(kdists.doubleValue(n), n.doubleValue()); + size += 1; + } + // Avoid division by 0: + output.set(lrd > 0 ? size / lrd : Double.POSITIVE_INFINITY); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java new file mode 100644 index 00000000..fa851401 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelLOF.java @@ -0,0 +1,208 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; +import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KDistanceProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Parallel implementation of Local Outlier Factor using processors. + * + * This parallelized implementation is based on the easy-to-parallelize + * generalized pattern discussed in + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has LRDProcessor + * @apiviz.has LOFProcessor + * + * @param <O> Object type + */ +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +public class ParallelLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Parameter k + */ + private int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k K parameter + */ + public ParallelLOF(DistanceFunction<? 
super O> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Class logger + */ + private static final Logging LOG = Logging.getLogger(ParallelLOF.class); + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + public OutlierResult run(Database database, Relation<O> relation) { + DBIDs ids = relation.getDBIDs(); + DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1); + + // Phase one: KNN and k-dist + WritableDoubleDataStore kdists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + WritableDataStore<KNNList> knns = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, KNNList.class); + { + // Compute kNN + KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq); + SharedObject<KNNList> knnv = new SharedObject<>(); + WriteDataStoreProcessor<KNNList> storek = new WriteDataStoreProcessor<>(knns); + knnm.connectKNNOutput(knnv); + storek.connectInput(knnv); + // Compute k-dist + KDistanceProcessor kdistm = new KDistanceProcessor(k + 1); + SharedDouble kdistv = new SharedDouble(); + WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(kdists); + kdistm.connectKNNInput(knnv); + kdistm.connectOutput(kdistv); + storem.connectInput(kdistv); + + ParallelExecutor.run(ids, knnm, storek, kdistm, storem); + } + + // Phase two: lrd + WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + { + LRDProcessor lrdm = new LRDProcessor(knns, kdists); + SharedDouble lrdv = new SharedDouble(); + WriteDoubleDataStoreProcessor storelrd = new WriteDoubleDataStoreProcessor(lrds); + + lrdm.connectOutput(lrdv); + storelrd.connectInput(lrdv); + ParallelExecutor.run(ids, lrdm, storelrd); + } + kdists.destroy(); // No longer needed. 
+ kdists = null; + + // Phase three: LOF + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DoubleMinMax minmax; + { + LOFProcessor lofm = new LOFProcessor(knns, lrds, true); + SharedDouble lofv = new SharedDouble(); + DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor(); + WriteDoubleDataStoreProcessor storelof = new WriteDoubleDataStoreProcessor(lofs); + + lofm.connectOutput(lofv); + mmm.connectInput(lofv); + storelof.connectInput(lofv); + ParallelExecutor.run(ids, lofm, storelof, mmm); + + minmax = mmm.getMinMax(); + } + + DoubleRelation scoreres = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * K parameter + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter kP = new IntParameter(LOF.Parameterizer.K_ID); + if(config.grab(kP)) { + k = kP.intValue(); + } + } + + @Override + protected ParallelLOF<O> makeInstance() { + return new ParallelLOF<>(distanceFunction, k); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java new file mode 100644 index 00000000..ef67023e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/ParallelSimplifiedLOF.java @@ -0,0 +1,197 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel; + +/* + This file is part of ELKI: + Environment for Developing 
KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import 
de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; +import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Parallel implementation of Simplified-LOF Outlier detection using processors. + * + * This parallelized implementation is based on the easy-to-parallelize + * generalized pattern discussed in + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.has SimplifiedLRDProcessor + * @apiviz.has LOFProcessor + * + * @param <O> Object type + */ +@Reference(authors = "E. Schubert, A. Zimek, H.-P. 
Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +public class ParallelSimplifiedLOF<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Parameter k + */ + private int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k K parameter + */ + public ParallelSimplifiedLOF(DistanceFunction<? super O> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Class logger + */ + private static final Logging LOG = Logging.getLogger(ParallelSimplifiedLOF.class); + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + public OutlierResult run(Database database, Relation<O> relation) { + DBIDs ids = relation.getDBIDs(); + DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1); + + // Phase one: KNN and k-dist + WritableDataStore<KNNList> knns = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, KNNList.class); + { + // Compute kNN + KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq); + SharedObject<KNNList> knnv = new SharedObject<>(); + WriteDataStoreProcessor<KNNList> storek = new WriteDataStoreProcessor<>(knns); + knnm.connectKNNOutput(knnv); + storek.connectInput(knnv); + + ParallelExecutor.run(ids, knnm, storek); + } + + // Phase two: simplified-lrd + WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + { + SimplifiedLRDProcessor lrdm = new SimplifiedLRDProcessor(knns); + SharedDouble lrdv = new SharedDouble(); + WriteDoubleDataStoreProcessor storelrd = new 
WriteDoubleDataStoreProcessor(lrds); + + lrdm.connectOutput(lrdv); + storelrd.connectInput(lrdv); + ParallelExecutor.run(ids, lrdm, storelrd); + } + + // Phase three: Simplified-LOF + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DoubleMinMax minmax; + { + LOFProcessor lofm = new LOFProcessor(knns, lrds, true); + SharedDouble lofv = new SharedDouble(); + DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor(); + WriteDoubleDataStoreProcessor storelof = new WriteDoubleDataStoreProcessor(lofs); + + lofm.connectOutput(lofv); + mmm.connectInput(lofv); + storelof.connectInput(lofv); + ParallelExecutor.run(ids, lofm, storelof, mmm); + + minmax = mmm.getMinMax(); + } + + DoubleRelation scoreres = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * K parameter + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter kP = new IntParameter(LOF.Parameterizer.K_ID); + if(config.grab(kP)) { + k = kP.getValue(); + } + } + + @Override + protected ParallelSimplifiedLOF<O> makeInstance() { + return new ParallelSimplifiedLOF<>(distanceFunction, k); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java new file mode 100644 index 00000000..4698ae6a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/SimplifiedLRDProcessor.java @@ -0,0 +1,97 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.database.datastore.DataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.parallel.Executor; +import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; + +/** + * Processor for the "local reachability density" of LOF. + * + * Note: we compute 1/lrd, the local reachability distance. 
+ * + * @author Erich Schubert + * + * @apiviz.has Instance + */ +public class SimplifiedLRDProcessor extends AbstractDoubleProcessor { + /** + * KNN store + */ + private DataStore<? extends KNNList> knns; + + /** + * Constructor. + * + * @param knns k nearest neighbors + */ + public SimplifiedLRDProcessor(DataStore<? extends KNNList> knns) { + super(); + this.knns = knns; + } + + @Override + public Instance instantiate(Executor master) { + return new Instance(master.getInstance(output)); + } + + /** + * Instance + * + * @author Erich Schubert + */ + private class Instance extends AbstractDoubleProcessor.Instance { + /** + * Constructor. + * + * @param output Output variable + */ + public Instance(SharedDouble.Instance output) { + super(output); + } + + @Override + public void map(DBIDRef id) { + KNNList knn = knns.get(id); + double lrd = 0.0; + int size = 0; + for(DoubleDBIDListIter n = knn.iter(); n.valid(); n.advance()) { + // Do not include the query object + if(DBIDUtil.equal(n, id)) { + continue; + } + lrd += n.doubleValue(); + size++; + } + // Avoid division by zero. + output.set(lrd > 0 ? size / lrd : 0); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/package-info.java new file mode 100644 index 00000000..3d708b4c --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/parallel/package-info.java @@ -0,0 +1,44 @@ +/** + * Parallelized variants of LOF. + * + * This parallelization is based on the generalization of outlier detection published in: + * + * Reference: + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +@Reference(authors = "E. Schubert, A. Zimek, H.-P. 
Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof.parallel; + +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; + diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java index 757b80ad..f0ada6a8 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -42,7 +42,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.datasource.parser.AbstractParser; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -203,7 +204,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> else { meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); } - Relation<Double> scoresult = new MaterializedRelation<>("External Outlier", 
"external-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs()); OutlierResult or = new OutlierResult(meta, scoresult); // Apply scaling @@ -212,7 +213,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> } DoubleMinMax mm = new DoubleMinMax(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - double val = scoresult.get(iditer); + double val = scoresult.doubleValue(iditer); val = scaling.getScaled(val); scores.putDouble(iditer, val); mm.put(val); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java index 5b681106..a5eb0c7a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; */ import java.util.ArrayList; -import java.util.BitSet; import java.util.Random; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; @@ -38,18 +37,19 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import 
de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -131,7 +131,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements * @param relation Relation to use * @return Outlier detection result */ - public OutlierResult run(Database database, Relation<NumberVector<?>> relation) { + public OutlierResult run(Database database, Relation<NumberVector> relation) { final int dbdim = RelationUtil.dimensionality(relation); final int mindim = dbdim >> 1; final int maxdim = dbdim - 1; @@ -141,34 +141,30 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements { FiniteProgress prog = LOG.isVerbose() ? 
new FiniteProgress("LOF iterations", num, LOG) : null; for(int i = 0; i < num; i++) { - BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand); + long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand); SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset); - LOF<NumberVector<?>, DoubleDistance> lof = new LOF<>(k, df); + LOF<NumberVector> lof = new LOF<>(k, df); // run LOF and collect the result OutlierResult result = lof.run(database, relation); results.add(result); - if(prog != null) { - prog.incrementProcessed(LOG); - } - } - if(prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); } WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); if(breadth) { FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null; - Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size()); + Pair<DBIDIter, DoubleRelation>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size()); // Mapping score-sorted DBID-Iterators onto their corresponding scores. // We need to initialize them now be able to iterate them "in parallel". { int i = 0; for(OutlierResult r : results) { - IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores()); + IDVectorOntoScoreVector[i] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores()); i++; } } @@ -176,12 +172,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements // Iterating over the *lines* of the AS_t(i)-matrix. for(int i = 0; i < relation.size(); i++) { // Iterating over the elements of a line (breadth-first). 
- for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) { + for(Pair<DBIDIter, DoubleRelation> pair : IDVectorOntoScoreVector) { DBIDIter iter = pair.first; // Always true if every algorithm returns a complete result (one score // for every DBID). if(iter.valid()) { - double score = pair.second.get(iter); + double score = pair.second.doubleValue(iter); if(Double.isNaN(scores.doubleValue(iter))) { scores.putDouble(iter, score); minmax.put(score); @@ -193,36 +189,28 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements } } // Progress does not take the initial mapping into account. - if(cprog != null) { - cprog.incrementProcessed(LOG); - } - } - if(cprog != null) { - cprog.ensureCompleted(LOG); + LOG.incrementProcessed(cprog); } + LOG.ensureCompleted(cprog); } else { FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null; for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { double sum = 0.0; for(OutlierResult r : results) { - final Double s = r.getScores().get(iter); - if(s != null && !Double.isNaN(s)) { + final double s = r.getScores().doubleValue(iter); + if(!Double.isNaN(s)) { sum += s; } } scores.putDouble(iter, sum); minmax.put(sum); - if(cprog != null) { - cprog.incrementProcessed(LOG); - } - } - if(cprog != null) { - cprog.ensureCompleted(LOG); + LOG.incrementProcessed(cprog); } + LOG.ensureCompleted(cprog); } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); - Relation<Double> scoreres = new MaterializedRelation<>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs()); return new OutlierResult(meta, scoreres); } @@ -234,8 +222,8 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements * @param maxdim Maximum number to choose * @return 
Subspace as bits. */ - private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) { - BitSet dimset = new BitSet(); + private long[] randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) { + long[] dimset = BitsUtil.zero(alldim); // Fill with all dimensions int[] dims = new int[alldim]; for(int d = 0; d < alldim; d++) { @@ -246,7 +234,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements // Shrink the subspace to the destination size for(int d = 0; d < alldim - subdim; d++) { int s = rand.nextInt(alldim - d); - dimset.set(dims[s]); + BitsUtil.setI(dimset, dims[s]); dims[s] = dims[alldim - d - 1]; } return dimset; diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java index f92a8b80..4858e0df 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -51,7 +51,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProjectedView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
@@ -59,12 +60,12 @@ import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.math.statistics.tests.GoodnessOfFitTest;
import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -102,7 +103,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; @Title("HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
@Description("Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)", url = "http://dx.doi.org/10.1109/ICDE.2012.88")
-public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class HiCS<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The Logger for this class.
*/
@@ -179,7 +180,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe if(LOG.isVerbose()) {
LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
- List<Relation<Double>> results = new ArrayList<>();
+ List<DoubleRelation> results = new ArrayList<>();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// run outlier detection and collect the result
@@ -196,22 +197,18 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe // run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
results.add(result.getScores());
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if(prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0.0;
- for(Relation<Double> r : results) {
- final Double s = r.get(iditer);
- if(s != null && !Double.isNaN(s)) {
+ for(DoubleRelation r : results) {
+ final double s = r.doubleValue(iditer);
+ if(!Double.isNaN(s)) {
sum += s;
}
}
@@ -219,7 +216,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe minmax.put(sum);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scoreres = new MaterializedRelation<>("HiCS", "HiCS-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
@@ -232,7 +229,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe * @param relation Relation to index
* @return List of sorted objects
*/
- private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation) {
+ private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector> relation) {
final int dim = RelationUtil.dimensionality(relation);
ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<>(dim + 1);
@@ -254,7 +251,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe * @param subspaceIndex Subspace indexes
* @return a set of high contrast subspaces
*/
- private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?>> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
+ private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int dbdim = RelationUtil.dimensionality(relation);
FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
@@ -273,14 +270,10 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe ts.set(j);
calculateContrast(relation, ts, subspaceIndex, random);
dDimensionalList.add(ts);
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(prog);
}
}
- if(prog != null) {
- prog.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(prog);
IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
for(int d = 3; !dDimensionalList.isEmpty(); d++) {
@@ -313,9 +306,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe calculateContrast(relation, joinedSet, subspaceIndex, random);
dDimensionalList.add(joinedSet);
- if(qprog != null) {
- qprog.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(qprog);
}
}
// Prune
@@ -328,9 +319,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe }
}
}
- if(qprog != null) {
- qprog.setCompleted(LOG);
- }
+ LOG.setCompleted(qprog);
if(dprog != null) {
dprog.setProcessed(dbdim, LOG);
dprog.ensureCompleted(LOG);
@@ -345,7 +334,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe * @param subspace Subspace
* @param subspaceIndex Subspace indexes
*/
- private void calculateContrast(Relation<? extends NumberVector<?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
+ private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int card = subspace.cardinality();
final double alpha1 = Math.pow(alpha, (1.0 / card));
final int windowsize = (int) (relation.size() * alpha1);
@@ -415,13 +404,9 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe continue;
}
deviationSum += contrast;
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if(prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
subspace.contrast = deviationSum / m;
}
@@ -530,7 +515,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe *
* @param <V> vector type
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* Parameter that specifies the number of iterations in the Monte-Carlo
* process of identifying high contrast subspaces.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java index 8ebdc27a..885ef1df 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,14 +29,13 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.Algorithm; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; -import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; -import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.Result; @@ -100,7 +99,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult Result innerresult = algorithm.run(database); OutlierResult or = getOutlierResult(innerresult); - final Relation<Double> scores = or.getScores(); + final DoubleRelation scores = or.getScores(); if(scaling instanceof 
OutlierScalingFunction) { ((OutlierScalingFunction) scaling).prepare(or); } @@ -109,13 +108,13 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult DoubleMinMax minmax = new DoubleMinMax(); for(DBIDIter iditer = scores.iterDBIDs(); iditer.valid(); iditer.advance()) { - double val = scaling.getScaled(scores.get(iditer)); + double val = scaling.getScaled(scores.doubleValue(iditer)); scaledscores.putDouble(iditer, val); minmax.put(val); } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), scaling.getMin(), scaling.getMax()); - Relation<Double> scoresult = new MaterializedRelation<>("Scaled Outlier", "scaled-outlier", TypeUtil.DOUBLE, scaledscores, scores.getDBIDs()); + DoubleRelation scoresult = new MaterializedDoubleRelation("Scaled Outlier", "scaled-outlier", scaledscores, scores.getDBIDs()); OutlierResult result = new OutlierResult(meta, scoresult); result.addChildResult(innerresult); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java index d40af384..c255a8b0 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -40,8 +40,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; -import 
de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -111,13 +111,9 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl results.add(or); ids.addDBIDs(or.getScores().getDBIDs()); } - if (prog != null) { - prog.incrementProcessed(LOG); - } - } - if (prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); } // Combine WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); @@ -128,8 +124,8 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl double[] scores = new double[num]; int i = 0; for (OutlierResult r : results) { - Double score = r.getScores().get(id); - if (score != null) { + double score = r.getScores().doubleValue(id); + if (!Double.isNaN(score)) { scores[i] = score; i++; } else { @@ -147,16 +143,12 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl } else { LOG.warning("DBID " + id + " was not given any score at all."); } - if (cprog != null) { - cprog.incrementProcessed(LOG); - } - } - if (cprog != null) { - cprog.ensureCompleted(LOG); + LOG.incrementProcessed(cprog); } + LOG.ensureCompleted(cprog); } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); - Relation<Double> scores = new MaterializedRelation<>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids); + DoubleRelation scores = new MaterializedDoubleRelation("Simple Outlier Ensemble", "ensemble-outlier", sumscore, ids); return new OutlierResult(meta, scores); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java 
b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java index f28f8db3..2e9743b9 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java @@ -8,7 +8,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java index 0ce6f9b5..aa6da5cf 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java @@ -14,7 +14,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java index e059c16c..a501e00f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,7 +27,6 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import 
de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @@ -39,9 +38,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * * @param <N> Object type for neighborhood * @param <O> Non-spatial object type - * @param <D> Distance value type */ -public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier<N> { +public abstract class AbstractDistanceBasedSpatialOutlier<N, O> extends AbstractNeighborhoodOutlier<N> { /** * Parameter to specify the non spatial distance function to use */ @@ -50,7 +48,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number /** * The distance function to use */ - private DistanceFunction<O, D> nonSpatialDistanceFunction; + private DistanceFunction<O> nonSpatialDistanceFunction; /** * Constructor. @@ -59,7 +57,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number * @param nonSpatialDistanceFunction Distance function to use on the * non-spatial attributes. 
*/ - public AbstractDistanceBasedSpatialOutlier(NeighborSetPredicate.Factory<N> npredf, DistanceFunction<O, D> nonSpatialDistanceFunction) { + public AbstractDistanceBasedSpatialOutlier(NeighborSetPredicate.Factory<N> npredf, DistanceFunction<O> nonSpatialDistanceFunction) { super(npredf); this.nonSpatialDistanceFunction = nonSpatialDistanceFunction; } @@ -69,7 +67,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number * * @return the distance function to use on the non-spatial attributes */ - protected DistanceFunction<O, D> getNonSpatialDistanceFunction() { + protected DistanceFunction<O> getNonSpatialDistanceFunction() { return nonSpatialDistanceFunction; } @@ -82,18 +80,17 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number * * @param <N> Object type for neighborhood * @param <O> Non-spatial object type - * @param <D> Distance value type */ - public abstract static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { + public abstract static class Parameterizer<N, O> extends AbstractNeighborhoodOutlier.Parameterizer<N> { /** * The distance function to use on the non-spatial attributes. 
*/ - protected PrimitiveDistanceFunction<O, D> distanceFunction = null; + protected PrimitiveDistanceFunction<O> distanceFunction = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<PrimitiveDistanceFunction<O, D>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, PrimitiveDistanceFunction.class); + ObjectParameter<PrimitiveDistanceFunction<O>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, PrimitiveDistanceFunction.class); if(config.grab(distanceFunctionP)) { distanceFunction = distanceFunctionP.instantiateClass(config); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java index 3b3e71b3..95516a99 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java index 5035cf6f..debf0ee2 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 
Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -37,15 +37,15 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDVar; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.ProxyView; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -81,11 +81,10 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * @author Ahmed Hettab * * @param <V> Vector type to use for distances - * @param <D> Distance function to use */ @Title("GLS-Backward Search") @Reference(authors = "F. Chen and C.-T. Lu and A. P. Boedihardjo", title = "GLS-SOD: A Generalized Local Statistical Approach for Spatial Outlier Detection", booktitle = "Proc. 
16th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145/1835804.1835939") -public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm { +public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector> extends AbstractDistanceBasedAlgorithm<V, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -108,7 +107,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends * @param k number of nearest neighbors to use * @param alpha Significance niveau */ - public CTLuGLSBackwardSearchAlgorithm(DistanceFunction<V, D> distanceFunction, int k, double alpha) { + public CTLuGLSBackwardSearchAlgorithm(DistanceFunction<V> distanceFunction, int k, double alpha) { super(distanceFunction); this.alpha = alpha; this.k = k; @@ -122,7 +121,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends * @param relationy Attribute relation * @return Algorithm result */ - public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) { + public OutlierResult run(Database database, Relation<V> relationx, Relation<? 
extends NumberVector> relationy) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax mm = new DoubleMinMax(0.0, 0.0); @@ -151,7 +150,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends } } - Relation<Double> scoreResult = new MaterializedRelation<>("GLSSODBackward", "GLSSODbackward-outlier", TypeUtil.DOUBLE, scores, relationx.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("GLSSODBackward", "GLSSODbackward-outlier", scores, relationx.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0, Double.POSITIVE_INFINITY, 0); return new OutlierResult(scoreMeta, scoreResult); } @@ -163,11 +162,11 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends * @param relationy Attribute relation * @return Top outlier and associated score */ - private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) { + private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? 
extends NumberVector> relationy) { final int dim = RelationUtil.dimensionality(relationx); final int dimy = RelationUtil.dimensionality(relationy); assert (dim == 2); - KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1); + KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1); // We need stable indexed DBIDs ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs()); @@ -196,7 +195,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends } { - final NumberVector<?> vecy = relationy.get(id); + final NumberVector vecy = relationy.get(id); for(int d = 0; d < dimy; d++) { double idy = vecy.doubleValue(d); Y.set(i, d, idy); @@ -205,7 +204,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends // Fill the neighborhood matrix F: { - KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1); ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size()); for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { if(DBIDUtil.equal(id, neighbor)) { @@ -272,9 +271,8 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends * @apiviz.exclude * * @param <V> Input vector type - * @param <D> Distance type */ - public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> { + public static class Parameterizer<V extends NumberVector> extends AbstractDistanceBasedAlgorithm.Parameterizer<V> { /** * Holds the alpha value - significance niveau */ @@ -303,7 +301,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends } @Override - protected CTLuGLSBackwardSearchAlgorithm<V, D> makeInstance() { + protected CTLuGLSBackwardSearchAlgorithm<V> makeInstance() { return new CTLuGLSBackwardSearchAlgorithm<>(distanceFunction, k, alpha); 
} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java index 1712dd4f..151fe129 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -33,11 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; @@ -61,7 +63,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * <p> * Implementation note: attribute standardization is not used; this is * equivalent to using the - * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.AttributeWiseVarianceNormalization + * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.columnwise.AttributeWiseVarianceNormalization * 
AttributeWiseVarianceNormalization} filter. * </p> * @@ -71,7 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * @param <O> Attribute Vector */ @Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179") -public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> { +public class CTLuMeanMultipleAttributes<N, O extends NumberVector> extends AbstractNeighborhoodOutlier<N> { /** * logger */ @@ -118,13 +120,12 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab DoubleMinMax minmax = new DoubleMinMax(); WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC); for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) { - Vector temp = deltas.get(iditer).minus(mean); - final double score = temp.transposeTimesTimes(cmati, temp); + final double score = MathUtil.mahalanobisDistance(cmati, deltas.get(iditer), mean); minmax.put(score); scores.putDouble(iditer, score); } - Relation<Double> scoreResult = new MaterializedRelation<>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", scores, attributes.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -146,7 +147,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab * @param <N> Neighborhood type * @param 
<O> Attribute object type */ - public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { + public static class Parameterizer<N, O extends NumberVector> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuMeanMultipleAttributes<N, O> makeInstance() { return new CTLuMeanMultipleAttributes<>(npredf); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java index 9848d664..4d5afdd0 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -27,14 +27,14 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -91,7 +91,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { * @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
@@ -132,7 +132,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -146,7 +146,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { @Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java index 583958fe..4e993a97 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -33,11 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; @@ -62,7 +64,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * <p> * Implementation note: attribute standardization is not used; this is * equivalent to using the - * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.AttributeWiseVarianceNormalization + * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.columnwise.AttributeWiseVarianceNormalization * 
AttributeWiseVarianceNormalization} filter. * </p> * @@ -72,7 +74,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * @param <O> Non Spatial Vector */ @Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179") -public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> { +public class CTLuMedianMultipleAttributes<N, O extends NumberVector> extends AbstractNeighborhoodOutlier<N> { /** * logger */ @@ -144,13 +146,12 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends DoubleMinMax minmax = new DoubleMinMax(); WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC); for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) { - Vector temp = deltas.get(iditer).minus(mean); - final double score = temp.transposeTimesTimes(cmati, temp); + final double score = MathUtil.mahalanobisDistance(cmati, deltas.get(iditer), mean); minmax.put(score); scores.putDouble(iditer, score); } - Relation<Double> scoreResult = new MaterializedRelation<>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("Median multiple attributes outlier", "median-outlier", scores, attributes.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -172,7 +173,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends * @param <N> Neighborhood type * @param <O> Attributes vector type */ - public 
static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { + public static class Parameterizer<N, O extends NumberVector> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuMedianMultipleAttributes<N, O> makeInstance() { return new CTLuMedianMultipleAttributes<>(npredf); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java index da527af0..1b59b79b 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,13 +28,13 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import 
de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -94,7 +94,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< * @param relation Data relation (1d!) * @return Outlier detection result */ - public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) { + public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel); // Compute the global mean and variance @@ -136,7 +136,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< scores.putDouble(iditer, score); } - Relation<Double> scoreResult = new MaterializedRelation<>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("MoranOutlier", "Moran Scatterplot Outlier", scores, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -145,7 +145,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< @Override public TypeInformation[] getInputTypeRestriction() { - return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1)); + return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D); } @Override diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java index 85524b4e..e11785af 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java @@ -4,7 +4,7 @@ 
package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -28,7 +28,6 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
@@ -37,13 +36,13 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNHeap;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -78,12 +77,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @author Ahmed Hettab
*
* @param <N> Spatial Vector type
- * @param <D> Distance to use
*/
@Title("Random Walk on Exhaustive Combination")
@Description("Spatial Outlier Detection using Random Walk on Exhaustive Combination")
@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url = "http://dx.doi.org/10.1145/1869790.1869841")
-public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<N, D, OutlierResult> implements OutlierAlgorithm {
+public class CTLuRandomWalkEC<N> extends AbstractDistanceBasedAlgorithm<N, OutlierResult> implements OutlierAlgorithm {
/**
* Logger.
*/
@@ -112,7 +110,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac * @param c C parameter
* @param k Number of neighbors
*/
- public CTLuRandomWalkEC(DistanceFunction<N, D> distanceFunction, double alpha, double c, int k) {
+ public CTLuRandomWalkEC(DistanceFunction<N> distanceFunction, double alpha, double c, int k) {
super(distanceFunction);
this.alpha = alpha;
this.c = c;
@@ -126,8 +124,8 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac * @param relation Attribute value relation
* @return Outlier result
*/
- public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?>> relation) {
- DistanceQuery<N, D> distFunc = getDistanceFunction().instantiate(spatial);
+ public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector> relation) {
+ DistanceQuery<N> distFunc = getDistanceFunction().instantiate(spatial);
WritableDataStore<Vector> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
@@ -136,7 +134,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac // construct the relation Matrix of the ec-graph
Matrix E = new Matrix(ids.size(), ids.size());
- KNNHeap<D> heap = DBIDUtil.newHeap(distFunc.getDistanceFactory(), k);
+ KNNHeap heap = DBIDUtil.newHeap(k);
{
int i = 0;
for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
@@ -148,10 +146,9 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac continue;
}
final double e;
- final D distance = distFunc.distance(id, n);
+ final double distance = distFunc.distance(id, n);
heap.insert(distance, n);
- double dist = distance.doubleValue();
- if(dist == 0) {
+ if(distance == 0) {
LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
e = 0;
}
@@ -160,7 +157,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac double exp = Math.exp(Math.pow(diff, alpha));
// Implementation note: not inverting exp worked a lot better.
// Therefore we diverge from the article here.
- e = exp / dist;
+ e = exp / distance;
}
E.set(j, i, e);
}
@@ -225,14 +222,14 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac scores.putDouble(id, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("randomwalkec", "RandomWalkEC", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
@Override
@@ -248,9 +245,8 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac * @apiviz.exclude
*
* @param <N> Vector type
- * @param <D> Distance type
*/
- public static class Parameterizer<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<N, D> {
+ public static class Parameterizer<N> extends AbstractDistanceBasedAlgorithm.Parameterizer<N> {
/**
* Parameter to specify the number of neighbors.
*/
@@ -327,7 +323,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac }
@Override
- protected CTLuRandomWalkEC<N, D> makeInstance() {
+ protected CTLuRandomWalkEC<N> makeInstance() {
return new CTLuRandomWalkEC<>(distanceFunction, alpha, c, k);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java index bcbbfd2a..6feb08f6 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -27,14 +27,14 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -96,7 +96,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { * @param relation Data relation (1d!) * @return Outlier detection result */ - public OutlierResult run(Relation<N> nrel, Relation<? 
extends NumberVector<?>> relation) { + public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel); WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP); @@ -160,7 +160,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { } } // build representation - Relation<Double> scoreResult = new MaterializedRelation<>("SPO", "Scatterplot-Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("SPO", "Scatterplot-Outlier", scores, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -174,7 +174,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { @Override public TypeInformation[] getInputTypeRestriction() { - return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1)); + return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D); } /** diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java index d6cb5a50..b973109a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit 
für Datenbanksysteme ELKI Development Team @@ -27,7 +27,6 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; @@ -35,7 +34,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -98,7 +98,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> { * @param relation Data relation (1d!) * @return Outlier detection result */ - public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) { + public OutlierResult run(Database database, Relation<N> nrel, Relation<? 
extends NumberVector> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel); WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); @@ -133,7 +133,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> { } // Wrap result - Relation<Double> scoreResult = new MaterializedRelation<>("ZTest", "Z Test score", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("ZTest", "Z Test score", scores, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -147,7 +147,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> { @Override public TypeInformation[] getInputTypeRestriction() { - return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1)); + return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D); } /** diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java index 08c3e29b..7fbb8486 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -34,10 +34,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import 
de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; @@ -65,12 +65,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * * @param <N> the type the spatial neighborhood is defined over * @param <O> the type of objects handled by the algorithm - * @param <D> the type of Distance used for non spatial attributes */ @Title("SLOM: a new measure for local spatial outliers") @Description("Spatial local outlier measure (SLOM), which captures the local behaviour of datum in their spatial neighbourhood") @Reference(authors = "Sanjay Chawla and Pei Sun", title = "SLOM: a new measure for local spatial outliers", booktitle = "Knowledge and Information Systems 9(4), 412-429, 2006", url = "http://dx.doi.org/10.1007/s10115-005-0200-2") -public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier<N, O, D> { +public class SLOM<N, O> extends AbstractDistanceBasedSpatialOutlier<N, O> { /** * The logger for this class. 
*/ @@ -83,7 +82,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance * @param nonSpatialDistanceFunction Distance function to use on the * non-spatial attributes */ - public SLOM(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O, D> nonSpatialDistanceFunction) { + public SLOM(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O> nonSpatialDistanceFunction) { super(npred, nonSpatialDistanceFunction); } @@ -95,7 +94,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance */ public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial); - DistanceQuery<O, D> distFunc = getNonSpatialDistanceFunction().instantiate(relation); + DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation); WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); // calculate D-Tilde @@ -109,7 +108,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance if(DBIDUtil.equal(iditer, iter)) { continue; } - double dist = distFunc.distance(iditer, iter).doubleValue(); + double dist = distFunc.distance(iditer, iter); sum += dist; cnt++; maxDist = Math.max(maxDist, dist); @@ -187,7 +186,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance slomminmax.put(slom); } - Relation<Double> scoreResult = new MaterializedRelation<>("SLOM", "slom-outlier", TypeUtil.DOUBLE, sloms, relation.getDBIDs()); + DoubleRelation scoreResult = new MaterializedDoubleRelation("SLOM", "slom-outlier", sloms, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(slomminmax.getMin(), slomminmax.getMax(), 0.0, Double.POSITIVE_INFINITY); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); 
@@ -213,11 +212,10 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance * * @param <N> Neighborhood type * @param <O> Data Object type - * @param <D> Distance type */ - public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> { + public static class Parameterizer<N, O> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O> { @Override - protected SLOM<N, O, D> makeInstance() { + protected SLOM<N, O> makeInstance() { return new SLOM<>(npredf, distanceFunction); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java index a2605f39..f9823e56 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -32,10 +32,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -65,11 +65,10 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; *
* @param <N> Neighborhood object type
* @param <O> Attribute object type
- * @param <D> Distance type
*/
@Title("Spatial Outlier Factor")
@Reference(authors = "Huang, T., Qin, X.", title = "Detecting outliers in spatial database", booktitle = "Proc. 3rd International Conference on Image and Graphics", url = "http://dx.doi.org/10.1109/ICIG.2004.53")
-public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier<N, O, D> {
+public class SOF<N, O> extends AbstractDistanceBasedSpatialOutlier<N, O> {
/**
* The logger for this class.
*/
@@ -82,7 +81,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB * @param nonSpatialDistanceFunction Distance function on non-spatial
* attributes
*/
- public SOF(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O, D> nonSpatialDistanceFunction) {
+ public SOF(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O> nonSpatialDistanceFunction) {
super(npred, nonSpatialDistanceFunction);
}
@@ -101,7 +100,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB */
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
- DistanceQuery<O, D> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
+ DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
@@ -112,7 +111,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += distFunc.distance(iditer, iter).doubleValue();
+ avg += distFunc.distance(iditer, iter);
}
double lrd = 1 / (avg / neighbors.size());
if (Double.isNaN(lrd)) {
@@ -138,7 +137,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB }
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<>("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("Spatial Outlier Factor", "sof-outlier", lofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -159,11 +158,10 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB *
* @param <N> Neighborhood type
* @param <O> Attribute object type
- * @param <D> Distance type
*/
- public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> {
+ public static class Parameterizer<N, O> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O> {
@Override
- protected SOF<N, O, D> makeInstance() {
+ protected SOF<N, O> makeInstance() {
return new SOF<>(npredf, distanceFunction);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java index 1a1f9a82..e46976ab 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -29,14 +29,14 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPre import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -109,7 +109,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { * @param relation Data Relation (1 dimensional!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
@@ -145,13 +145,9 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { // Error: deviation from trimmed mean
errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
- if(progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- if(progress != null) {
- progress.ensureCompleted(LOG);
+ LOG.incrementProcessed(progress);
}
+ LOG.ensureCompleted(progress);
if(LOG.isVerbose()) {
LOG.verbose("Computing median error.");
@@ -187,7 +183,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { minmax.put(score);
}
//
- Relation<Double> scoreResult = new MaterializedRelation<>("TrimmedMean", "Trimmed Mean Score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("TrimmedMean", "Trimmed Mean Score", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -202,7 +198,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { @Override
public TypeInformation[] getInputTypeRestriction() {
// Get one dimensional attribute for analysis.
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD_1D);
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java index ef237928..506c722a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java index c93b10cb..145aecb1 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -156,13 +156,9 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { todo = ntodo; } store.put(iter, res); - if(progress != null) { - progress.incrementProcessed(LOG); - } - } - if(progress != null) { - progress.ensureCompleted(LOG); + LOG.incrementProcessed(progress); } + LOG.ensureCompleted(progress); return store; } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java 
b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java index 33b5010a..5bdd05bf 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -169,13 +169,11 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { } } - try { - if(LOG.isDebugging()) { - LOG.verbose("Loading neighborhood file."); - } - InputStream in = new FileInputStream(file); - in = FileUtil.tryGzipInput(in); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); + if(LOG.isDebugging()) { + LOG.verbose("Loading neighborhood file."); + } + try(InputStream in = FileUtil.tryGzipInput(new FileInputStream(file)); + BufferedReader br = new BufferedReader(new InputStreamReader(in))) { for(String line; (line = br.readLine()) != null;) { ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(); String[] entries = line.split(" "); @@ -200,9 +198,6 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { } } } - br.close(); - in.close(); - return store; } catch(IOException e) { diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java index 25283d5c..5d35aff8 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java @@ -4,7 +4,7 @@ package 
de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,7 +28,6 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.result.Result; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; /** * Predicate to obtain the neighbors of a reference object as set. @@ -54,7 +53,7 @@ public interface NeighborSetPredicate extends Result { * * @param <O> Input relation object type restriction */ - public static interface Factory<O> extends Parameterizable { + public static interface Factory<O> { /** * Instantiation method. * diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java index c43ebba7..18ab30d7 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java @@ -1,26 +1,27 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2013 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
@@ -32,11 +33,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -48,10 +48,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * Neighborhoods based on k nearest neighbors.
*
* @author Ahmed Hettab
- *
- * @param <D> Distance to use
*/
-public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> extends AbstractPrecomputedNeighborhood {
+public class PrecomputedKNearestNeighborNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* Logger
*/
@@ -88,11 +86,10 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte *
* @apiviz.stereotype factory
* @apiviz.has PrecomputedKNearestNeighborNeighborhood
- *
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Factory<O, D extends Distance<D>> implements NeighborSetPredicate.Factory<O> {
+ public static class Factory<O> implements NeighborSetPredicate.Factory<O> {
/**
* parameter k
*/
@@ -101,12 +98,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte /**
* distance function to use
*/
- private DistanceFunction<? super O, D> distFunc;
+ private DistanceFunction<? super O> distFunc;
/**
* Factory Constructor
*/
- public Factory(int k, DistanceFunction<? super O, D> distFunc) {
+ public Factory(int k, DistanceFunction<? super O> distFunc) {
super();
this.k = k;
this.distFunc = distFunc;
@@ -114,19 +111,19 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte @Override
public NeighborSetPredicate instantiate(Relation<? extends O> relation) {
- KNNQuery<?, D> knnQuery = QueryUtil.getKNNQuery(relation, distFunc);
+ KNNQuery<?> knnQuery = QueryUtil.getKNNQuery(relation, distFunc);
// TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ KNNList neighbors = knnQuery.getKNNForDBID(iditer, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
- for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
neighbours.add(neighbor);
}
s.put(iditer, neighbours);
}
- return new PrecomputedKNearestNeighborNeighborhood<D>(s);
+ return new PrecomputedKNearestNeighborNeighborhood(s);
}
@Override
@@ -142,9 +139,8 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte * @apiviz.exclude
*
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends Distance<D>> extends AbstractParameterizer {
+ public static class Parameterizer<O> extends AbstractParameterizer {
/**
* Parameter k
*/
@@ -163,7 +159,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte /**
* Distance function
*/
- DistanceFunction<? super O, D> distFunc;
+ DistanceFunction<? super O> distFunc;
@Override
protected void makeOptions(Parameterization config) {
@@ -172,14 +168,14 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte if(config.grab(kP)) {
k = kP.getValue();
}
- final ObjectParameter<DistanceFunction<? super O, D>> distP = new ObjectParameter<>(DISTANCEFUNCTION_ID, DistanceFunction.class);
+ final ObjectParameter<DistanceFunction<? super O>> distP = new ObjectParameter<>(DISTANCEFUNCTION_ID, DistanceFunction.class);
if(config.grab(distP)) {
distFunc = distP.instantiateClass(config);
}
}
@Override
- protected PrecomputedKNearestNeighborNeighborhood.Factory<O, D> makeInstance() {
+ protected PrecomputedKNearestNeighborNeighborhood.Factory<O> makeInstance() {
return new PrecomputedKNearestNeighborNeighborhood.Factory<>(k, distFunc);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java index fd51ca22..6199412c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java index 4d6ec635..e1abc23c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java index 9bdb7d51..6c2fa7c1 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java @@ -4,7 
+4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java index ca0fa620..74ffaaa4 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,7 +29,6 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; /** * Neighbor predicate with weight support. @@ -55,7 +54,7 @@ public interface WeightedNeighborSetPredicate { * * @param <O> Input relation object type restriction */ - public static interface Factory<O> extends Parameterizable { + public static interface Factory<O> { /** * Instantiation method. 
* diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java index d7c7a797..c09fdf19 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java index 5a65d8c1..e1325935 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java index 2b12b306..8ee5e2cd 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AbstractAggarwalYuOutlier.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für 
Datenbanksysteme ELKI Development Team @@ -24,17 +24,17 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */ import java.util.ArrayList; -import java.util.Collections; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; @@ -52,8 +52,8 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; * Abstract base class for the sparse-grid-cell based outlier detection of * Aggarwal and Yu. * + * Reference: * <p> - * Reference: <br /> * Outlier detection for high dimensional data<br /> * C.C. Aggarwal, P. S. Yu<br /> * International Conference on Management of Data Proceedings of the 2001 ACM @@ -66,15 +66,20 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; * * @param <V> Vector type */ -@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668") -public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "C.C. Aggarwal, P. S. 
Yu", // +title = "Outlier detection for high dimensional data", // +booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", // +url = "http://dx.doi.org/10.1145/375663.375668") +public abstract class AbstractAggarwalYuOutlier<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * Symbolic value for subspaces not in use. - * - * Note: in some places, the implementations may rely on this having the value - * 0 currently! */ - public static final int DONT_CARE = 0; + public static final short DONT_CARE = -1; + + /** + * The first bucket. + */ + public static final short GENE_OFFSET = DONT_CARE + 1; /** * The number of partitions for each dimension. @@ -109,38 +114,23 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) { final int dim = RelationUtil.dimensionality(relation); final int size = relation.size(); - final DBIDs allids = relation.getDBIDs(); final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<>(); - // Temporary projection storage of the database - final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<>(dim); - for(int i = 0; i < dim; i++) { - ArrayList<DoubleDBIDPair> axis = new ArrayList<>(size); - dbAxis.add(i, axis); - } - // Project - for(DBIDIter iter = allids.iter(); iter.valid(); iter.advance()) { - final V obj = relation.get(iter); - for(int d = 0; d < dim; d++) { - dbAxis.get(d).add(DBIDUtil.newPair(obj.doubleValue(d), iter)); - } - } + ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs()); + SortDBIDsBySingleDimension sorter = new SortDBIDsBySingleDimension(relation); // Split into cells final double part = size * 1.0 / phi; for(int d = 0; d < dim; d++) { - ArrayList<DoubleDBIDPair> axis = dbAxis.get(d); - Collections.sort(axis); + sorter.setDimension(d); + ids.sort(sorter); ArrayList<DBIDs> dimranges = new ArrayList<>(phi + 
1); - dimranges.add(allids); int start = 0; - for(int r = 0; r < phi; r++) { - int end = (int) (part * r); - if(r == phi - 1) { - end = size; - } - ArrayModifiableDBIDs currange = DBIDUtil.newArray(phi + 1); - for(int i = start; i < end; i++) { - currange.add(axis.get(i)); + DBIDArrayIter iter = ids.iter(); + for(int r = 1; r <= phi; r++) { + int end = (r < phi) ? (int) (part * r) : size; + ArrayModifiableDBIDs currange = DBIDUtil.newArray(end - start); + for(iter.seek(start); iter.getOffset() < end; iter.advance()) { + currange.add(iter); } start = end; dimranges.add(currange); @@ -178,7 +168,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first).get(subspace.get(0).second)); // intersect all selected dimensions for(int i = 1; i < subspace.size(); i++) { - DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second); + DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second - GENE_OFFSET); ids.retainAll(current); if(ids.size() == 0) { break; @@ -194,15 +184,21 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten * @param ranges Database ranges * @return resulting DBIDs */ - protected DBIDs computeSubspaceForGene(int[] gene, ArrayList<ArrayList<DBIDs>> ranges) { - HashSetModifiableDBIDs m = DBIDUtil.newHashSet(ranges.get(0).get(gene[0])); - // intersect - for(int i = 1; i < gene.length; i++) { + protected DBIDs computeSubspaceForGene(short[] gene, ArrayList<ArrayList<DBIDs>> ranges) { + HashSetModifiableDBIDs m = null; + // intersect all present restrictions + for(int i = 0; i < gene.length; i++) { if(gene[i] != DONT_CARE) { - DBIDs current = ranges.get(i).get(gene[i]); - m.retainAll(current); + DBIDs current = ranges.get(i).get(gene[i] - GENE_OFFSET); + if(m == null) { + m = DBIDUtil.newHashSet(current); + } + else { + m.retainAll(current); + } } } + assert (m != null) : "All genes 
set to '*', should not happen!"; return m; } @@ -242,13 +238,13 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter kP = new IntParameter(K_ID); - kP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + final IntParameter kP = new IntParameter(K_ID)// + .addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); if(config.grab(kP)) { k = kP.getValue(); } - final IntParameter phiP = new IntParameter(PHI_ID); - phiP.addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); + final IntParameter phiP = new IntParameter(PHI_ID)// + .addConstraint(CommonConstraints.GREATER_THAN_ONE_INT); if(config.grab(phiP)) { phi = phiP.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java index c4e5cc5d..b32e5124 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuEvolutionary.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -23,49 +23,48 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.iterator.TIntIterator;
+import gnu.trove.list.array.TIntArrayList;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.Iterator;
import java.util.Random;
-import java.util.TreeSet;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
- * EAFOD provides the evolutionary outlier detection algorithm, an algorithm to
- * detect outliers for high dimensional data.
+ * Evolutionary variant (EAFOD) of the high-dimensional outlier detection
+ * algorithm by Aggarwal and Yu.
* <p>
* Reference: <br />
* Outlier detection for high dimensional data<br />
@@ -86,7 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; @Title("EAFOD: the evolutionary outlier detection algorithm")
@Description("Outlier detection for high dimensional data")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuEvolutionary<V extends NumberVector> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
@@ -98,6 +97,11 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA protected final static int MAX_ITERATIONS = 1000;
/**
+ * At which gene homogeneity do we have convergence?
+ */
+ protected final static double CONVERGENCE = .85;
+
+ /**
* Holds the value of {@link Parameterizer#M_ID}.
*/
private int m;
@@ -155,7 +159,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA }
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuEvolutionary", "aggarwal-yu-outlier", TypeUtil.DOUBLE, outlierScore, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuEvolutionary", "aggarwal-yu-outlier", outlierScore, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -223,14 +227,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA bestSol.add(ind);
}
+ IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Evolutionary search iterations", LOG) : null;
int iterations = 0;
while(!checkConvergence(pop)) {
Collections.sort(pop);
+ // Fitter members are more likely to survive
pop = rouletteRankSelection(pop);
- // Crossover
+ // Crossover survivors
pop = crossoverOptimized(pop);
// Mutation with probability 0.25 , 0.25
- pop = mutation(pop, 0.5, 0.5);
+ pop = mutation(pop, 0.25, 0.25);
// Avoid duplicates
ind: for(Individuum ind : pop) {
for(Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
@@ -253,11 +259,15 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA LOG.debugFinest(buf.toString());
}
iterations++;
+ LOG.incrementProcessed(prog);
if(iterations > MAX_ITERATIONS) {
LOG.warning("Maximum iterations reached.");
break;
}
}
+ if(prog != null) {
+ prog.setCompleted(LOG);
+ }
return bestSol.unorderedIter();
}
@@ -276,25 +286,28 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA int[][] occur = new int[dim][phi + 1];
// Count gene occurrences
for(Individuum ind : pop) {
- int[] gene = ind.getGene();
+ short[] gene = ind.getGene();
for(int d = 0; d < dim; d++) {
- int val = gene[d] + DONT_CARE;
- if(val < 0 || val >= phi + 1) {
+ if(gene[d] == DONT_CARE) {
+ occur[d][0] += 1;
+ continue;
+ }
+ int val = gene[d] - GENE_OFFSET;
+ if(val < 0 || val >= phi) {
LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
- occur[d][val] += 1;
+ occur[d][val + 1] += 1;
}
}
- int conv = (int) (pop.size() * 0.95);
+ int conv = (int) Math.floor(pop.size() * CONVERGENCE);
if(LOG.isDebuggingFine()) {
LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
for(int d = 0; d < dim; d++) {
boolean converged = false;
-
- for(int val = 0; val < phi + 1; val++) {
+ for(int val = 0; val <= phi; val++) {
if(occur[d][val] >= conv) {
converged = true;
break;
@@ -320,24 +333,23 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA // fill population
for(int i = 0; i < popsize; i++) {
// Random Individual
- int[] gene = new int[dim];
+ short[] gene = new short[dim];
// fill don't care ( any dimension == don't care)
- for(int j = 0; j < dim; j++) {
- gene[j] = DONT_CARE;
- }
+ Arrays.fill(gene, DONT_CARE);
// count of don't care positions
int countDim = k;
// fill non don't care positions of the Individual
while(countDim > 0) {
int z = random.nextInt(dim);
- if(gene[z] == DONT_CARE) {
- gene[z] = random.nextInt(phi) + 1;
- countDim--;
+ if(gene[z] != DONT_CARE) {
+ continue;
}
+ gene[z] = (short) (random.nextInt(phi) + GENE_OFFSET);
+ countDim--;
}
population.add(makeIndividuum(gene));
}
- Collections.sort(population);
+ // Collections.sort(population);
return population;
}
@@ -363,94 +375,56 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA // position of selection
for(int i = 0; i < popsize; i++) {
int z = random.nextInt(totalweight);
- for(int j = 0; j < popsize; j++) {
- if(z < popsize - j) {
- // TODO: need clone?
+ for(int j = 0, rank = popsize; j < popsize; ++j, --rank) {
+ if(z < rank) {
survivors.add(population.get(j));
break;
}
- else {
- // decrement
- z -= (popsize - j);
- }
+ z -= rank;
}
}
- if(survivors.size() != popsize) {
- throw new AbortException("Selection step failed - implementation error?");
- }
- // Don't sort, to avoid biasing the crossover!
- // Collections.sort(survivors);
+ assert (survivors.size() == popsize) : "Selection step failed - implementation error?";
return survivors;
}
/**
- * Apply the mutation alogrithm.
+ * Apply the mutation algorithm.
*/
private ArrayList<Individuum> mutation(ArrayList<Individuum> population, double perc1, double perc2) {
// the Mutations
ArrayList<Individuum> mutations = new ArrayList<>();
- // Set of Positions which are don't care in the String
- TreeSet<Integer> Q = new TreeSet<>();
- // Set of Positions which are not don't care in the String
- TreeSet<Integer> R = new TreeSet<>();
+ int[] QR = new int[dim];
// for each individuum
for(int j = 0; j < population.size(); j++) {
- // clear the Sets
- Q.clear();
- R.clear();
- // Fill the Sets with the Positions
+ short[] gene = population.get(j).getGene().clone();
+ // Fill position array for mutation process
+ int q = 0, r = dim;
for(int i = 0; i < dim; i++) {
- if(population.get(j).getGene()[i] == DONT_CARE) {
- Q.add(i);
- }
- else {
- R.add(i);
- }
+ QR[(gene[i] == DONT_CARE) ? (q++) : (--r)] = i;
}
- //
- double r1 = random.nextDouble();
- if(Q.size() != 0) {
- // Mutation Variant 1
- if(r1 <= perc1) {
- // calc Mutation Spot
- Integer[] pos = new Integer[Q.size()];
- pos = Q.toArray(pos);
- int position = random.nextInt(pos.length);
- int depth = pos[position];
- // Mutate don't care into 1....phi
- population.get(j).getGene()[depth] = random.nextInt(phi) + 1;
- // update Sets
- Q.remove(depth);
- R.add(depth);
- // calc new Mutation Spot
- pos = new Integer[R.size()];
- pos = R.toArray(pos);
- position = random.nextInt(pos.length);
- depth = pos[position];
- // Mutate non don't care into don't care
- population.get(j).getGene()[depth] = DONT_CARE;
- // update Sets
- Q.add(depth);
- R.remove(depth);
- }
+ // Mutation variant 1
+ if(q > 0 && r < dim && random.nextDouble() <= perc1) {
+ // Random mutation spots:
+ int rq = random.nextInt(q), rr = random.nextInt(dim - r) + r;
+ int pq = QR[rq], pr = QR[rr];
+ // Mutate don't care (position pq) into 1....phi
+ gene[pq] = (short) (random.nextInt(phi) + GENE_OFFSET);
+ // Mutate non don't care (position pr) into don't care
+ gene[pr] = DONT_CARE;
+ // update sets, by swapping the position values
+ QR[rq] = pr;
+ QR[rr] = pq;
}
- r1 = random.nextDouble();
// Mutation Variant 2
- if(r1 <= perc2) {
+ if(random.nextDouble() <= perc2) {
// calc Mutation Spot
- Integer[] pos = new Integer[R.size()];
- pos = R.toArray(pos);
- int position = random.nextInt(pos.length);
- int depth = pos[position];
+ int pr = random.nextInt(dim - r) + r;
// Mutate 1...phi into another 1...phi
- population.get(j).getGene()[depth] = random.nextInt(phi) + 1;
+ gene[QR[pr]] = (short) (random.nextInt(phi) + GENE_OFFSET);
}
- int[] gene = population.get(j).getGene();
mutations.add(makeIndividuum(gene));
-
}
- Collections.sort(mutations);
return mutations;
}
@@ -460,7 +434,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * @param gene Gene to evaluate
* @return new individuum
*/
- private Individuum makeIndividuum(int[] gene) {
+ private Individuum makeIndividuum(short[] gene) {
final DBIDs ids = computeSubspaceForGene(gene, ranges);
final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k, phi) : Double.MAX_VALUE;
return new Individuum(fitness, gene);
@@ -483,7 +457,6 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA if(population.size() % 2 == 1) {
crossover.add(population.get(population.size() - 1));
}
- // Collections.sort(crossover);
return crossover;
}
@@ -497,9 +470,9 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA private Pair<Individuum, Individuum> recombineOptimized(Individuum parent1, Individuum parent2) {
Pair<Individuum, Individuum> recombinePair;
// Set of Positions in which either s1 or s2 are don't care
- ArrayList<Integer> Q = new ArrayList<>(dim);
+ TIntArrayList Q = new TIntArrayList(dim);
// Set of Positions in which neither s1 or s2 is don't care
- ArrayList<Integer> R = new ArrayList<>(dim);
+ TIntArrayList R = new TIntArrayList(dim);
for(int i = 0; i < dim; i++) {
if((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
@@ -516,21 +489,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA Individuum best = combineRecursive(R, 0, Individuum.nullIndividuum(dim).getGene(), parent1, parent2);
// Extends gene greedily
- int[] b = best.getGene();
+ short[] b = best.getGene();
int count = k - R.size();
- Iterator<Integer> q = Q.iterator();
+ TIntIterator q = Q.iterator();
while(count > 0) {
- int[] l1 = b.clone();
- int[] l2 = b.clone();
+ short[] l1 = b.clone();
+ short[] l2 = b.clone();
while(q.hasNext()) {
int next = q.next();
// pos = next;
{
- boolean s1Null = (parent1.getGene()[next] == 0);
- boolean s2Null = (parent1.getGene()[next] == 0);
+ boolean s1Null = (parent1.getGene()[next] == DONT_CARE);
+ boolean s2Null = (parent1.getGene()[next] == DONT_CARE);
l1[next] = parent1.getGene()[next];
l2[next] = parent2.getGene()[next];
@@ -556,7 +529,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA }
// create the complementary String
- int[] comp = new int[dim];
+ short[] comp = new short[dim];
for(int i = 0; i < dim; i++) {
if(b[i] == parent1.getGene()[i]) {
@@ -584,26 +557,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * @param parent2 Second parent
* @return best gene combination
*/
- private Individuum combineRecursive(ArrayList<Integer> r, int i, int[] current, Individuum parent1, Individuum parent2) {
+ private Individuum combineRecursive(TIntArrayList r, int i, short[] current, Individuum parent1, Individuum parent2) {
if(i == r.size()) {
return makeIndividuum(current);
}
// Position to modify
int pos = r.get(i);
// Build genes
- int[] gene1 = current.clone();
- int[] gene2 = current; // .clone();
+ short[] gene1 = current.clone();
+ short[] gene2 = current; // .clone();
gene1[pos] = parent1.getGene()[pos];
gene2[pos] = parent2.getGene()[pos];
Individuum i1 = combineRecursive(r, i + 1, gene1, parent1, parent2);
Individuum i2 = combineRecursive(r, i + 1, gene2, parent1, parent2);
// Return the better result.
- if(i1.getFitness() < i2.getFitness()) {
- return i1;
- }
- else {
- return i2;
- }
+ return (i1.getFitness() < i2.getFitness()) ? i1 : i2;
}
}
@@ -611,18 +579,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * Individuum for the evolutionary search.
*
* @author Erich Schubert
- *
- * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair
*/
- private static class Individuum extends FCPair<Double, int[]> {
+ private static class Individuum implements Comparable<Individuum> {
+ double fitness;
+
+ short[] gene;
+
/**
* Constructor
*
* @param fitness Fitness
* @param gene Gene information
*/
- public Individuum(double fitness, int[] gene) {
- super(fitness, gene);
+ public Individuum(double fitness, short[] gene) {
+ this.fitness = fitness;
+ this.gene = gene;
}
/**
@@ -630,8 +601,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA *
* @return the gene information
*/
- public int[] getGene() {
- return second;
+ public short[] getGene() {
+ return gene;
}
/**
@@ -640,7 +611,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * @return fitness
*/
public double getFitness() {
- return first;
+ return fitness;
}
/**
@@ -650,14 +621,29 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * @return new individuum
*/
public static Individuum nullIndividuum(int dim) {
- int[] gene = new int[dim];
+ short[] gene = new short[dim];
Arrays.fill(gene, DONT_CARE);
return new Individuum(0.0, gene);
}
@Override
public String toString() {
- return "I(f=" + first + ",g=" + FormatUtil.format(second) + ")";
+ StringBuilder buf = new StringBuilder();
+ buf.append("I(f=").append(fitness);
+ buf.append(",g=");
+ for(int i = 0; i < gene.length; i++) {
+ if(i > 0) {
+ buf.append(",");
+ }
+ if(gene[i] == DONT_CARE) {
+ buf.append("*");
+ }
+ else {
+ buf.append(gene[i]);
+ }
+ }
+ buf.append(")");
+ return buf.toString();
}
@Override
@@ -666,16 +652,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA return false;
}
Individuum other = (Individuum) obj;
- if(other.second.length != this.second.length) {
+ if(other.gene.length != this.gene.length) {
return false;
}
- for(int i = 0; i < this.second.length; i++) {
- if(other.second[i] != this.second[i]) {
+ for(int i = 0; i < this.gene.length; i++) {
+ if(other.gene[i] != this.gene[i]) {
return false;
}
}
return true;
}
+
+ @Override
+ public int compareTo(Individuum o) {
+ return Double.compare(this.fitness, o.fitness);
+ }
}
/**
@@ -685,7 +676,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractAggarwalYuOutlier.Parameterizer {
/**
* Parameter to specify the number of solutions must be an integer greater
* than 1.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java index 1816c3a3..4ee1969b 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/AggarwalYuNaive.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team @@ -26,13 +26,13 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; import java.util.ArrayList;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -46,12 +46,12 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
/**
- * BruteForce provides a naive brute force algorithm in which all k-subsets of
- * dimensions are examined and calculates the sparsity coefficient to find
- * outliers.
+ * BruteForce variant of the high-dimensional outlier detection algorithm by
+ * Aggarwal and Yu.
*
* The evolutionary approach is implemented as
- * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.AggarwalYuEvolutionary}.
+ * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.subspace.AggarwalYuEvolutionary}
+ * .
*
* <p>
* Reference: <br />
@@ -71,7 +71,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; @Title("BruteForce: Outlier detection for high dimensional data")
@Description("Examines all possible sets of k dimensional projections")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuNaive<V extends NumberVector> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
@@ -106,7 +106,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal // Set of all dim*phi ranges
ArrayList<IntIntPair> q = new ArrayList<>();
for(int i = 0; i < dimensionality; i++) {
- for(int j = 1; j <= phi; j++) {
+ for(int j = 0; j < phi; j++) {
IntIntPair s = new IntIntPair(i, j);
q.add(s);
// Add to first Rk
@@ -148,7 +148,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal final double sparsityC = sparsity(ids.size(), size, k, phi);
if(sparsityC < 0) {
- for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double prev = sparsity.doubleValue(iter);
if(Double.isNaN(prev) || sparsityC < prev) {
sparsity.putDouble(iter, sparsityC);
@@ -157,7 +157,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal }
}
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double val = sparsity.doubleValue(iditer);
if(Double.isNaN(val)) {
sparsity.putDouble(iditer, 0.0);
@@ -165,7 +165,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal }
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuNaive", "aggarwal-yu-outlier", TypeUtil.DOUBLE, sparsity, relation.getDBIDs());
+ DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuNaive", "aggarwal-yu-outlier", sparsity, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -182,7 +182,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractAggarwalYuOutlier.Parameterizer {
@Override
protected AggarwalYuNaive<V> makeInstance() {
return new AggarwalYuNaive<>(k, phi);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java index c21542da..b3a03ba6 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; */ import java.util.Arrays; -import java.util.BitSet; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; @@ -37,21 +36,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; +import 
de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -63,6 +58,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -93,7 +89,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * @param <V> vector type */ @Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management") -public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +public class OUTRES<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
*/ @@ -130,25 +126,21 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier DoubleMinMax minmax = new DoubleMinMax(); KernelDensityEstimator kernel = new KernelDensityEstimator(relation); - BitSet subspace = new BitSet(kernel.dim); + long[] subspace = BitsUtil.zero(kernel.dim); FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null; for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - subspace.clear(); + BitsUtil.zeroI(subspace); double score = outresScore(0, subspace, iditer, kernel); ranks.putDouble(iditer, score); minmax.put(score); - if(progress != null) { - progress.incrementProcessed(LOG); - } - } - if(progress != null) { - progress.ensureCompleted(LOG); + LOG.incrementProcessed(progress); } + LOG.ensureCompleted(progress); OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.); - OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs())); + OutlierResult outresResult = new OutlierResult(meta, new MaterializedDoubleRelation("OUTRES", "outres-score", ranks, relation.getDBIDs())); return outresResult; } @@ -161,33 +153,34 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param kernel Kernel * @return Score */ - public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) { + public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) { double score = 1.0; // Initial score is 1.0 final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace); MeanVariance meanv = new MeanVariance(); for(int i = s; i < kernel.dim; i++) { - if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0? + if(BitsUtil.get(subspace, i)) { // TODO: needed? 
Or should we always start + // with i=0? continue; } - subspace.set(i); + BitsUtil.setI(subspace, i); df.setSelectedDimensions(subspace); final double adjustedEps = kernel.adjustedEps(kernel.dim); // Query with a larger window, to also get neighbors of neighbors // Subspace euclidean is metric! - final DoubleDistance range = new DoubleDistance(adjustedEps * 2.); - RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range); + final double range = adjustedEps * 2.; + RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range); - DistanceDBIDList<DoubleDistance> neighc = rq.getRangeForDBID(id, range); - DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps); + DoubleDBIDList neighc = rq.getRangeForDBID(id, range); + DoubleDBIDList neigh = refineRange(neighc, adjustedEps); if(neigh.size() > 2) { // Relevance test if(relevantSubspace(subspace, neigh, kernel)) { final double density = kernel.subspaceDensity(subspace, neigh); // Compute mean and standard deviation for densities of neighbors. meanv.reset(); - for (DoubleDistanceDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { - DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel); + for(DoubleDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { + DoubleDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel); meanv.put(kernel.subspaceDensity(subspace, n2)); } final double deviation = (meanv.getMean() - density) / (2. 
* meanv.getSampleStddev()); @@ -199,7 +192,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier score *= outresScore(i + 1, subspace, id, kernel); } } - subspace.clear(i); + BitsUtil.clearI(subspace, i); } return score; } @@ -211,21 +204,14 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param adjustedEps New epsilon * @return refined list */ - private DoubleDistanceDBIDList refineRange(DistanceDBIDList<DoubleDistance> neighc, double adjustedEps) { - ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size()); + private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) { + ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size()); // We don't have a guarantee for this list to be sorted - for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { - DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); - if(p instanceof DoubleDistanceDBIDPair) { - if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) { - n.add((DoubleDistanceDBIDPair) p); - } - } - else { - double dist = p.getDistance().doubleValue(); - if(dist <= adjustedEps) { - n.add(dist, p); - } + for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + DoubleDBIDPair p = neighbor.getPair(); + double dist = p.doubleValue(); + if(dist <= adjustedEps) { + n.add(dist, p); } } return n; @@ -241,12 +227,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param kernel Kernel * @return Neighbors of neighbor object */ - private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDList<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? 
super V> df, double adjustedEps, KernelDensityEstimator kernel) { - ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size()); + private DoubleDBIDList subsetNeighborhoodQuery(DoubleDBIDList neighc, DBIDRef dbid, PrimitiveDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) { + ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size()); V query = kernel.relation.get(dbid); - for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { - DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); - double dist = df.doubleDistance(query, kernel.relation.get(p)); + for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + DoubleDBIDPair p = neighbor.getPair(); + double dist = df.distance(query, kernel.relation.get(p)); if(dist <= adjustedEps) { n.add(dist, p); } @@ -262,16 +248,16 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param kernel Kernel density estimator * @return relevance test result */ - protected boolean relevantSubspace(BitSet subspace, DoubleDistanceDBIDList neigh, KernelDensityEstimator kernel) { + protected boolean relevantSubspace(long[] subspace, DoubleDBIDList neigh, KernelDensityEstimator kernel) { Relation<V> relation = kernel.relation; final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size()); - for(int dim = subspace.nextSetBit(0); dim > 0; dim = subspace.nextSetBit(dim + 1)) { + for(int dim = BitsUtil.nextSetBit(subspace, 0); dim > 0; dim = BitsUtil.nextSetBit(subspace, dim + 1)) { // TODO: can we save this copy somehow? 
double[] data = new double[neigh.size()]; { int count = 0; - for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { + for(DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { V vector = relation.get(neighbor); data[count] = vector.doubleValue(dim); count++; @@ -347,12 +333,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param neighbors Neighbor distance list * @return Density */ - protected double subspaceDensity(BitSet subspace, DoubleDistanceDBIDList neighbors) { - final double bandwidth = optimalBandwidth(subspace.cardinality()); + protected double subspaceDensity(long[] subspace, DoubleDBIDList neighbors) { + final double bandwidth = optimalBandwidth(BitsUtil.cardinality(subspace)); double density = 0; - for (DoubleDistanceDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - double v = neighbor.doubleDistance() / bandwidth; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + double v = neighbor.doubleValue() / bandwidth; if(v < 1) { density += 1 - (v * v); } @@ -407,7 +393,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer { + public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer { /** * Option ID for Epsilon parameter */ diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java index 3e248bfa..a87515e5 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright 
(C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -37,13 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; -import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -83,7 +84,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli /** * Clustering algorithm to run. */ - protected SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> clusteralg; + protected SubspaceClusteringAlgorithm<? extends SubspaceModel> clusteralg; /** * Weighting parameter of size vs. dimensionality score. @@ -97,7 +98,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli * {@link SubspaceClusteringAlgorithm}!) * @param alpha Alpha parameter to balance size and dimensionality. */ - public OutRankS1(SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> clusteralg, double alpha) { + public OutRankS1(SubspaceClusteringAlgorithm<? 
extends SubspaceModel> clusteralg, double alpha) { super(); this.clusteralg = clusteralg; this.alpha = alpha; @@ -105,35 +106,35 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli @Override public OutlierResult run(Database database) { - DBIDs ids = database.getRelation(TypeUtil.DBID).getDBIDs(); + DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs(); // Run the primary algorithm - Clustering<? extends SubspaceModel<?>> clustering = clusteralg.run(database); + Clustering<? extends SubspaceModel> clustering = clusteralg.run(database); WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT); - for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { score.putDouble(iter, 0); } int maxdim = 0, maxsize = 0; // Find maximum dimensionality and cluster size - for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) { + for(Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) { maxsize = Math.max(maxsize, cluster.size()); - maxdim = Math.max(maxdim, cluster.getModel().getDimensions().cardinality()); + maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions())); } // Iterate over all clusters: DoubleMinMax minmax = new DoubleMinMax(); - for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) { + for(Cluster<? 
extends SubspaceModel> cluster : clustering.getAllClusters()) { double relsize = cluster.size() / (double) maxsize; - double reldim = cluster.getModel().getDimensions().cardinality() / (double) maxdim; + double reldim = BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim; // Process objects in the cluster - for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { + for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim; score.putDouble(iter, newscore); minmax.put(newscore); } } - Relation<Double> scoreResult = new MaterializedRelation<>("OutRank-S1", "OUTRANK_S1", TypeUtil.DOUBLE, score, ids); + DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids); OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY); OutlierResult res = new OutlierResult(meta, scoreResult); res.addChildResult(clustering); @@ -171,7 +172,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli /** * Clustering algorithm to run. */ - protected SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> algorithm = null; + protected SubspaceClusteringAlgorithm<? extends SubspaceModel> algorithm = null; /** * Alpha parameter to balance parameters @@ -181,13 +182,13 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class); - if (config.grab(algP)) { + ObjectParameter<SubspaceClusteringAlgorithm<? 
extends SubspaceModel>> algP = new ObjectParameter<>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class); + if(config.grab(algP)) { algorithm = algP.instantiateClass(config); } DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.25); alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); - if (config.grab(alphaP)) { + if(config.grab(alphaP)) { alpha = alphaP.doubleValue(); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java index 489f811b..b8372884 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,8 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.BitSet; - import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; @@ -42,10 +40,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction; import de.lmu.ifi.dbs.elki.distance.similarityfunction.SimilarityFunction; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -59,6 +57,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable; import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; @@ -91,12 +90,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @apiviz.has SharedNearestNeighborSimilarityFunction * * @param <V> the type of NumberVector handled by this Algorithm - * @param <D> distance type */ @Title("SOD: Subspace outlier degree") @Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data") @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. 
Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2") -public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +public class SOD<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -115,7 +113,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte /** * Similarity function to use. */ - private SimilarityFunction<V, D> similarityFunction; + private SimilarityFunction<V> similarityFunction; /** * Report models. @@ -130,7 +128,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @param similarityFunction Shared nearest neighbor similarity function * @param models Report generated models */ - public SOD(int knn, double alpha, SimilarityFunction<V, D> similarityFunction, boolean models) { + public SOD(int knn, double alpha, SimilarityFunction<V> similarityFunction, boolean models) { super(); this.knn = knn; this.alpha = alpha; @@ -145,54 +143,51 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @return Outlier result */ public OutlierResult run(Relation<V> relation) { - SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation); + SimilarityQuery<V> snnInstance = similarityFunction.instantiate(relation); FiniteProgress progress = LOG.isVerbose() ? 
new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null; final WritableDoubleDataStore sod_scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); WritableDataStore<SODModel> sod_models = null; - if (models) { // Models requested + if(models) { // Models requested sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class); } DoubleMinMax minmax = new DoubleMinMax(); - for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { - if (progress != null) { - progress.incrementProcessed(LOG); - } + for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + LOG.incrementProcessed(progress); DBIDs neighborhood = getNearestNeighbors(relation, snnInstance, iter); Vector center; - BitSet weightVector; + long[] weightVector; double sod; - if (neighborhood.size() > 0) { + if(neighborhood.size() > 0) { center = Centroid.make(relation, neighborhood); // Note: per-dimension variances; no covariances. double[] variances = computePerDimensionVariances(relation, center, neighborhood); double expectationOfVariance = Mean.of(variances); - weightVector = new BitSet(variances.length); - for (int d = 0; d < variances.length; d++) { - if (variances[d] < alpha * expectationOfVariance) { - weightVector.set(d, true); + weightVector = BitsUtil.zero(variances.length); + for(int d = 0; d < variances.length; d++) { + if(variances[d] < alpha * expectationOfVariance) { + BitsUtil.setI(weightVector, d); } } sod = subspaceOutlierDegree(relation.get(iter), center, weightVector); - } else { + } + else { center = relation.get(iter).getColumnVector(); weightVector = null; sod = 0.; } - if (sod_models != null) { + if(sod_models != null) { sod_models.put(iter, new SODModel(center, weightVector)); } sod_scores.putDouble(iter, sod); minmax.put(sod); } - if (progress != null) { - progress.ensureCompleted(LOG); - } + LOG.ensureCompleted(progress); // combine results. 
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); - OutlierResult sodResult = new OutlierResult(meta, new MaterializedRelation<>("Subspace Outlier Degree", "sod-outlier", TypeUtil.DOUBLE, sod_scores, relation.getDBIDs())); - if (sod_models != null) { + OutlierResult sodResult = new OutlierResult(meta, new MaterializedDoubleRelation("Subspace Outlier Degree", "sod-outlier", sod_scores, relation.getDBIDs())); + if(sod_models != null) { Relation<SODModel> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<>(SODModel.class), sod_models, relation.getDBIDs()); sodResult.addChildResult(models); } @@ -200,9 +195,9 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte } /** - * Provides the k nearest neighbors in terms of the shared nearest neighbor + * Get the k nearest neighbors in terms of the shared nearest neighbor * distance. - * <p/> + * * The query object is excluded from the knn list. * * FIXME: move this to the database layer. @@ -213,20 +208,20 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @return the k nearest neighbors in terms of the shared nearest neighbor * distance without the query object */ - private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) { + private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V> simQ, DBIDRef queryObject) { Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<>(knn); - for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { - if (DBIDUtil.equal(iter, queryObject)) { + for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + if(DBIDUtil.equal(iter, queryObject)) { continue; } - double sim = simQ.similarity(queryObject, iter).doubleValue(); - if (sim > 0.) { + double sim = simQ.similarity(queryObject, iter); + if(sim > 0.) 
{ nearestNeighbors.add(DBIDUtil.newPair(sim, iter)); } } // Collect DBIDs ArrayModifiableDBIDs dbids = DBIDUtil.newArray(nearestNeighbors.size()); - while (nearestNeighbors.size() > 0) { + while(nearestNeighbors.size() > 0) { dbids.add(nearestNeighbors.poll()); } return dbids; @@ -240,17 +235,17 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @param neighborhood Neighbors * @return Per-dimension variances. */ - private static double[] computePerDimensionVariances(Relation<? extends NumberVector<?>> relation, Vector center, DBIDs neighborhood) { + private static double[] computePerDimensionVariances(Relation<? extends NumberVector> relation, Vector center, DBIDs neighborhood) { double[] c = center.getArrayRef(); double[] variances = new double[c.length]; - for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { - NumberVector<?> databaseObject = relation.get(iter); - for (int d = 0; d < c.length; d++) { + for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { + NumberVector databaseObject = relation.get(iter); + for(int d = 0; d < c.length; d++) { final double deviation = databaseObject.doubleValue(d) - c[d]; variances[d] += deviation * deviation; } } - for (int d = 0; d < variances.length; d++) { + for(int d = 0; d < variances.length; d++) { variances[d] /= neighborhood.size(); } return variances; @@ -264,15 +259,15 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @param weightVector Weight vector * @return sod score */ - private double subspaceOutlierDegree(V queryObject, Vector center, BitSet weightVector) { - final int card = weightVector.cardinality(); - if (card == 0) { + private double subspaceOutlierDegree(V queryObject, Vector center, long[] weightVector) { + final int card = BitsUtil.cardinality(weightVector); + if(card == 0) { return 0; } final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector); - double 
distance = df.distance(queryObject, center).doubleValue(); - distance /= card; // FIXME: defined as card, should be sqrt(card), - // unfortunately + double distance = df.distance(queryObject, center); + distance /= card; // FIXME: defined and published as card, should be + // sqrt(card), unfortunately return distance; } @@ -300,7 +295,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte /** * Relevant dimensions. */ - private BitSet weightVector; + private long[] weightVector; /** * Initialize SOD Model @@ -308,7 +303,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * @param center Center vector * @param weightVector Selected dimensions */ - public SODModel(Vector center, BitSet weightVector) { + public SODModel(Vector center, long[] weightVector) { this.center = center; this.weightVector = weightVector; } @@ -316,7 +311,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte @Override public void writeToText(TextWriterStream out, String label) { out.commentPrintLn(this.getClass().getSimpleName() + ":"); - out.commentPrintLn("relevant attributes (counting starts with 0): " + this.weightVector.toString()); + out.commentPrintLn("relevant attributes (starting with 0): " + BitsUtil.toString(weightVector, ", ", 0)); out.commentPrintLn("center of neighborhood: " + out.normalizationRestore(center).toString()); out.commentPrintSeparator(); } @@ -329,7 +324,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { /** * Parameter to specify the number of shared nearest neighbors to be * considered for learning the subspace properties., must be an integer @@ -366,7 +361,7 @@ public class SOD<V extends 
NumberVector<?>, D extends NumberDistance<D, ?>> exte /** * The similarity function. */ - private SimilarityFunction<V, D> similarityFunction; + private SimilarityFunction<V> similarityFunction; /** * Track models. @@ -376,31 +371,31 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class); - if (config.grab(simP)) { + final ObjectParameter<SimilarityFunction<V>> simP = new ObjectParameter<>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class); + if(config.grab(simP)) { similarityFunction = simP.instantiateClass(config); } final IntParameter knnP = new IntParameter(KNN_ID); knnP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); - if (config.grab(knnP)) { + if(config.grab(knnP)) { knn = knnP.getValue(); } final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 1.1); alphaP.addConstraint(CommonConstraints.GREATER_THAN_ZERO_DOUBLE); - if (config.grab(alphaP)) { + if(config.grab(alphaP)) { alpha = alphaP.doubleValue(); } final Flag modelsF = new Flag(MODELS_ID); - if (config.grab(modelsF)) { + if(config.grab(modelsF)) { models = modelsF.isTrue(); } } @Override - protected SOD<V, D> makeInstance() { + protected SOD<V> makeInstance() { return new SOD<>(knn, alpha, similarityFunction, models); } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java index c3951821..471d9b8d 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 
+ Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/LibSVMOneClassOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/LibSVMOneClassOutlierDetection.java new file mode 100644 index 00000000..25b9cb30 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/LibSVMOneClassOutlierDetection.java @@ -0,0 +1,279 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.svm; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import libsvm.svm; +import libsvm.svm_model; +import libsvm.svm_node; +import libsvm.svm_parameter; +import libsvm.svm_print_interface; +import libsvm.svm_problem; +import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter; + +/** + * Outlier-detection using one-class support vector machines. + * + * Important note: from literature, the one-class SVM is trained as if 0 was the + * only counterexample. 
Outliers will only be detected when they are close to + * the origin! + * + * <p> + * Reference:<br /> + * B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C. Williamson<br /> + * Estimating the support of a high-dimensional distribution<br /> + * Neural computation 13.7 + * </p> + * + * @author Erich Schubert + * + * @param V vector type + */ +@Reference(authors = "B. Schölkopf, J. C. Platt, J. Shawe-Taylor, A. J. Smola, R. C. Williamson", // +title = "Estimating the support of a high-dimensional distribution", // +booktitle = "Neural computation 13.7") +public class LibSVMOneClassOutlierDetection<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(LibSVMOneClassOutlierDetection.class); + + /** + * Kernel functions. Expose as enum for convenience. + * + * @apiviz.exclude + */ + public static enum SVMKernel { // + LINEAR, // Linear + QUADRATIC, // Quadratic + CUBIC, // Cubic + RBF, // Radial basis functions + SIGMOID, // Sigmoid + } + + /** + * Kernel function in use. + */ + protected SVMKernel kernel = SVMKernel.RBF; + + /** + * Constructor. + * + * @param kernel Kernel to use with SVM. + */ + public LibSVMOneClassOutlierDetection(SVMKernel kernel) { + super(); + this.kernel = kernel; + } + + /** + * Run one-class SVM. + * + * @param relation Data relation + * @return Outlier result. 
+ */ + public OutlierResult run(Relation<V> relation) { + final int dim = RelationUtil.dimensionality(relation); + final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs()); + + svm.svm_set_print_string_function(LOG_HELPER); + + svm_parameter param = new svm_parameter(); + param.svm_type = svm_parameter.ONE_CLASS; + param.kernel_type = svm_parameter.LINEAR; + param.degree = 3; + switch(kernel){ + case LINEAR: + param.kernel_type = svm_parameter.LINEAR; + break; + case QUADRATIC: + param.kernel_type = svm_parameter.POLY; + param.degree = 2; + break; + case CUBIC: + param.kernel_type = svm_parameter.POLY; + param.degree = 3; + break; + case RBF: + param.kernel_type = svm_parameter.RBF; + break; + case SIGMOID: + param.kernel_type = svm_parameter.SIGMOID; + break; + default: + throw new AbortException("Invalid kernel parameter: " + kernel); + } + // TODO: expose additional parameters to the end user! + param.nu = 0.05; + param.coef0 = 0.; + param.cache_size = 100; + param.C = 1e2; + param.eps = 1e-4; // not used by one-class? + param.p = 0.1; // not used by one-class? + param.shrinking = 0; + param.probability = 0; + param.nr_weight = 0; + param.weight_label = new int[0]; + param.weight = new double[0]; + param.gamma = 1e-4 / dim; + + // Transform data: + svm_problem prob = new svm_problem(); + prob.l = relation.size(); + prob.x = new svm_node[prob.l][]; + prob.y = new double[prob.l]; + { + DBIDIter iter = ids.iter(); + for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) { + V vec = relation.get(iter); + // TODO: support compact sparse vectors, too! 
+ svm_node[] x = new svm_node[dim]; + for(int d = 0; d < dim; d++) { + x[d] = new svm_node(); + x[d].index = d + 1; + x[d].value = vec.doubleValue(d); + } + prob.x[i] = x; + prob.y[i] = +1; + } + } + + if(LOG.isVerbose()) { + LOG.verbose("Training one-class SVM..."); + } + String err = svm.svm_check_parameter(prob, param); + if(err != null) { + LOG.warning("svm_check_parameter: " + err); + } + svm_model model = svm.svm_train(prob, param); + + if(LOG.isVerbose()) { + LOG.verbose("Predicting..."); + } + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB); + DoubleMinMax mm = new DoubleMinMax(); + { + DBIDIter iter = ids.iter(); + double[] buf = new double[svm.svm_get_nr_class(model)]; + for(int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) { + V vec = relation.get(iter); + svm_node[] x = new svm_node[dim]; + for(int d = 0; d < dim; d++) { + x[d] = new svm_node(); + x[d].index = d + 1; + x[d].value = vec.doubleValue(d); + } + svm.svm_predict_values(model, x, buf); + double score = -buf[0] / param.gamma; // Heuristic rescaling, sorry. + // Unfortunately, libsvm one-class currently yields a binary decision. + scores.putDouble(iter, score); + mm.put(score); + } + } + DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids); + OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.); + return new OutlierResult(scoreMeta, scoreResult); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Setup logging helper for SVM. 
+ */ + static final svm_print_interface LOG_HELPER = new svm_print_interface() { + @Override + public void print(String arg0) { + if(LOG.isVerbose()) { + LOG.verbose(arg0); + } + } + }; + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <V> Vector type + */ + public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { + /** + * Parameter for kernel function. + */ + private static final OptionID KERNEL_ID = new OptionID("svm.kernel", "Kernel to use with SVM."); + + /** + * Kernel in use. + */ + protected SVMKernel kernel = SVMKernel.RBF; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + EnumParameter<SVMKernel> kernelP = new EnumParameter<>(KERNEL_ID, SVMKernel.class, SVMKernel.RBF); + if(config.grab(kernelP)) { + kernel = kernelP.getValue(); + } + } + + @Override + protected LibSVMOneClassOutlierDetection<V> makeInstance() { + return new LibSVMOneClassOutlierDetection<>(kernel); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/package-info.java new file mode 100644 index 00000000..2afbbaf1 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/svm/package-info.java @@ -0,0 +1,29 @@ +/** + * Support-Vector-Machines for outlier detection. + * + * @author Erich Schubert + */ + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.algorithm.outlier.svm;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java index d10eaef8..f8b4eb3e 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -36,7 +36,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; @@ -117,7 +118,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements final double score = (pattern.matcher(label).matches()) ? 
1 : 0; scores.putDouble(iditer, score); } - Relation<Double> scoreres = new MaterializedRelation<>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreres = new MaterializedDoubleRelation("By label outlier scores", "label-outlier", scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java index 44a7975f..c8920617 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,7 +31,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; @@ -73,7 +74,7 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { scores.putDouble(iditer, 1.0); } - Relation<Double> scoreres = new 
MaterializedRelation<>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial all-outlier score", "all-outlier", scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java index 6f2f2f38..dbf338f1 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -32,7 +32,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -72,13 +73,13 @@ public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierRe * @param relation Relation * @return Result */ - public OutlierResult run(Relation<? extends NumberVector<?>> relation) { + public OutlierResult run(Relation<? 
extends NumberVector> relation) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT); DoubleMinMax minmax = new DoubleMinMax(); Mean m = new Mean(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { m.reset(); - NumberVector<?> nv = relation.get(iditer); + NumberVector nv = relation.get(iditer); for (int i = 0; i < nv.getDimensionality(); i++) { m.put(nv.doubleValue(i)); } @@ -86,7 +87,7 @@ public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierRe scores.putDouble(iditer, score); minmax.put(score); } - Relation<Double> scoreres = new MaterializedRelation<>("Trivial mean score", "mean-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial mean score", "mean-outlier", scores, relation.getDBIDs()); OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java index 2e952b5f..adaf9431 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -39,7 +39,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import 
de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; @@ -99,7 +100,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im @Override public OutlierResult run(Database database) { - Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); + Relation<NumberVector> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); Relation<Model> models = database.getRelation(new SimpleTypeInformation<>(Model.class)); // Prefer a true class label try { @@ -120,7 +121,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im * @param labels Label relation * @return Outlier result */ - public OutlierResult run(Relation<Model> models, Relation<NumberVector<?>> vecs, Relation<?> labels) { + public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT); HashSet<GeneratorSingleCluster> generators = new HashSet<>(); @@ -180,7 +181,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im } scores.putDouble(iditer, score); } - Relation<Double> scoreres = new MaterializedRelation<>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs()); + DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java 
b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java index ff3d0296..0a3e27b4 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -31,7 +31,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; @@ -73,7 +74,7 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { scores.putDouble(iditer, 0.0); } - Relation<Double> scoreres = new MaterializedRelation<>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial no-outlier score", "no-outlier", scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java index c927cae4..a6ea3186 100644 --- 
a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2013 +Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |