Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier')
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java | 296
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java | 112
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java | 93
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java | 10
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java | 166
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java | 33
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java | 385
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java | 28
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java | 8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java | 8
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java | 18
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java | 26
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java | 117
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java | 113
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java | 86
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java | 342
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java | 33
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java | 49
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java | 165
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java | 193
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java | 22
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java | 57
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java | 40
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java | 236
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java | 284
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java | 249
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java | 16
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java | 172
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java | 307
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java | 222
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java | 5
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java | 7
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java | 115
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java | 31
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java | 96
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java | 33
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java | 31
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java | 182
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java | 40
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java | 35
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java | 39
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java | 23
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java | 66
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java | 9
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java | 37
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java | 21
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java | 29
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java | 14
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java | 94
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java | 27
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java | 113
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java | 6
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java | 4
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java | 12
-rw-r--r--  src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java | 4
63 files changed, 3420 insertions, 1641 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
index 88a62e38..d52a81fd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
@@ -25,13 +25,11 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import java.util.Collections;
import java.util.HashMap;
-import java.util.Iterator;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
@@ -42,13 +40,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix;
@@ -66,11 +64,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Angle-Based Outlier Detection
@@ -92,39 +90,39 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
@Title("ABOD: Angle-Based Outlier Detection")
@Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.")
@Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946")
-public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlgorithm<V, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<V, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ABOD.class);
+ private static final Logging LOG = Logging.getLogger(ABOD.class);
/**
* Parameter for k, the number of neighbors used in kNN queries.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("abod.k", "Parameter k for kNN queries.");
+ public static final OptionID K_ID = new OptionID("abod.k", "Parameter k for kNN queries.");
/**
* Parameter for sample size to be used in fast mode.
*/
- public static final OptionID FAST_SAMPLE_ID = OptionID.getOrCreateOptionID("abod.samplesize", "Sample size to enable fast mode.");
+ public static final OptionID FAST_SAMPLE_ID = new OptionID("abod.samplesize", "Sample size to enable fast mode.");
/**
* Parameter for the kernel function.
*/
- public static final OptionID KERNEL_FUNCTION_ID = OptionID.getOrCreateOptionID("abod.kernelfunction", "Kernel function to use.");
+ public static final OptionID KERNEL_FUNCTION_ID = new OptionID("abod.kernelfunction", "Kernel function to use.");
/**
* The preprocessor used to materialize the kNN neighborhoods.
*/
- public static final OptionID PREPROCESSOR_ID = OptionID.getOrCreateOptionID("abod.knnquery", "Processor to compute the kNN neighborhoods.");
+ public static final OptionID PREPROCESSOR_ID = new OptionID("abod.knnquery", "Processor to compute the kNN neighborhoods.");
/**
- * use alternate code below
+ * use alternate code below.
*/
- private static final boolean useRNDSample = false;
+ private static final boolean USE_RND_SAMPLE = false;
/**
- * k parameter
+ * k parameter.
*/
private int k;
@@ -134,10 +132,13 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
int sampleSize = 0;
/**
- * Store the configured Kernel version
+ * Store the configured Kernel version.
*/
private PrimitiveSimilarityFunction<? super V, DoubleDistance> primitiveKernelFunction;
+ /**
+ * Static DBID map.
+ */
private ArrayModifiableDBIDs staticids = null;
/**
@@ -173,41 +174,32 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* Main part of the algorithm. Exact version.
*
* @param relation Relation to query
- * @param k k for kNN queries
* @return result
*/
- public OutlierResult getRanking(Relation<V> relation, int k) {
+ public OutlierResult getRanking(Relation<V> relation) {
// Fix a static set of IDs
staticids = DBIDUtil.newArray(relation.getDBIDs());
staticids.sort();
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder());
+ Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder());
// preprocess kNN neighborhoods
- assert (k == this.k);
KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
MeanVariance s = new MeanVariance();
- for(DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) {
+ for (DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) {
s.reset();
- // System.out.println("Processing: " +objKey);
KNNResult<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k);
- Iterator<DistanceResultPair<DoubleDistance>> iter = neighbors.iterator();
- while(iter.hasNext()) {
- DistanceResultPair<DoubleDistance> key1 = iter.next();
- // Iterator iter2 = data.keyIterator();
- Iterator<DistanceResultPair<DoubleDistance>> iter2 = neighbors.iterator();
- // PriorityQueue best = new PriorityQueue(false, k);
- while(iter2.hasNext()) {
- DistanceResultPair<DoubleDistance> key2 = iter2.next();
- if(key2.sameDBID(key1) || key1.sameDBID(objKey) || key2.sameDBID(objKey)) {
+ for (DBIDIter key1 = neighbors.iter(); key1.valid(); key1.advance()) {
+ for (DBIDIter key2 = neighbors.iter(); key2.valid(); key2.advance()) {
+ if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(key1, objKey) || DBIDUtil.equal(key2, objKey)) {
continue;
}
double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
- if(nenner != 0) {
+ if (nenner != 0) {
double sqrtnenner = Math.sqrt(nenner);
double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner;
s.put(tmp, 1 / sqrtnenner);
@@ -217,14 +209,14 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// Sample variance probably would be correct, however the numerical
// instabilities can actually break ABOD here.
- pq.add(new DoubleObjPair<DBID>(s.getNaiveVariance(), objKey.getDBID()));
+ pq.add(DBIDUtil.newPair(s.getNaiveVariance(), objKey));
}
DoubleMinMax minmaxabod = new DoubleMinMax();
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DoubleObjPair<DBID> pair : pq) {
- abodvalues.putDouble(pair.getSecond(), pair.first);
- minmaxabod.put(pair.first);
+ for (DoubleDBIDPair pair : pq) {
+ abodvalues.putDouble(pair, pair.doubleValue());
+ minmaxabod.put(pair.doubleValue());
}
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
@@ -236,11 +228,9 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* Main part of the algorithm. Fast version.
*
* @param relation Relation to use
- * @param k k for kNN queries
- * @param sampleSize Sample size
* @return result
*/
- public OutlierResult getFastRanking(Relation<V> relation, int k, int sampleSize) {
+ public OutlierResult getFastRanking(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
// Fix a static set of IDs
// TODO: add a DBIDUtil.ensureSorted?
@@ -249,92 +239,72 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
- Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder());
+ Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder());
// get Candidate Ranking
- for(DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) {
+ for (DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) {
WritableDoubleDataStore dists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// determine kNearestNeighbors and pairwise distances
- Heap<DoubleObjPair<DBID>> nn;
- if(!useRNDSample) {
+ Heap<DoubleDBIDPair> nn;
+ if (!USE_RND_SAMPLE) {
nn = calcDistsandNN(relation, kernelMatrix, sampleSize, aKey, dists);
- }
- else {
+ } else {
// alternative:
nn = calcDistsandRNDSample(relation, kernelMatrix, sampleSize, aKey, dists);
}
// get normalization
double[] counter = calcFastNormalization(aKey, dists, staticids);
- // System.out.println(counter[0] + " " + counter2[0] + " " + counter[1] +
- // " " + counter2[1]);
// umsetzen von Pq zu list
ModifiableDBIDs neighbors = DBIDUtil.newArray(nn.size());
- while(!nn.isEmpty()) {
- neighbors.add(nn.remove().getSecond());
+ while (!nn.isEmpty()) {
+ neighbors.add(nn.poll());
}
// getFilter
double var = getAbofFilter(kernelMatrix, aKey, dists, counter[1], counter[0], neighbors);
- pq.add(new DoubleObjPair<DBID>(var, aKey.getDBID()));
- // System.out.println("prog "+(prog++));
+ pq.add(DBIDUtil.newPair(var, aKey));
}
// refine Candidates
- Heap<DoubleObjPair<DBID>> resqueue = new Heap<DoubleObjPair<DBID>>(k);
- // System.out.println(pq.size() + " objects ordered into candidate list.");
- // int v = 0;
+ Heap<DoubleDBIDPair> resqueue = new Heap<DoubleDBIDPair>(k);
MeanVariance s = new MeanVariance();
- while(!pq.isEmpty()) {
- if(resqueue.size() == k && pq.peek().first > resqueue.peek().first) {
+ while (!pq.isEmpty()) {
+ if (resqueue.size() == k && pq.peek().doubleValue() > resqueue.peek().doubleValue()) {
break;
}
// double approx = pq.peek().getFirst();
- DBID aKey = pq.remove().getSecond();
- // if(!result.isEmpty()) {
- // System.out.println("Best Candidate " + aKey+" : " + pq.firstPriority()
- // + " worst result: " + result.firstPriority());
- // } else {
- // System.out.println("Best Candidate " + aKey+" : " + pq.firstPriority()
- // + " worst result: " + Double.MAX_VALUE);
- // }
- // v++;
+ DBIDRef aKey = pq.poll();
s.reset();
- for(DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) {
- if(bKey.sameDBID(aKey)) {
+ for (DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) {
+ if (DBIDUtil.equal(bKey, aKey)) {
continue;
}
- for(DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) {
- if(cKey.sameDBID(aKey)) {
+ for (DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) {
+ if (DBIDUtil.equal(cKey, aKey)) {
continue;
}
// double nenner = dists[y]*dists[z];
double nenner = calcDenominator(kernelMatrix, aKey, bKey, cKey);
- if(nenner != 0) {
+ if (nenner != 0) {
double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner;
double sqrtNenner = Math.sqrt(nenner);
s.put(tmp, 1 / sqrtNenner);
}
}
}
- // System.out.println( aKey + "Sum " + sum + " SQRSum " +sqrSum +
- // " Counter " + counter);
double var = s.getSampleVariance();
- // System.out.println(aKey+ " : " + approx +" " + var);
- if(resqueue.size() < k) {
- resqueue.add(new DoubleObjPair<DBID>(var, aKey));
- }
- else {
- if(resqueue.peek().first > var) {
- resqueue.remove();
- resqueue.add(new DoubleObjPair<DBID>(var, aKey));
+ if (resqueue.size() < k) {
+ resqueue.add(DBIDUtil.newPair(var, aKey));
+ } else {
+ if (resqueue.peek().doubleValue() > var) {
+ resqueue.replaceTopElement(DBIDUtil.newPair(var, aKey));
}
}
}
- // System.out.println(v + " Punkte von " + data.size() + " verfeinert !!");
DoubleMinMax minmaxabod = new DoubleMinMax();
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- for(DoubleObjPair<DBID> pair : pq) {
- abodvalues.putDouble(pair.getSecond(), pair.first);
- minmaxabod.put(pair.first);
+ for (DoubleDBIDPair pair : pq) {
+ abodvalues.putDouble(pair, pair.doubleValue());
+ minmaxabod.put(pair.doubleValue());
}
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
@@ -348,7 +318,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
double sum = 0;
double sumF = 0;
for (DBIDIter yKey = ids.iter(); yKey.valid(); yKey.advance()) {
- if(dists.doubleValue(yKey) != 0) {
+ if (dists.doubleValue(yKey) != 0) {
double tmp = 1 / Math.sqrt(dists.doubleValue(yKey));
sum += tmp;
sumF += (1 / dists.doubleValue(yKey)) * tmp;
@@ -357,7 +327,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
double sofar = 0;
double sofarF = 0;
for (DBIDIter zKey = ids.iter(); zKey.valid(); zKey.advance()) {
- if(dists.doubleValue(zKey) != 0) {
+ if (dists.doubleValue(zKey) != 0) {
double tmp = 1 / Math.sqrt(dists.doubleValue(zKey));
sofar += tmp;
double rest = sum - sofar;
@@ -375,17 +345,17 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
double sum = 0.0;
double sqrSum = 0.0;
double partCounter = 0;
- for(DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) {
- if(bKey.sameDBID(aKey)) {
+ for (DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) {
+ if (DBIDUtil.equal(bKey, aKey)) {
continue;
}
- for(DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) {
- if(cKey.sameDBID(aKey)) {
+ for (DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) {
+ if (DBIDUtil.equal(cKey, aKey)) {
continue;
}
- if(bKey.compareDBID(cKey) > 0) {
+ if (DBIDUtil.compare(bKey, cKey) > 0) {
double nenner = dists.doubleValue(bKey) * dists.doubleValue(cKey);
- if(nenner != 0) {
+ if (nenner != 0) {
double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner;
double sqrtNenner = Math.sqrt(nenner);
sum += tmp * (1 / sqrtNenner);
@@ -417,7 +387,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
private int mapDBID(DBIDRef aKey) {
// TODO: this is not the most efficient...
int off = staticids.binarySearch(aKey);
- if(off < 0) {
+ if (off < 0) {
throw new AbortException("Did not find id " + aKey.toString() + " in staticids. " + staticids.contains(aKey));
}
return off + 1;
@@ -434,33 +404,31 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi));
}
- private Heap<DoubleObjPair<DBID>> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
- for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
+ private Heap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
+ Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize);
+ for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
double val = calcCos(kernelMatrix, aKey, bKey);
dists.putDouble(bKey, val);
- if(nn.size() < sampleSize) {
- nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
- }
- else {
- if(val < nn.peek().first) {
- nn.remove();
- nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
+ if (nn.size() < sampleSize) {
+ nn.add(DBIDUtil.newPair(val, bKey));
+ } else {
+ if (val < nn.peek().doubleValue()) {
+ nn.replaceTopElement(DBIDUtil.newPair(val, bKey));
}
}
}
return nn;
}
- private Heap<DoubleObjPair<DBID>> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
- Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
+ private Heap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
+ Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize);
int step = (int) ((double) data.size() / (double) sampleSize);
int counter = 0;
- for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
+ for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
double val = calcCos(kernelMatrix, aKey, bKey);
dists.putDouble(bKey, val);
- if(counter % step == 0) {
- nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
+ if (counter % step == 0) {
+ nn.add(DBIDUtil.newPair(val, bKey));
}
counter++;
}
@@ -471,112 +439,108 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* Get explanations for points in the database.
*
* @param data to get explanations for
+ * @return String explanation
*/
// TODO: this should be done by the result classes.
- public void getExplanations(Relation<V> data) {
+ public String getExplanations(Relation<V> data) {
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, data, staticids);
// PQ for Outlier Ranking
- Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(data.size(), Collections.reverseOrder());
+ Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(data.size(), Collections.reverseOrder());
HashMap<DBID, DBIDs> explaintab = new HashMap<DBID, DBIDs>();
// test all objects
MeanVariance s = new MeanVariance(), s2 = new MeanVariance();
- for(DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) {
+ for (DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) {
s.reset();
// Queue for the best explanation
- Heap<DoubleObjPair<DBID>> explain = new Heap<DoubleObjPair<DBID>>();
+ Heap<DoubleDBIDPair> explain = new Heap<DoubleDBIDPair>();
// determine Object
// for each pair of other objects
for (DBIDIter key1 = data.iterDBIDs(); key1.valid(); key1.advance()) {
- // Collect Explanation Vectors
+ // Collect Explanation Vectors
s2.reset();
- if(objKey.sameDBID(key1)) {
+ if (DBIDUtil.equal(objKey, key1)) {
continue;
}
for (DBIDIter key2 = data.iterDBIDs(); key2.valid(); key2.advance()) {
- if(key2.sameDBID(key1) || objKey.sameDBID(key2)) {
+ if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(objKey, key2)) {
continue;
}
double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
- if(nenner != 0) {
+ if (nenner != 0) {
double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner;
double sqr = Math.sqrt(nenner);
s2.put(tmp, 1 / sqr);
}
}
- explain.add(new DoubleObjPair<DBID>(s2.getSampleVariance(), key1.getDBID()));
+ explain.add(DBIDUtil.newPair(s2.getSampleVariance(), key1));
s.put(s2);
}
// build variance of the observed vectors
- pq.add(new DoubleObjPair<DBID>(s.getSampleVariance(), objKey.getDBID()));
+ pq.add(DBIDUtil.newPair(s.getSampleVariance(), objKey));
//
ModifiableDBIDs expList = DBIDUtil.newArray();
- expList.add(explain.remove().getSecond());
- while(!explain.isEmpty()) {
- DBID nextKey = explain.remove().getSecond();
- if(nextKey.sameDBID(objKey)) {
+ expList.add(explain.poll());
+ while (!explain.isEmpty()) {
+ DBIDRef nextKey = explain.poll();
+ if (DBIDUtil.equal(nextKey, objKey)) {
continue;
}
double max = Double.MIN_VALUE;
- for(DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) {
- if(exp.sameDBID(objKey) || nextKey.sameDBID(exp)) {
+ for (DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) {
+ if (DBIDUtil.equal(exp, objKey) || DBIDUtil.equal(nextKey, exp)) {
continue;
}
double nenner = Math.sqrt(calcCos(kernelMatrix, objKey, nextKey)) * Math.sqrt(calcCos(kernelMatrix, objKey, exp));
double angle = calcNumerator(kernelMatrix, objKey, nextKey, exp) / nenner;
max = Math.max(angle, max);
}
- if(max < 0.5) {
+ if (max < 0.5) {
expList.add(nextKey);
}
}
- explaintab.put(objKey.getDBID(), expList);
+ explaintab.put(DBIDUtil.deref(objKey), expList);
}
- System.out.println("--------------------------------------------");
- System.out.println("Result: ABOD");
+ StringBuilder buf = new StringBuilder();
+ buf.append("Result: ABOD\n");
int count = 0;
- while(!pq.isEmpty()) {
- if(count > 10) {
+ while (!pq.isEmpty()) {
+ if (count > 10) {
break;
}
- double factor = pq.peek().first;
- DBID key = pq.remove().getSecond();
- System.out.print(data.get(key) + " ");
- System.out.println(count + " Factor=" + factor + " " + key);
+ double factor = pq.peek().doubleValue();
+ DBIDRef key = pq.poll();
+ buf.append(data.get(key)).append(' ');
+ buf.append(count).append(" Factor=").append(factor).append(' ').append(key).append('\n');
DBIDs expList = explaintab.get(key);
- generateExplanation(data, key, expList);
+ generateExplanation(buf, data, key, expList);
count++;
}
- System.out.println("--------------------------------------------");
+ return buf.toString();
}
- private void generateExplanation(Relation<V> data, DBID key, DBIDs expList) {
+ private void generateExplanation(StringBuilder buf, Relation<V> data, DBIDRef key, DBIDs expList) {
Vector vect1 = data.get(key).getColumnVector();
- for(DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) {
- System.out.println("Outlier: " + vect1);
+ for (DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) {
+ buf.append("Outlier: ").append(vect1).append('\n');
Vector exp = data.get(iter).getColumnVector();
- System.out.println("Most common neighbor: " + exp);
+ buf.append("Most common neighbor: ").append(exp).append('\n');
// determine difference Vector
Vector vals = exp.minus(vect1);
- System.out.println(vals);
- // System.out.println(new FeatureVector(
- // "Diff-"+vect1.getPrimaryKey(),vals ));
+ buf.append(vals).append('\n');
}
- System.out.println();
}
/**
- * Run ABOD on the data set
+ * Run ABOD on the data set.
*
- * @param database
- * @param relation
+ * @param relation Relation to process
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<V> relation) {
- if(sampleSize > 0) {
- return getFastRanking(relation, k, sampleSize);
- }
- else {
- return getRanking(relation, k);
+ public OutlierResult run(Relation<V> relation) {
+ if (sampleSize > 0) {
+ return getFastRanking(relation);
+ } else {
+ return getRanking(relation);
}
}
@@ -587,7 +551,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -597,26 +561,38 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, DoubleDistance> {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, DoubleDistance> {
+ /**
+ * k Parameter.
+ */
protected int k = 0;
+ /**
+ * Sample size.
+ */
protected int sampleSize = 0;
+ /**
+ * Distance function.
+ */
protected PrimitiveSimilarityFunction<V, DoubleDistance> primitiveKernelFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(1), 30);
- if(config.grab(kP)) {
+ final IntParameter kP = new IntParameter(K_ID, 30);
+ kP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(kP)) {
k = kP.getValue();
}
- final IntParameter sampleSizeP = new IntParameter(FAST_SAMPLE_ID, new GreaterEqualConstraint(1), true);
- if(config.grab(sampleSizeP)) {
+ final IntParameter sampleSizeP = new IntParameter(FAST_SAMPLE_ID);
+ sampleSizeP.addConstraint(new GreaterEqualConstraint(1));
+ sampleSizeP.setOptional(true);
+ if (config.grab(sampleSizeP)) {
sampleSize = sampleSizeP.getValue();
}
final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class);
- if(config.grab(param)) {
+ if (config.grab(param)) {
primitiveKernelFunction = param.instantiateClass(config);
}
}
@@ -626,4 +602,4 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
return new ABOD<V>(k, sampleSize, primitiveKernelFunction, distanceFunction);
}
}
-}
\ No newline at end of file
+}
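
Note on the recurring change in ABOD.java: Heap<DoubleObjPair<DBID>> becomes the primitive Heap<DoubleDBIDPair>, so the pair object now serves both as a DBID reference and as the score carrier, and the old getSecond()/first accessors disappear. Below is a minimal sketch of the new result-building idiom, assuming the same ELKI snapshot this patch targets and the Heap class from de.lmu.ifi.dbs.elki.utilities.datastructures.heap; the class and method names are illustrative only, not part of the patch:

  import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
  import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
  import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
  import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
  import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
  import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
  import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
  import de.lmu.ifi.dbs.elki.database.relation.Relation;
  import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
  import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;

  final class ScoreRelationSketch {
    // Turn a heap of (score, DBID) pairs into a materialized score relation,
    // mirroring the loop used by getRanking()/getFastRanking() above.
    static Relation<Double> toScoreRelation(DBIDs ids, Heap<DoubleDBIDPair> pq) {
      DoubleMinMax minmax = new DoubleMinMax(); // would feed the OutlierScoreMeta
      WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
      for (DoubleDBIDPair pair : pq) {
        scores.putDouble(pair, pair.doubleValue()); // the pair is usable as a DBID reference
        minmax.put(pair.doubleValue());             // and carries the score as a primitive double
      }
      return new MaterializedRelation<Double>("Outlier score", "score", TypeUtil.DOUBLE, scores, ids);
    }
  }
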
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
index 39c3db60..41da687f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
@@ -36,10 +36,12 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
@@ -51,6 +53,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -58,8 +61,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -78,17 +81,19 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* @author Jonathan von Brünken
* @author Erich Schubert
*
+ * @apiviz.composedOf ALOCIQuadTree
+ *
* @param <O> Object type
* @param <D> Distance type
*/
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
-public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ALOCI.class);
+ private static final Logging LOG = Logging.getLogger(ALOCI.class);
/**
* Minimum size for a leaf.
@@ -108,7 +113,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
/**
* Random generator
*/
- private Random random;
+ private RandomFactory rnd;
/**
* Distance function
@@ -122,20 +127,21 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param nmin Minimum neighborhood size
* @param alpha Alpha value
* @param g Number of grids to use
- * @param seed Random generator seed.
+ * @param rnd Random generator.
*/
- public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, Long seed) {
+ public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
super();
this.distFunc = distanceFunction;
this.nmin = nmin;
this.alpha = alpha;
this.g = g;
- this.random = (seed != null) ? new Random(seed) : new Random(0);
+ this.rnd = rnd;
}
public OutlierResult run(Database database, Relation<O> relation) {
- final int dim = DatabaseUtil.dimensionality(relation);
- FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, logger) : null;
+ final int dim = RelationUtil.dimensionality(relation);
+ final Random random = rnd.getRandom();
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, LOG) : null;
// Compute extend of dataset.
double[] min, max;
@@ -145,13 +151,13 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
min = new double[dim];
max = new double[dim];
for(int i = 0; i < dim; i++) {
- min[i] = hbbs.first.doubleValue(i + 1);
- max[i] = hbbs.second.doubleValue(i + 1);
+ min[i] = hbbs.first.doubleValue(i);
+ max[i] = hbbs.second.doubleValue(i);
maxd = Math.max(maxd, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
for(int i = 0; i < dim; i++) {
- double diff = (maxd - (max[i] - min[i])) / 2;
+ double diff = (maxd - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
@@ -163,7 +169,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
qts.add(qt);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
/*
* create the remaining g-1 shifted QuadTrees. This not clearly described in
@@ -178,19 +184,19 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
qts.add(qt);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
}
if(progressPreproc != null) {
- progressPreproc.ensureCompleted(logger);
+ progressPreproc.ensureCompleted(LOG);
}
// aLOCI main loop: evaluate
- FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), logger) : null;
+ FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final O obj = relation.get(iditer);
double maxmdefnorm = 0;
@@ -239,11 +245,11 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
mdef_norm.putDouble(iditer, maxmdefnorm);
minmax.put(maxmdefnorm);
if(progressLOCI != null) {
- progressLOCI.incrementProcessed(logger);
+ progressLOCI.incrementProcessed(LOG);
}
}
if(progressLOCI != null) {
- progressLOCI.ensureCompleted(logger);
+ progressLOCI.ensureCompleted(LOG);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
@@ -291,7 +297,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -329,7 +335,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
/**
* Relation indexed.
*/
- private Relation<? extends NumberVector<?, ?>> relation;
+ private Relation<? extends NumberVector<?>> relation;
/**
* Constructor.
@@ -340,7 +346,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param nmin Maximum size for a page to split
* @param relation Relation to index
*/
- public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?, ?>> relation) {
+ public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?>> relation) {
super();
assert (min.length <= 32) : "Quadtrees are only supported for up to 32 dimensions";
this.shift = shift;
@@ -386,11 +392,14 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
// logger.warning(FormatUtil.format(lmin)+" "+FormatUtil.format(lmax)+" "+start+"->"+end+" "+(end-start));
// Hack: Check degenerate cases that won't split
if(dim == 0) {
- NumberVector<?, ?> first = relation.get(ids.get(start));
+ DBIDArrayIter iter = ids.iter();
+ iter.seek(start);
+ NumberVector<?> first = relation.get(iter);
+ iter.advance();
boolean degenerate = true;
- loop: for(int pos = start + 1; pos < end; pos++) {
- NumberVector<?, ?> other = relation.get(ids.get(pos));
- for(int d = 1; d <= lmin.length; d++) {
+ loop: for(; iter.getOffset() < end; iter.advance()) {
+ NumberVector<?> other = relation.get(iter);
+ for(int d = 0; d < lmin.length; d++) {
if(Math.abs(first.doubleValue(d) - other.doubleValue(d)) > 1E-15) {
degenerate = false;
break loop;
@@ -431,20 +440,23 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
}
else {
// Partially sort data, by dimension dim < mid
- int spos = start, epos = end;
- while(spos < epos) {
- if(getShiftedDim(relation.get(ids.get(spos)), dim, level) <= .5) {
- spos++;
+ DBIDArrayIter siter = ids.iter(), eiter = ids.iter();
+ siter.seek(start);
+ eiter.seek(end - 1);
+ while(siter.getOffset() < eiter.getOffset()) {
+ if(getShiftedDim(relation.get(siter), dim, level) <= .5) {
+ siter.advance();
continue;
}
- if(getShiftedDim(relation.get(ids.get(epos - 1)), dim, level) > 0.5) {
- epos--;
+ if(getShiftedDim(relation.get(eiter), dim, level) > 0.5) {
+ eiter.retract();
continue;
}
- ids.swap(spos, epos - 1);
- spos++;
- epos--;
+ ids.swap(siter.getOffset(), eiter.getOffset() - 1);
+ siter.advance();
+ eiter.retract();
}
+ final int spos = siter.getOffset();
if(start < spos) {
final double tmp = lmax[dim];
lmax[dim] = lmax[dim] * .5 + lmin[dim] * .5;
@@ -468,8 +480,8 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param level Level (controls scaling/wraping!)
* @return Shifted position
*/
- private double getShiftedDim(NumberVector<?, ?> obj, int dim, int level) {
- double pos = obj.doubleValue(dim + 1) + shift[dim];
+ private double getShiftedDim(NumberVector<?> obj, int dim, int level) {
+ double pos = obj.doubleValue(dim) + shift[dim];
pos = (pos - min[dim]) / width[dim] * (1 + level);
return pos - Math.floor(pos);
}
@@ -482,7 +494,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
* @param tlevel Target level
* @return Node
*/
- public Node findClosestNode(NumberVector<?, ?> vec, int tlevel) {
+ public Node findClosestNode(NumberVector<?> vec, int tlevel) {
Node cur = root;
for(int level = 0; level <= tlevel; level++) {
if(cur.children == null) {
@@ -637,26 +649,26 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
* Parameter to specify the minimum neighborhood size
*/
- public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+ public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered.");
/**
* Parameter to specify the averaging neighborhood scaling.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+ public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood");
/**
* Parameter to specify the number of Grids to use.
*/
- public static final OptionID GRIDS_ID = OptionID.getOrCreateOptionID("loci.g", "The number of Grids to use.");
+ public static final OptionID GRIDS_ID = new OptionID("loci.g", "The number of Grids to use.");
/**
* Parameter to specify the seed to initialize Random.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("loci.seed", "The seed to use for initializing Random.");
+ public static final OptionID SEED_ID = new OptionID("loci.seed", "The seed to use for initializing Random.");
/**
* Neighborhood minimum size
@@ -674,9 +686,9 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
protected int g = 1;
/**
- * Random generator seed
+ * Random generator
*/
- protected Long seed = null;
+ protected RandomFactory rnd;
/**
* The distance function
@@ -702,9 +714,9 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
this.g = g.getValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- this.seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ this.rnd = rndP.getValue();
}
final IntParameter alphaP = new IntParameter(ALPHA_ID, 4);
@@ -718,7 +730,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>>
@Override
protected ALOCI<O, D> makeInstance() {
- return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, seed);
+ return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, rnd);
}
}
-}
\ No newline at end of file
+}
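
Note on ALOCI.java: besides the switch to 0-based attribute indexing and the logger rename, the main API shift is seed handling, where a raw Long seed plus LongParameter becomes a RandomFactory plus RandomParameter, and the Random instance is only created inside run(). A minimal sketch of that pattern, assuming the ELKI snapshot this patch targets; the option name example.seed and the helper class are illustrative:

  import java.util.Random;

  import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
  import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
  import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
  import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;

  final class SeedHandlingSketch {
    static final OptionID SEED_ID = new OptionID("example.seed", "The seed to use for initializing Random.");

    // Parameterizer side: grab a RandomFactory instead of a raw Long seed.
    static RandomFactory grabSeed(Parameterization config) {
      RandomFactory rnd = null;
      final RandomParameter rndP = new RandomParameter(SEED_ID);
      if (config.grab(rndP)) {
        rnd = rndP.getValue();
      }
      return rnd;
    }

    // Algorithm side: obtain the actual generator lazily, at run() time.
    static Random makeGenerator(RandomFactory rnd) {
      return rnd.getRandom();
    }
  }
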
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
index 9c1a216a..2a4885dc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
@@ -25,28 +25,26 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.Vector;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
/**
@@ -64,19 +62,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* @author Ahmed Hettab
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
- /**
- * OptionID for the grid size
- */
- public static final OptionID PHI_ID = OptionID.getOrCreateOptionID("ay.phi", "The number of equi-depth grid ranges to use in each dimension.");
-
- /**
- * OptionID for the target dimensionality
- */
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("ay.k", "Subspace dimensionality to search for.");
-
+public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* Symbolic value for subspaces not in use.
*
@@ -86,7 +76,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
public static final int DONT_CARE = 0;
/**
- * The number of partitions for each dimension
+ * The number of partitions for each dimension.
*/
protected int phi;
@@ -112,33 +102,32 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
* Each attribute of data is divided into phi equi-depth ranges.<br />
* Each range contains a fraction f=1/phi of the records.
*
- * @param database
+ * @param relation Relation to process
* @return range map
*/
- protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> database) {
- final int dim = DatabaseUtil.dimensionality(database);
- final int size = database.size();
- final DBIDs allids = database.getDBIDs();
+ protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
+ final int size = relation.size();
+ final DBIDs allids = relation.getDBIDs();
final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<ArrayList<DBIDs>>();
// Temporary projection storage of the database
- final ArrayList<ArrayList<DoubleObjPair<DBID>>> dbAxis = new ArrayList<ArrayList<DoubleObjPair<DBID>>>(dim);
+ final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<ArrayList<DoubleDBIDPair>>(dim);
for(int i = 0; i < dim; i++) {
- ArrayList<DoubleObjPair<DBID>> axis = new ArrayList<DoubleObjPair<DBID>>(size);
+ ArrayList<DoubleDBIDPair> axis = new ArrayList<DoubleDBIDPair>(size);
dbAxis.add(i, axis);
}
// Project
for(DBIDIter iter = allids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- final V obj = database.get(id);
- for(int d = 1; d <= dim; d++) {
- dbAxis.get(d - 1).add(new DoubleObjPair<DBID>(obj.doubleValue(d), id));
+ final V obj = relation.get(iter);
+ for(int d = 0; d < dim; d++) {
+ dbAxis.get(d).add(DBIDUtil.newPair(obj.doubleValue(d), iter));
}
}
// Split into cells
final double part = size * 1.0 / phi;
- for(int d = 1; d <= dim; d++) {
- ArrayList<DoubleObjPair<DBID>> axis = dbAxis.get(d - 1);
+ for(int d = 0; d < dim; d++) {
+ ArrayList<DoubleDBIDPair> axis = dbAxis.get(d);
Collections.sort(axis);
ArrayList<DBIDs> dimranges = new ArrayList<DBIDs>(phi + 1);
dimranges.add(allids);
@@ -150,7 +139,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
}
ArrayModifiableDBIDs currange = DBIDUtil.newArray(phi + 1);
for(int i = start; i < end; i++) {
- currange.add(axis.get(i).second);
+ currange.add(axis.get(i));
}
start = end;
dimranges.add(currange);
@@ -161,14 +150,15 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
}
/**
- * Method to calculate the sparsity coefficient of
+ * Method to calculate the sparsity coefficient of.
*
* @param setsize Size of subset
* @param dbsize Size of database
* @param k Dimensionality
+ * @param phi Phi parameter
* @return sparsity coefficient
*/
- protected double sparsity(final int setsize, final int dbsize, final int k) {
+ protected static double sparsity(final int setsize, final int dbsize, final int k, final double phi) {
// calculate sparsity c
final double f = 1. / phi;
final double fK = Math.pow(f, k);
@@ -177,16 +167,17 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
}
/**
- * Method to get the ids in the given subspace
+ * Method to get the ids in the given subspace.
*
- * @param subspace
+ * @param subspace Subspace to process
+ * @param ranges List of DBID ranges
* @return ids
*/
- protected DBIDs computeSubspace(Vector<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) {
- HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first - 1).get(subspace.get(0).second));
+ protected DBIDs computeSubspace(ArrayList<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) {
+ HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first).get(subspace.get(0).second));
// intersect all selected dimensions
for(int i = 1; i < subspace.size(); i++) {
- DBIDs current = ranges.get(subspace.get(i).first - 1).get(subspace.get(i).second);
+ DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second);
ids.retainAll(current);
if(ids.size() == 0) {
break;
@@ -226,19 +217,37 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
*
* @apiviz.exclude
*/
- public static abstract class Parameterizer extends AbstractParameterizer {
- protected Integer phi;
+ public abstract static class Parameterizer extends AbstractParameterizer {
+ /**
+ * OptionID for the grid size.
+ */
+ public static final OptionID PHI_ID = new OptionID("ay.phi", "The number of equi-depth grid ranges to use in each dimension.");
+
+ /**
+ * OptionID for the target dimensionality.
+ */
+ public static final OptionID K_ID = new OptionID("ay.k", "Subspace dimensionality to search for.");
+
+ /**
+ * Phi parameter.
+ */
+ protected int phi;
- protected Integer k;
+ /**
+ * k Parameter.
+ */
+ protected int k;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(2));
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(kP)) {
k = kP.getValue();
}
- final IntParameter phiP = new IntParameter(PHI_ID, new GreaterEqualConstraint(2));
+ final IntParameter phiP = new IntParameter(PHI_ID);
+ phiP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(phiP)) {
phi = phiP.getValue();
}
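
Note on AbstractAggarwalYuOutlier.java: two things change here, the dimension loops now run 0-based, and sparsity() becomes a static method that takes phi explicitly. The hunk ends before the return statement, so for orientation only: the sparsity coefficient in the cited Aggarwal/Yu paper is S(C) = (n(C) - N*f^k) / sqrt(N*f^k*(1 - f^k)) with f = 1/phi. A self-contained sketch of that formula, assuming the paper's definition rather than quoting the (elided) ELKI method body:

  // Hypothetical illustration of the Aggarwal/Yu sparsity coefficient;
  // not the ELKI implementation, whose return statement is not shown above.
  final class SparsitySketch {
    static double sparsity(int setsize, int dbsize, int k, double phi) {
      final double f = 1. / phi;            // fraction of records per equi-depth range
      final double fK = Math.pow(f, k);     // expected fraction in a k-dimensional cell
      final double expected = dbsize * fK;  // expected cell count under independence
      final double stddev = Math.sqrt(dbsize * fK * (1 - fK));
      return (setsize - expected) / stddev; // strongly negative values indicate sparse cells
    }
  }
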
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
index a5ccce3a..0e6f502a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
@@ -27,7 +27,7 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -56,7 +56,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
/**
* Parameter to specify the size of the D-neighborhood
*/
- public static final OptionID D_ID = OptionID.getOrCreateOptionID("dbod.d", "size of the D-neighborhood");
+ public static final OptionID D_ID = new OptionID("dbod.d", "size of the D-neighborhood");
/**
* Holds the value of {@link #D_ID}.
@@ -83,7 +83,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*/
public OutlierResult run(Database database, Relation<O> relation) {
// Run the actual score process
- DataStore<Double> dbodscore = computeOutlierScores(database, relation, d);
+ DoubleDataStore dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
@@ -99,7 +99,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
* @param d distance
* @return computed scores
*/
- protected abstract DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d);
+ protected abstract DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d);
@Override
public TypeInformation[] getInputTypeRestriction() {
@@ -113,7 +113,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
*
* @apiviz.exclude
*/
- public static abstract class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
/**
* Query radius
*/
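
Note on AbstractDBOutlier.java: the subclass contract is tightened from DataStore<Double> to the primitive DoubleDataStore. A minimal sketch of what a computeOutlierScores() implementation now hands back, assuming WritableDoubleDataStore extends DoubleDataStore as used elsewhere in this commit; the helper below is illustrative, not taken from the patch:

  import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
  import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
  import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
  import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
  import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
  import de.lmu.ifi.dbs.elki.database.ids.DBIDs;

  final class DoubleDataStoreSketch {
    // Assign the same score to every object; a real subclass would compute a
    // distance-based score inside the loop instead.
    static DoubleDataStore constantScores(DBIDs ids, double score) {
      WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
      for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        scores.putDouble(iter, score);
      }
      return scores;
    }
  }
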
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
index 1d02e865..c263cdfa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
@@ -37,18 +37,18 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -58,7 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -85,40 +85,26 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("EAFOD: the evolutionary outlier detection algorithm")
@Description("Outlier detection for high dimensional data")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
- protected static final Logging logger = Logging.getLogger(AggarwalYuEvolutionary.class);
-
- /**
- * Parameter to specify the number of solutions must be an integer greater
- * than 1.
- * <p>
- * Key: {@code -eafod.m}
- * </p>
- */
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("ay.m", "Population size for evolutionary algorithm.");
-
- /**
- * Parameter to specify the random generator seed.
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("ay.seed", "The random number generator seed.");
+ private static final Logging LOG = Logging.getLogger(AggarwalYuEvolutionary.class);
/**
* Maximum iteration count for evolutionary search.
*/
- protected final int MAX_ITERATIONS = 1000;
+ protected static final int MAX_ITERATIONS = 1000;
/**
- * Holds the value of {@link #M_ID}.
+ * Holds the value of {@link Parameterizer#M_ID}.
*/
private int m;
/**
- * Holds the value of {@link #SEED_ID}.
+ * Random generator.
*/
- private Long seed;
+ private RandomFactory rnd;
/**
* Constructor.
@@ -126,12 +112,12 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
* @param k K
* @param phi Phi
* @param m M
- * @param seed Seed
+ * @param rnd Random generator
*/
- public AggarwalYuEvolutionary(int k, int phi, int m, Long seed) {
+ public AggarwalYuEvolutionary(int k, int phi, int m, RandomFactory rnd) {
super(k, phi);
this.m = m;
- this.seed = seed;
+ this.rnd = rnd;
}
/**
@@ -145,27 +131,25 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
final int dbsize = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- Collection<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, seed)).run();
+ Iterable<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
for(Individuum ind : individuums) {
DBIDs ids = computeSubspaceForGene(ind.getGene(), ranges);
- double sparsityC = sparsity(ids.size(), dbsize, k);
+ double sparsityC = sparsity(ids.size(), dbsize, k, phi);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- double prev = outlierScore.doubleValue(id);
+ double prev = outlierScore.doubleValue(iter);
if(Double.isNaN(prev) || sparsityC < prev) {
- outlierScore.putDouble(id, sparsityC);
+ outlierScore.putDouble(iter, sparsityC);
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double val = outlierScore.doubleValue(id);
+ double val = outlierScore.doubleValue(iditer);
if(Double.isNaN(val)) {
- outlierScore.putDouble(id, 0.0);
+ outlierScore.putDouble(iditer, 0.0);
val = 0.0;
}
minmax.put(val);
@@ -177,7 +161,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -189,17 +173,17 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
*/
private class EvolutionarySearch {
/**
- * Database size
+ * Database size.
*/
final int dbsize;
/**
- * Database dimensionality
+ * Database dimensionality.
*/
final int dim;
/**
- * Database ranges
+ * Database ranges.
*/
final ArrayList<ArrayList<DBIDs>> ranges;
@@ -209,36 +193,34 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
final int m;
/**
- * random generator
+ * random generator.
*/
final private Random random;
/**
* Constructor.
*
- * @param database Database to use
+ * @param relation Database to use
+ * @param ranges DBID ranges to process
* @param m Population size
- * @param seed Random generator seed
+ * @param random Random generator
*/
- public EvolutionarySearch(Relation<V> database, ArrayList<ArrayList<DBIDs>> ranges, int m, Long seed) {
+ public EvolutionarySearch(Relation<V> relation, ArrayList<ArrayList<DBIDs>> ranges, int m, Random random) {
super();
this.ranges = ranges;
this.m = m;
- this.dbsize = database.size();
- this.dim = DatabaseUtil.dimensionality(database);
- if(seed != null) {
- this.random = new Random(seed);
- }
- else {
- this.random = new Random();
- }
+ this.dbsize = relation.size();
+ this.dim = RelationUtil.dimensionality(relation);
+ this.random = random;
}
- public Collection<Individuum> run() {
+ public Iterable<Individuum> run() {
ArrayList<Individuum> pop = initialPopulation(m);
// best Population
TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<Individuum>(m, Collections.reverseOrder());
- bestSol.addAll(pop);
+ for (Individuum ind : pop) {
+ bestSol.add(ind);
+ }
int iterations = 0;
while(!checkConvergence(pop)) {
@@ -249,26 +231,29 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
// Mutation with probability 0.25 , 0.25
pop = mutation(pop, 0.5, 0.5);
// Avoid duplicates
- for(Individuum ind : pop) {
- if(!bestSol.contains(ind)) {
- bestSol.add(ind);
+ ind: for(Individuum ind : pop) {
+ for (Individuum b : bestSol) {
+ if (b.equals(ind)) {
+ continue ind;
+ }
}
+ bestSol.add(ind);
}
- if(logger.isDebuggingFinest()) {
- StringBuffer buf = new StringBuffer();
+ if(LOG.isDebuggingFinest()) {
+ StringBuilder buf = new StringBuilder();
buf.append("Top solutions:\n");
for(Individuum ind : bestSol) {
- buf.append(ind.toString()).append("\n");
+ buf.append(ind.toString()).append('\n');
}
buf.append("Population:\n");
for(Individuum ind : pop) {
- buf.append(ind.toString()).append("\n");
+ buf.append(ind.toString()).append('\n');
}
- logger.debugFinest(buf.toString());
+ LOG.debugFinest(buf.toString());
}
iterations++;
if(iterations > MAX_ITERATIONS) {
- logger.warning("Maximum iterations reached.");
+ LOG.warning("Maximum iterations reached.");
break;
}
}
@@ -276,7 +261,10 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
/**
- * check the termination criterion
+ * check the termination criterion.
+ *
+ * @param pop Population
+ * @return Convergence
*/
private boolean checkConvergence(Collection<Individuum> pop) {
if(pop.size() == 0) {
@@ -291,7 +279,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
for(int d = 0; d < dim; d++) {
int val = gene[d] + DONT_CARE;
if(val < 0 || val >= phi + 1) {
- logger.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
+ LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
occur[d][val] += 1;
@@ -299,8 +287,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
int conv = (int) (pop.size() * 0.95);
- if(logger.isDebuggingFine()) {
- logger.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
for(int d = 0; d < dim; d++) {
boolean converged = false;
@@ -353,18 +341,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
/**
+ * Select surviving individuums weighted by rank.
+ *
* the selection criterion for the genetic algorithm: <br>
* roulette wheel mechanism: <br>
* where the probability of sampling an individual of the population was
* proportional to p - r(i), where p is the size of population and r(i) the
* rank of i-th individual
*
- * @param population
+ * @param population Population
+ * @return Survivors
*/
private ArrayList<Individuum> rouletteRankSelection(ArrayList<Individuum> population) {
final int popsize = population.size();
// Relative weight := popsize - position => sum(1..popsize)
- int totalweight = popsize * (popsize + 1) / 2;
+ int totalweight = (popsize * (popsize + 1)) >> 1;
// Survivors
ArrayList<Individuum> survivors = new ArrayList<Individuum>(popsize);
@@ -392,7 +383,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
/**
- * method implements the mutation algorithm
+ * Apply the mutation algorithm.
*/
private ArrayList<Individuum> mutation(ArrayList<Individuum> population, double perc1, double perc2) {
// the Mutations
@@ -470,7 +461,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
*/
private Individuum makeIndividuum(int[] gene) {
final DBIDs ids = computeSubspaceForGene(gene, ranges);
- final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k) : Double.MAX_VALUE;
+ final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k, phi) : Double.MAX_VALUE;
return new Individuum(fitness, gene);
}
@@ -543,8 +534,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
l1[next] = parent1.getGene()[next];
l2[next] = parent2.getGene()[next];
- final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k);
- final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k);
+ final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k, phi);
+ final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k, phi);
if(sparsityL1 <= sparsityL2) {
b = l1.clone();
@@ -619,6 +610,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
* Individuum for the evolutionary search.
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair
*/
private static class Individuum extends FCPair<Double, int[]> {
/**
@@ -691,27 +684,42 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ /**
+ * Parameter to specify the number of solutions; must be an integer greater
+ * than 1.
+ * <p>
+ * Key: {@code -eafod.m}
+ * </p>
+ */
+ public static final OptionID M_ID = new OptionID("ay.m", "Population size for evolutionary algorithm.");
+
+ /**
+ * Parameter to specify the random generator seed.
+ */
+ public static final OptionID SEED_ID = new OptionID("ay.seed", "The random number generator seed.");
+
protected int m = 0;
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter mP = new IntParameter(M_ID, new GreaterEqualConstraint(2));
+ final IntParameter mP = new IntParameter(M_ID);
+ mP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(mP)) {
m = mP.getValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected AggarwalYuEvolutionary<V> makeInstance() {
- return new AggarwalYuEvolutionary<V>(k, phi, m, seed);
+ return new AggarwalYuEvolutionary<V>(k, phi, m, rnd);
}
}
-}
\ No newline at end of file
+}
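
The rank-weighted roulette selection documented in rouletteRankSelection() above draws each survivor with weight (popsize - position), position 0 being the best, so the total weight is popsize*(popsize+1)/2 as in the shift-based computation in that hunk. A minimal standalone Java sketch of that selection scheme (illustrative class and method names, not the ELKI implementation):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

public class RouletteRankSelectionSketch {
  /** Draw popsize survivors (with replacement) from a population sorted best-first. */
  static <T> List<T> select(List<T> rankedPopulation, Random random) {
    final int p = rankedPopulation.size();
    final int totalweight = p * (p + 1) / 2; // weights p, p-1, ..., 1
    List<T> survivors = new ArrayList<T>(p);
    for (int i = 0; i < p; i++) {
      int ticket = random.nextInt(totalweight);
      // Walk down the ranks until the cumulative weight covers the ticket.
      int rank = 0, cumulative = p;
      while (ticket >= cumulative) {
        rank++;
        cumulative += p - rank;
      }
      survivors.add(rankedPopulation.get(rank));
    }
    return survivors;
  }

  public static void main(String[] args) {
    List<String> pop = Arrays.asList("best", "good", "fair", "poor");
    System.out.println(select(pop, new Random(0)));
  }
}

With popsize = 4 the weights are 4:3:2:1, so the best-ranked individual is expected to survive four times as often as the worst.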
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
index 0bb73aba..9cd7d79f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import java.util.ArrayList;
-import java.util.Vector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -35,12 +34,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -65,16 +64,18 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
*
* @author Ahmed Hettab
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
// TODO: progress logging!
@Title("BruteForce: Outlier detection for high dimensional data")
@Description("Examines all possible sets of k dimensional projections")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(AggarwalYuNaive.class);
+ private static final Logging LOG = Logging.getLogger(AggarwalYuNaive.class);
/**
* Constructor.
@@ -93,23 +94,23 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
* @return Outlier detection result
*/
public OutlierResult run(Relation<V> relation) {
- final int dimensionality = DatabaseUtil.dimensionality(relation);
+ final int dimensionality = RelationUtil.dimensionality(relation);
final int size = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- ArrayList<Vector<IntIntPair>> Rk;
+ ArrayList<ArrayList<IntIntPair>> Rk;
// Build a list of all subspaces
{
// R1 initial one-dimensional subspaces.
- Rk = new ArrayList<Vector<IntIntPair>>();
+ Rk = new ArrayList<ArrayList<IntIntPair>>();
// Set of all dim*phi ranges
ArrayList<IntIntPair> q = new ArrayList<IntIntPair>();
- for(int i = 1; i <= dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
for(int j = 1; j <= phi; j++) {
IntIntPair s = new IntIntPair(i, j);
q.add(s);
// Add to first Rk
- Vector<IntIntPair> v = new Vector<IntIntPair>();
+ ArrayList<IntIntPair> v = new ArrayList<IntIntPair>();
v.add(s);
Rk.add(v);
}
@@ -117,10 +118,10 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
// build Ri
for(int i = 2; i <= k; i++) {
- ArrayList<Vector<IntIntPair>> Rnew = new ArrayList<Vector<IntIntPair>>();
+ ArrayList<ArrayList<IntIntPair>> Rnew = new ArrayList<ArrayList<IntIntPair>>();
for(int j = 0; j < Rk.size(); j++) {
- Vector<IntIntPair> c = Rk.get(j);
+ ArrayList<IntIntPair> c = Rk.get(j);
for(IntIntPair pair : q) {
boolean invalid = false;
for(int t = 0; t < c.size(); t++) {
@@ -130,7 +131,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
}
}
if(!invalid) {
- Vector<IntIntPair> neu = new Vector<IntIntPair>(c);
+ ArrayList<IntIntPair> neu = new ArrayList<IntIntPair>(c);
neu.add(pair);
Rnew.add(neu);
}
@@ -142,9 +143,9 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
WritableDoubleDataStore sparsity = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
// calculate the sparsity coefficient
- for(Vector<IntIntPair> sub : Rk) {
+ for(ArrayList<IntIntPair> sub : Rk) {
DBIDs ids = computeSubspace(sub, ranges);
- final double sparsityC = sparsity(ids.size(), size, k);
+ final double sparsityC = sparsity(ids.size(), size, k, phi);
if(sparsityC < 0) {
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -171,7 +172,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -181,7 +182,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
@Override
protected AggarwalYuNaive<V> makeInstance() {
return new AggarwalYuNaive<V>(k, phi);
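
Both Aggarwal/Yu variants now pass phi into sparsity(...). In the cited SIGMOD 2001 paper this is the sparsity coefficient, which compares the number of points actually found in a k-dimensional cube of equi-depth ranges with the N*f^k expected under independence, where f = 1/phi. A rough standalone sketch under that reading (hypothetical class and method, not the ELKI helper in AbstractAggarwalYuOutlier):

public class SparsityCoefficientSketch {
  /** Sparsity coefficient of a k-dimensional cube containing cubeCount of dbsize objects. */
  static double sparsity(int cubeCount, int dbsize, int k, int phi) {
    final double fK = Math.pow(1.0 / phi, k);       // expected fraction under independence
    final double expected = dbsize * fK;            // expected number of points in the cube
    final double stddev = Math.sqrt(dbsize * fK * (1 - fK));
    return (cubeCount - expected) / stddev;         // strongly negative = unusually sparse
  }

  public static void main(String[] args) {
    // e.g. 3 of 10000 points in a 2-d cube with phi = 10 ranges per dimension:
    System.out.println(sparsity(3, 10000, 2, 10));  // about -9.7, a very sparse cube
  }
}

A strongly negative coefficient marks a cube that is far emptier than expected, which is exactly what both the naive enumeration and the evolutionary search minimize.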
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
new file mode 100644
index 00000000..ac544b7f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java
@@ -0,0 +1,385 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Correlation outlier probability: Outlier Detection in Arbitrarily Oriented
+ * Subspaces
+ *
+ * <p>
+ * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek<br />
+ * Outlier Detection in Arbitrarily Oriented Subspaces<br />
+ * in: Proc. IEEE International Conference on Data Mining (ICDM 2012)
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @param <V> the type of NumberVector handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Title("COP: Correlation Outlier Probability")
+@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)")
+public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(COP.class);
+
+ /**
+ * Result name for the COP outlier scores.
+ */
+ public static final String COP_SCORES = "cop-outlier";
+
+ /**
+ * Result name for the dimensionality.
+ */
+ public static final String COP_DIM = "cop-dim";
+
+ /**
+ * Result name for the error vectors.
+ */
+ public static final String COP_ERRORVEC = "cop-errorvec";
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+ * Holds the PCA runner.
+ */
+ private PCARunner<V> pca;
+
+ /**
+ * Expected amount of outliers.
+ */
+ double expect = 0.0001;
+
+ /**
+ * Score type.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public enum DistanceDist {
+ /**
+ * Use chi^2 for score normalization.
+ */
+ CHISQUARED,
+ /**
+ * Use gamma distributions for score normalization.
+ */
+ GAMMA
+ }
+
+ /**
+ * Type of distribution to assume for distances.
+ */
+ DistanceDist dist = DistanceDist.CHISQUARED;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction distance function
+ * @param k number of neighbors
+ * @param pca PCA computation method
+ * @param expect Expected fraction of outliers (for score normalization)
+ * @param dist Distance distribution model (ChiSquared, Gamma)
+ */
+ public COP(DistanceFunction<? super V, D> distanceFunction, int k, PCARunner<V> pca, double expect, DistanceDist dist) {
+ super(distanceFunction);
+ this.k = k;
+ this.pca = pca;
+ this.expect = expect;
+ this.dist = dist;
+ }
+
+ /**
+ * Process a single relation.
+ *
+ * @param relation Relation to process
+ * @return Outlier detection result
+ */
+ public OutlierResult run(Relation<V> relation) {
+ final DBIDs ids = relation.getDBIDs();
+ KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
+
+ final int dim = RelationUtil.dimensionality(relation);
+ if (k <= dim + 1) {
+ LOG.warning("PCA is underspecified with a too low k! k should be at much larger than " + dim);
+ }
+
+ WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDataStore<Vector> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class);
+ WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
+ // compute neighbors of each db object
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
+
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
+ nids.remove(id); // Do not use query object
+
+ Vector centroid = Centroid.make(relation, nids).toVector(relation).getColumnVector();
+ Vector relative = relation.get(id).getColumnVector().minusEquals(centroid);
+
+ PCAResult pcares = pca.processIds(nids, relation);
+ Matrix evecs = pcares.getEigenvectors();
+ Vector projected = evecs.transposeTimes(relative);
+ double[] evs = pcares.getEigenvalues();
+
+ double min = Double.POSITIVE_INFINITY;
+ int vdim = dim;
+ switch(dist) {
+ case CHISQUARED: {
+ double sqdevs = 0;
+ for (int d = 0; d < dim; d++) {
+ // Scale with Stddev
+ double dev = projected.get(d);
+ // Accumulate
+ sqdevs += dev * dev / evs[d];
+ // Evaluate
+ double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
+ if (score < min) {
+ min = score;
+ vdim = d + 1;
+ }
+ }
+ break;
+ }
+ case GAMMA: {
+ double[][] dists = new double[dim][nids.size()];
+ int j = 0;
+ Vector srel = new Vector(dim);
+ for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
+ V vec = relation.get(s);
+ for (int d = 0; d < dim; d++) {
+ srel.set(d, vec.doubleValue(d) - centroid.get(d));
+ }
+ Vector serr = evecs.transposeTimes(srel);
+ double sqdist = 0.0;
+ for (int d = 0; d < dim; d++) {
+ sqdist += serr.get(d) * serr.get(d) / evs[d];
+ dists[d][j] = sqdist;
+ }
+ j++;
+ }
+ double sqdevs = 0;
+ for (int d = 0; d < dim; d++) {
+ // Scale with Stddev
+ final double dev = projected.get(d);
+ // Accumulate
+ sqdevs += dev * dev / evs[d];
+ // Sort, so we can trim the top 15% below.
+ Arrays.sort(dists[d]);
+ // Evaluate
+ double score = 1 - GammaDistribution.estimate(dists[d], (int) (.85 * dists[d].length)).cdf(sqdevs);
+ if (score < min) {
+ min = score;
+ vdim = d + 1;
+ }
+ }
+ break;
+ }
+ }
+ // Normalize the value
+ final double prob = expect * (1 - min) / (expect + min);
+ // Construct the error vector:
+ for (int d = vdim; d < dim; d++) {
+ projected.set(d, 0.0);
+ }
+ Vector ev = evecs.times(projected).timesEquals(-1 * prob);
+
+ cop_score.putDouble(id, prob);
+ cop_err_v.put(id, ev);
+ cop_dim.putInt(id, dim + 1 - vdim);
+
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+
+ // combine results.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids);
+ OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
+ result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its COP_SCORE, must be an integer greater than
+ * 5.
+ * <p>
+ * Key: {@code -cop.k}
+ * </p>
+ */
+ public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE.");
+
+ /**
+ * Distribution assumption for distances.
+ * <p>
+ * Key: {@code -cop.dist}
+ * </p>
+ */
+ public static final OptionID DIST_ID = new OptionID("cop.dist", "The assumed distribution of squared distances. ChiSquared is faster, Gamma is expected to be more accurate but could also overfit.");
+
+ /**
+ * Class to compute the PCA with.
+ * <p>
+ * Key: {@code -cop.pcarunner}
+ * </p>
+ */
+ public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA.");
+
+ /**
+ * Expected share of outliers.
+ * <p>
+ * Key: {@code -cop.expect}
+ *
+ * Default: 0.001
+ * </p>
+ */
+ public static final OptionID EXPECT_ID = new OptionID("cop.expect", "Expected share of outliers. Only affects score normalization.");
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+ * Holds the object performing the dependency derivation.
+ */
+ PCARunner<V> pca;
+
+ /**
+ * Distance distribution assumption.
+ */
+ DistanceDist dist;
+
+ /**
+ * Expected amount of outliers.
+ */
+ double expect;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(5));
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ EnumParameter<DistanceDist> distP = new EnumParameter<DistanceDist>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA);
+ if (config.grab(distP)) {
+ dist = distP.getValue();
+ }
+ DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.001);
+ expectP.addConstraint(new GreaterConstraint(0));
+ expectP.addConstraint(new LessConstraint(1.0));
+ if (config.grab(expectP)) {
+ expect = expectP.doubleValue();
+ }
+ ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<PCARunner<V>>(PCARUNNER_ID, PCARunner.class, PCARunner.class);
+ if (config.grab(pcaP)) {
+ pca = pcaP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected COP<V, D> makeInstance() {
+ return new COP<V, D>(distanceFunction, k, pca, expect, dist);
+ }
+ }
+}
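
To summarize the CHISQUARED branch of the new COP class: deviations along the local eigenvectors are variance-normalized, accumulated one dimension at a time, and each prefix sum is tested against a chi-squared distribution with the matching degrees of freedom; the smallest tail probability (and its dimensionality) wins. A compact sketch reusing the same ChiSquaredDistribution.cdf call as the new file (the sketch's class and method names are illustrative, and it assumes the ELKI jar on the classpath):

import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;

final class CopChiSquaredSketch {
  /**
   * @param projected deviations of the query point along the local eigenvectors
   * @param evs matching eigenvalues (variances) of the neighborhood PCA
   * @return smallest tail probability 1 - cdf over dimensionalities 1..dim
   */
  static double minTailProbability(double[] projected, double[] evs) {
    double min = Double.POSITIVE_INFINITY;
    double sqdevs = 0;
    for (int d = 0; d < projected.length; d++) {
      final double dev = projected[d];
      // Accumulate the variance-normalized squared deviation.
      sqdevs += dev * dev / evs[d];
      // Tail probability under a chi^2 distribution with d+1 degrees of freedom.
      double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
      if (score < min) {
        min = score;
      }
    }
    // COP then maps this to an outlier probability via expect * (1 - min) / (expect + min).
    return min;
  }
}

The GAMMA branch follows the same accumulation but replaces the fixed chi-squared reference with a Gamma distribution fitted to the trimmed squared distances of the neighbors, as shown in the hunk above.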
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
index dbaf8a5a..ba1fd841 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
@@ -24,17 +24,17 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -72,13 +72,13 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DBOutlierDetection.class);
+ private static final Logging LOG = Logging.getLogger(DBOutlierDetection.class);
/**
* Parameter to specify the minimum fraction of objects that must be outside
* the D- neighborhood of an outlier
*/
- public static final OptionID P_ID = OptionID.getOrCreateOptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
+ public static final OptionID P_ID = new OptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
/**
* Holds the value of {@link #P_ID}.
@@ -98,7 +98,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
@Override
- protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
@@ -106,11 +106,11 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
int m = (int) ((distFunc.getRelation().size()) * (1 - p));
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
- if(logger.isVerbose()) {
- logger.verbose("computing outlier flag");
+ if(LOG.isVerbose()) {
+ LOG.verbose("computing outlier flag");
}
- FiniteProgress progressOFlags = logger.isVerbose() ? new FiniteProgress("DBOutlier for objects", distFunc.getRelation().size(), logger) : null;
+ FiniteProgress progressOFlags = LOG.isVerbose() ? new FiniteProgress("DBOutlier for objects", distFunc.getRelation().size(), LOG) : null;
int counter = 0;
// if index exists, kNN query. if the distance to the mth nearest neighbor
// is more than d -> object is outlier
@@ -118,8 +118,8 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, m);
- if(logger.isDebugging()) {
- logger.debugFine("distance to mth nearest neighbour" + knns.toString());
+ if(LOG.isDebugging()) {
+ LOG.debugFine("distance to mth nearest neighbour" + knns.toString());
}
if(knns.get(Math.min(m, knns.size()) - 1).getDistance().compareTo(neighborhoodSize) <= 0) {
// flag as outlier
@@ -131,7 +131,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
}
if(progressOFlags != null) {
- progressOFlags.setProcessed(counter, logger);
+ progressOFlags.setProcessed(counter, LOG);
}
}
else {
@@ -149,18 +149,18 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
if(progressOFlags != null) {
- progressOFlags.setProcessed(counter, logger);
+ progressOFlags.setProcessed(counter, LOG);
}
}
if(progressOFlags != null) {
- progressOFlags.ensureCompleted(logger);
+ progressOFlags.ensureCompleted(LOG);
}
return scores;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
index 419b9a0e..a2d39130 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
@@ -24,9 +24,9 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -60,7 +60,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DBOutlierScore.class);
+ private static final Logging LOG = Logging.getLogger(DBOutlierScore.class);
/**
* Constructor with parameters.
@@ -73,7 +73,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
}
@Override
- protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d) {
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
final double size = distFunc.getRelation().size();
@@ -90,7 +90,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
index db4b7782..2d2a4466 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
@@ -62,11 +62,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
// TODO: re-use an existing EM when present?
@Title("EM Outlier: Outlier Detection based on the generic EM clustering")
@Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.")
-public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(EMOutlier.class);
+ private static final Logging LOG = Logging.getLogger(EMOutlier.class);
/**
* Inner algorithm.
@@ -120,7 +120,7 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -130,7 +130,7 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected EM<V> em = null;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
index 51833c8b..6aed60fe 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
@@ -29,10 +29,10 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -43,7 +43,6 @@ import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -61,16 +60,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
*/
@Title("Gaussian Model Outlier Detection")
@Description("Fit a multivariate gaussian model onto the data, and use the PDF to compute an outlier score.")
-public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(GaussianModel.class);
+ private static final Logging LOG = Logging.getLogger(GaussianModel.class);
/**
* OptionID for inversion flag.
*/
- public static final OptionID INVERT_ID = OptionID.getOrCreateOptionID("gaussod.invert", "Invert the value range to [0:1], with 1 being outliers instead of 0.");
+ public static final OptionID INVERT_ID = new OptionID("gaussod.invert", "Invert the value range to [0:1], with 1 being outliers instead of 0.");
/**
* Small value to increment diagonally of a matrix in order to avoid
@@ -113,7 +112,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
Matrix covarianceTransposed = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
// Normalization factors for Gaussian PDF
- final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(relation)) * covarianceMatrix.det())));
+ final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceMatrix.det())));
// for each object compute Mahalanobis distance
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -130,8 +129,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
if(invert) {
double max = mm.getMax() != 0 ? mm.getMax() : 1.;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- oscores.putDouble(id, (max - oscores.doubleValue(id)) / max);
+ oscores.putDouble(iditer, (max - oscores.doubleValue(iditer)) / max);
}
meta = new BasicOutlierScoreMeta(0.0, 1.0);
}
@@ -149,7 +147,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -159,7 +157,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected boolean invert = false;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
index 1cd31442..db53a3ef 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
@@ -32,13 +32,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -48,7 +48,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -79,21 +78,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@Title("Gaussian-Uniform Mixture Model Outlier Detection")
@Description("Fits a mixture model consisting of a Gaussian and a uniform distribution to the data.")
@Reference(prefix = "Generalization using the likelihood gain as outlier score of", authors = "Eskin, Eleazar", title = "Anomaly detection over noisy data using learned probability distributions", booktitle = "Proc. of the Seventeenth International Conference on Machine Learning (ICML-2000)")
-public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(GaussianUniformMixture.class);
+ private static final Logging LOG = Logging.getLogger(GaussianUniformMixture.class);
/**
* Parameter to specify the fraction of expected outliers.
*/
- public static final OptionID L_ID = OptionID.getOrCreateOptionID("mmo.l", "expected fraction of outliers");
+ public static final OptionID L_ID = new OptionID("mmo.l", "expected fraction of outliers");
/**
* Parameter to specify the cutoff.
*/
- public static final OptionID C_ID = OptionID.getOrCreateOptionID("mmo.c", "cutoff");
+ public static final OptionID C_ID = new OptionID("mmo.c", "cutoff");
/**
* Small value to increment diagonally of a matrix in order to avoid
@@ -154,20 +153,19 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
// logger.debugFine(logLike + " loglike beginning" +
// loglikelihoodNormal(normalObjs, database));
DoubleMinMax minmax = new DoubleMinMax();
- for(int i = 0; i < objids.size(); i++) {
+
+ DBIDIter iter = objids.iter();
+ for(int i = 0; i < objids.size(); i++, iter.advance()) {
// logger.debugFine("i " + i);
// Change mask to make the current object anomalous
bits.set(i);
// Compute new likelihoods
double currentLogLike = normalObjs.size() * logml + loglikelihoodNormal(normalObjs, relation) + anomalousObjs.size() * logl + loglikelihoodAnomalous(anomalousObjs);
- // Get the actual object id
- DBID curid = objids.get(i);
-
// if the loglike increases more than a threshold, object stays in
// anomalous set and is flagged as outlier
final double loglikeGain = currentLogLike - logLike;
- oscores.putDouble(curid, loglikeGain);
+ oscores.putDouble(iter, loglikeGain);
minmax.put(loglikeGain);
if(loglikeGain > c) {
@@ -221,7 +219,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
Matrix covInv = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
double covarianceDet = covarianceMatrix.det();
- double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(database)) * covarianceDet);
+ double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(database)) * covarianceDet);
// for each object compute probability and sum
double prob = 0;
for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
@@ -239,7 +237,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -249,7 +247,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected double l = 1E-7;
protected double c = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
index 4ed56e1a..15f6cbf3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
@@ -36,13 +36,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
@@ -91,11 +93,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("Fast Outlier Detection in High Dimensional Spaces")
@Description("Algorithm to compute outliers using Hilbert space filling curves")
@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", url = "http://dx.doi.org/10.1145/375663.375668")
-public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HilOut.class);
+ private static final Logging LOG = Logging.getLogger(HilOut.class);
/**
* Number of nearest neighbors
@@ -170,7 +172,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
public OutlierResult run(Database database, Relation<O> relation) {
distq = database.getDistanceQuery(relation, getDistanceFunction());
- d = DatabaseUtil.dimensionality(relation);
+ d = RelationUtil.dimensionality(relation);
WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// Compute extend of dataset.
@@ -181,18 +183,18 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
min = new double[d];
double[] max = new double[d];
for(int i = 0; i < d; i++) {
- min[i] = hbbs.first.doubleValue(i + 1);
- max[i] = hbbs.second.doubleValue(i + 1);
+ min[i] = hbbs.first.doubleValue(i);
+ max[i] = hbbs.second.doubleValue(i);
diameter = Math.max(diameter, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
for(int i = 0; i < d; i++) {
- double diff = (diameter - (max[i] - min[i])) / 2;
+ double diff = (diameter - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
- if(logger.isVerbose()) {
- logger.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
}
}
@@ -200,8 +202,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
capital_n_star = capital_n = relation.size();
HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
- FiniteProgress progressHilOut = logger.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, logger) : null;
- FiniteProgress progressTrueOut = logger.isVerbose() ? new FiniteProgress("True outliers found", n, logger) : null;
+ FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
+ FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
// Main part: 1. Phase max. d+1 loops
for(int j = 0; j <= d && n_star < n; j++) {
// initialize (clear) out and wlb - not 100% clear in the paper
@@ -214,7 +216,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
// determine the true outliers (n_star)
trueOutliers(h);
if(progressTrueOut != null) {
- progressTrueOut.setProcessed(n_star, logger);
+ progressTrueOut.setProcessed(n_star, LOG);
}
// Build the top Set as out + wlb
h.top.clear();
@@ -230,7 +232,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
}
}
if(progressHilOut != null) {
- progressHilOut.incrementProcessed(logger);
+ progressHilOut.incrementProcessed(LOG);
}
}
// 2. Phase: Additional Scan if less than n true outliers determined
@@ -241,12 +243,12 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
scan(h, capital_n);
}
if(progressHilOut != null) {
- progressHilOut.setProcessed(d, logger);
- progressHilOut.ensureCompleted(logger);
+ progressHilOut.setProcessed(d, LOG);
+ progressHilOut.ensureCompleted(LOG);
}
if(progressTrueOut != null) {
- progressTrueOut.setProcessed(n, logger);
- progressTrueOut.ensureCompleted(logger);
+ progressTrueOut.setProcessed(n, LOG);
+ progressTrueOut.ensureCompleted(LOG);
}
DoubleMinMax minmax = new DoubleMinMax();
// Return weights in out
@@ -281,8 +283,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*/
private void scan(HilbertFeatures hf, int k0) {
final int mink0 = Math.min(2 * k0, capital_n - 1);
- if(logger.isDebuggingFine()) {
- logger.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
}
for(int i = 0; i < hf.pf.length; i++) {
if(hf.pf[i].ubound < omega_star) {
@@ -366,7 +368,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
if(mlevel < level) {
level = mlevel;
final double delta = hf.minDistLevel(hf.pf[i].id, level);
- if(delta >= hf.pf[i].nn.peek().getDoubleDistance()) {
+ if(delta >= hf.pf[i].nn.peek().doubleDistance()) {
break; // stop = true
}
}
@@ -376,10 +378,10 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
double br = hf.boxRadius(i, a - 1, b + 1);
double newlb = 0.0;
double newub = 0.0;
- for(DoubleDistanceResultPair entry : hf.pf[i].nn) {
- newub += entry.getDoubleDistance();
- if(entry.getDoubleDistance() <= br) {
- newlb += entry.getDoubleDistance();
+ for(DoubleDistanceDBIDPair entry : hf.pf[i].nn) {
+ newub += entry.doubleDistance();
+ if(entry.doubleDistance() <= br) {
+ newlb += entry.doubleDistance();
}
}
if(newlb > hf.pf[i].lbound) {
@@ -408,7 +410,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -482,7 +484,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
int pos = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- pf[pos++] = new HilFeature(iditer.getDBID(), new Heap<DoubleDistanceResultPair>(k, Collections.reverseOrder()));
+ pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new Heap<DoubleDistanceDBIDPair>(k, Collections.reverseOrder()));
}
this.out = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
@Override
@@ -513,7 +515,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
if(h >= 32) { // 32 to 63 bit
final long scale = Long.MAX_VALUE; // = 63 bits
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
long[] coord = new long[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
@@ -524,7 +526,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
else if(h >= 16) { // 16-31 bit
final int scale = ~1 >>> 1;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
int[] coord = new int[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
@@ -535,7 +537,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
else if(h >= 8) { // 8-15 bit
final int scale = ~1 >>> 16;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
short[] coord = new short[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
@@ -546,7 +548,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
else { // 1-7 bit
final int scale = ~1 >>> 8;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
byte[] coord = new byte[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
@@ -575,15 +577,13 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*/
private void updateOUT(int i) {
if(out.size() < n) {
- out.offer(pf[i]);
+ out.add(pf[i]);
}
else {
HilFeature head = out.peek();
if(pf[i].ubound > head.ubound) {
// replace smallest
- out.poll();
- // assert (out.peek().ubound >= head.ubound);
- out.offer(pf[i]);
+ out.replaceTopElement(pf[i]);
}
}
}
@@ -595,15 +595,13 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*/
private void updateWLB(int i) {
if(wlb.size() < n) {
- wlb.offer(pf[i]);
+ wlb.add(pf[i]);
}
else {
HilFeature head = wlb.peek();
if(pf[i].lbound > head.lbound) {
// replace smallest
- wlb.poll();
- // assert (wlb.peek().lbound >= head.lbound);
- wlb.offer(pf[i]);
+ wlb.replaceTopElement(pf[i]);
}
}
}
@@ -639,7 +637,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param level Level of the corresponding r-region
*/
private double minDistLevel(DBID id, int level) {
- final NumberVector<?, ?> obj = relation.get(id);
+ final NumberVector<?> obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
// 2 ^ - (level - 1)
final double r = 1.0 / (1 << (level - 1));
@@ -659,7 +657,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param level Level of the corresponding r-region
*/
private double maxDistLevel(DBID id, int level) {
- final NumberVector<?, ?> obj = relation.get(id);
+ final NumberVector<?> obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
final double r = 1.0 / (1 << (level - 1));
double dist;
@@ -780,8 +778,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param dim Dimension
* @return Projected and shifted position
*/
- private double getDimForObject(NumberVector<?, ?> obj, int dim) {
- return (obj.doubleValue(dim + 1) - min[dim]) / diameter + shift;
+ private double getDimForObject(NumberVector<?> obj, int dim) {
+ return (obj.doubleValue(dim) - min[dim]) / diameter + shift;
}
}
@@ -824,7 +822,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
/**
* Heap with the nearest known neighbors
*/
- public Heap<DoubleDistanceResultPair> nn;
+ public Heap<DoubleDistanceDBIDPair> nn;
/**
* Set representation of the nearest neighbors for faster lookups
@@ -842,7 +840,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
* @param id Object ID
* @param nn Heap for neighbors
*/
- public HilFeature(DBID id, Heap<DoubleDistanceResultPair> nn) {
+ public HilFeature(DBID id, Heap<DoubleDistanceDBIDPair> nn) {
super();
this.id = id;
this.nn = nn;
@@ -864,27 +862,26 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
protected void insert(DBID id, double dt, int k) {
// assert (!nn_keys.contains(id));
if(nn.size() < k) {
- DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
- nn.offer(entry);
+ DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
}
else {
- DoubleDistanceResultPair head = nn.peek();
- if(dt < head.getDoubleDistance()) {
+ DoubleDistanceDBIDPair head = nn.peek();
+ if(dt < head.doubleDistance()) {
head = nn.poll(); // Remove worst
- sum_nn -= head.getDoubleDistance();
- nn_keys.remove(head.getDBID());
+ sum_nn -= head.doubleDistance();
+ nn_keys.remove(head);
- // assert (nn.peek().getDoubleDistance() <= head.getDoubleDistance());
+ // assert (nn.peek().doubleDistance() <= head.doubleDistance());
- DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
- nn.offer(entry);
+ DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
}
}
-
}
}
@@ -897,33 +894,33 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA
*
* @param <O> Vector type
*/
- public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
/**
* Parameter to specify how many next neighbors should be used in the
* computation
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("HilOut.k", "Compute up to k next neighbors");
+ public static final OptionID K_ID = new OptionID("HilOut.k", "Compute up to k next neighbors");
/**
* Parameter to specify how many outliers should be computed
*/
- public static final OptionID N_ID = OptionID.getOrCreateOptionID("HilOut.n", "Compute n outliers");
+ public static final OptionID N_ID = new OptionID("HilOut.n", "Compute n outliers");
/**
* Parameter to specify the maximum Hilbert-Level
*/
- public static final OptionID H_ID = OptionID.getOrCreateOptionID("HilOut.h", "Max. Hilbert-Level");
+ public static final OptionID H_ID = new OptionID("HilOut.h", "Max. Hilbert-Level");
/**
* Parameter to specify p of LP-NormDistance
*/
- public static final OptionID T_ID = OptionID.getOrCreateOptionID("HilOut.t", "t of Lt Metric");
+ public static final OptionID T_ID = new OptionID("HilOut.t", "t of Lt Metric");
/**
* Parameter to specify if only the Top n, or also approximations for the
* other elements, should be returned
*/
- public static final OptionID TN_ID = OptionID.getOrCreateOptionID("HilOut.tn", "output of Top n or all elements");
+ public static final OptionID TN_ID = new OptionID("HilOut.tn", "output of Top n or all elements");
/**
* Neighborhood size
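The HilOut hunks above are mostly API renames (logger to LOG, DoubleDistanceResultPair to DoubleDistanceDBIDPair), but the first hunk also shows the unit-cube rescaling that getDimForObject later relies on. A minimal stand-alone sketch of that rescaling, assuming plain arrays instead of ELKI's NumberVector and a zero shift (class and values here are illustrative only):

// Widen every dimension to the common diameter, then project into [0,1].
public class UnitCubeSketch {
  public static void main(String[] args) {
    double[] min = { 0.0, 2.0 };
    double[] max = { 4.0, 3.0 };
    double diameter = 0.0;
    for (int i = 0; i < min.length; i++) {
      diameter = Math.max(diameter, max[i] - min[i]);
    }
    for (int i = 0; i < min.length; i++) {
      double diff = (diameter - (max[i] - min[i])) * .5; // enlarge the narrow dimensions
      min[i] -= diff;
      max[i] += diff;
    }
    double[] point = { 4.0, 3.0 };
    for (int i = 0; i < point.length; i++) {
      // Same projection as getDimForObject, with shift = 0.
      System.out.println((point[i] - min[i]) / diameter); // prints 1.0, then 0.625
    }
  }
}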
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
index 1fe5fe71..655a0910 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -36,10 +37,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -81,7 +82,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(INFLO.class);
+ private static final Logging LOG = Logging.getLogger(INFLO.class);
/**
* Parameter to specify if any object is a Core Object must be a double
@@ -89,7 +90,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* <p>
* see paper "Two-way search method" 3.2
*/
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("inflo.m", "The threshold");
+ public static final OptionID M_ID = new OptionID("inflo.m", "The threshold");
/**
* Holds the value of {@link #M_ID}.
@@ -101,7 +102,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
* considered for computing its INFLO_SCORE. must be an integer greater than
* 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
+ public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
/**
* Holds the value of {@link #K_ID}.
@@ -140,7 +141,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// density
WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// init knns and rnns
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
knns.put(iditer, DBIDUtil.newArray());
rnns.put(iditer, DBIDUtil.newArray());
}
@@ -148,38 +149,34 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// TODO: use kNN preprocessor?
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
// if not visited count=0
int count = rnns.get(id).size();
- ModifiableDBIDs s;
- if(!processedIDs.contains(id)) {
+ if (!processedIDs.contains(id)) {
// TODO: use exactly k neighbors?
KNNResult<D> list = knnQuery.getKNNForDBID(id, k);
- knns.get(id).addDBIDs(list.asDBIDs());
+ knns.get(id).addDBIDs(list);
processedIDs.add(id);
- s = knns.get(id);
- density.putDouble(id, 1 / list.get(k - 1).getDistance().doubleValue());
+ density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
}
- else {
- s = knns.get(id);
- }
- for (DBIDIter q = s.iter(); q.valid(); q.advance()) {
- if(!processedIDs.contains(q)) {
+ ModifiableDBIDs s = knns.get(id);
+ for (DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
+ if (!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
- knns.get(q).addDBIDs(listQ.asDBIDs());
+ knns.get(q).addDBIDs(listQ);
density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
processedIDs.add(q);
}
- if(knns.get(q).contains(id)) {
+ if (knns.get(q).contains(id)) {
rnns.get(q).add(id);
rnns.get(id).add(q);
count++;
}
}
- if(count >= s.size() * m) {
+ if (count >= s.size() * m) {
pruned.add(id);
}
}
@@ -188,8 +185,8 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// IF Object is pruned INFLO=1.0
DoubleMinMax inflominmax = new DoubleMinMax();
WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
- if(!pruned.contains(id)) {
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ if (!pruned.contains(id)) {
ModifiableDBIDs knn = knns.get(id);
ModifiableDBIDs rnn = rnns.get(id);
@@ -205,7 +202,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
inflominmax.put(den);
}
- if(pruned.contains(id)) {
+ if (pruned.contains(id)) {
inflos.putDouble(id, 1.0);
inflominmax.put(1.0);
}
@@ -224,15 +221,15 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected double m = 1.0;
@@ -242,14 +239,16 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final DoubleParameter mP = new DoubleParameter(M_ID, new GreaterConstraint(0.0), 1.0);
- if(config.grab(mP)) {
- m = mP.getValue();
+ final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
+ mP.addConstraint(new GreaterConstraint(0.0));
+ if (config.grab(mP)) {
+ m = mP.doubleValue();
}
- final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(1));
- if(config.grab(kP)) {
- k = kP.getValue();
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
}
@@ -258,4 +257,4 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
return new INFLO<O, D>(distanceFunction, m, k);
}
}
-} \ No newline at end of file
+}
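For reference while reading the INFLO hunks: the score compares the point's density (1 / k-distance, as stored via density.putDouble above) with the mean density of its influence space, i.e. its kNN plus reverse kNN. A toy sketch of just that ratio, with hypothetical density values and no ELKI types:

public class InfloSketch {
  static double inflo(double ownDensity, double[] influenceSpaceDensities) {
    double sum = 0.0;
    for (double d : influenceSpaceDensities) {
      sum += d;
    }
    // Mean density of kNN plus reverse kNN, relative to the point's own density.
    return (sum / influenceSpaceDensities.length) / ownDensity;
  }

  public static void main(String[] args) {
    // Hypothetical densities: the point is much sparser than its influence space.
    double[] neighbourhood = { 2.0, 1.8, 2.2 };
    System.out.println(inflo(0.5, neighbourhood)); // 4.0, clearly above 1
  }
}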
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
index 08be944a..4c4873dd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -32,10 +33,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -77,12 +79,12 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNOutlier.class);
+ private static final Logging LOG = Logging.getLogger(KNNOutlier.class);
/**
* Parameter to specify the k nearest neighbor
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knno.k", "k nearest neighbor");
+ public static final OptionID K_ID = new OptionID("knno.k", "k nearest neighbor");
/**
* The parameter k
@@ -107,28 +109,34 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
- if(logger.isVerbose()) {
- logger.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
}
- FiniteProgress progressKNNDistance = logger.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), logger) : null;
+ FiniteProgress progressKNNDistance = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
- double dkn = knns.getKNNDistance().doubleValue();
- knno_score.putDouble(iditer, dkn);
+ final double dkn;
+ if(knns instanceof DoubleDistanceKNNList) {
+ dkn = ((DoubleDistanceKNNList) knns).doubleKNNDistance();
+ }
+ else {
+ dkn = knns.getKNNDistance().doubleValue();
+ }
+ knno_score.putDouble(iditer, dkn);
minmax.put(dkn);
if(progressKNNDistance != null) {
- progressKNNDistance.incrementProcessed(logger);
+ progressKNNDistance.incrementProcessed(LOG);
}
}
if(progressKNNDistance != null) {
- progressKNNDistance.ensureCompleted(logger);
+ progressKNNDistance.ensureCompleted(LOG);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
@@ -142,15 +150,15 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected int k = 0;
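The KNNOutlier changes above only touch logging and add a double-distance fast path; the score itself is still the distance to the k-th nearest neighbor. A brute-force 1-d sketch of that score, using plain arrays rather than the KNNQuery machinery:

import java.util.Arrays;

public class KnnOutlierSketch {
  static double kDist(double[] data, int i, int k) {
    double[] dist = new double[data.length];
    for (int j = 0; j < data.length; j++) {
      dist[j] = Math.abs(data[i] - data[j]);
    }
    Arrays.sort(dist);
    return dist[k]; // dist[0] == 0 is the point itself, so index k is the k-th neighbor
  }

  public static void main(String[] args) {
    double[] data = { 1.0, 1.1, 1.2, 1.3, 9.0 };
    for (int i = 0; i < data.length; i++) {
      System.out.println(data[i] + " -> " + kDist(data, i, 2)); // 9.0 stands out clearly
    }
  }
}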
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
index cb3ca2f1..e7eeeb9c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
@@ -31,13 +31,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -74,17 +76,17 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNWeightOutlier.class);
+ private static final Logging LOG = Logging.getLogger(KNNWeightOutlier.class);
/**
* Parameter to specify the k nearest neighbor
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knnwod.k", "k nearest neighbor");
+ public static final OptionID K_ID = new OptionID("knnwod.k", "k nearest neighbor");
/**
* The kNN query used.
*/
- public static final OptionID KNNQUERY_ID = OptionID.getOrCreateOptionID("knnwod.knnquery", "kNN query to use");
+ public static final OptionID KNNQUERY_ID = new OptionID("knnwod.knnquery", "kNN query to use");
/**
* Holds the value of {@link #K_ID}.
@@ -109,33 +111,40 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
- if(logger.isVerbose()) {
- logger.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
+ if(LOG.isVerbose()) {
+ LOG.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
}
- FiniteProgress progressKNNWeight = logger.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), logger) : null;
+ FiniteProgress progressKNNWeight = LOG.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
// compute distance to the k nearest neighbor. n objects with the highest
// distance are flagged as outliers
WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute sum of the distances to the k nearest neighbors
final KNNResult<D> knn = knnQuery.getKNNForDBID(iditer, k);
double skn = 0;
- for(DistanceResultPair<D> r : knn) {
- skn += r.getDistance().doubleValue();
+ if(knn instanceof DoubleDistanceKNNList) {
+ for(DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
+ skn += neighbor.doubleDistance();
+ }
+ }
+ else {
+ for(DistanceDBIDResultIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
+ skn += neighbor.getDistance().doubleValue();
+ }
}
knnw_score.putDouble(iditer, skn);
minmax.put(skn);
if(progressKNNWeight != null) {
- progressKNNWeight.incrementProcessed(logger);
+ progressKNNWeight.incrementProcessed(LOG);
}
}
if(progressKNNWeight != null) {
- progressKNNWeight.ensureCompleted(logger);
+ progressKNNWeight.ensureCompleted(LOG);
}
Relation<Double> res = new MaterializedRelation<Double>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
@@ -150,7 +159,7 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
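KNNWeightOutlier differs from KNNOutlier only in summing the distances to all k neighbors instead of taking the k-th distance, which is exactly what the new DoubleDistanceKNNList fast path above iterates over. A 1-d sketch of the summed score, again with plain arrays instead of ELKI types:

import java.util.Arrays;

public class KnnWeightSketch {
  static double knnWeight(double[] data, int i, int k) {
    double[] dist = new double[data.length];
    for (int j = 0; j < data.length; j++) {
      dist[j] = Math.abs(data[i] - data[j]);
    }
    Arrays.sort(dist);
    double sum = 0.0;
    for (int j = 1; j <= k; j++) { // skip dist[0] == 0, the point itself
      sum += dist[j];
    }
    return sum;
  }

  public static void main(String[] args) {
    double[] data = { 1.0, 1.1, 1.2, 1.3, 9.0 };
    System.out.println(knnWeight(data, 0, 2)); // about 0.3 for a cluster member
    System.out.println(knnWeight(data, 4, 2)); // about 15.5 for the outlier at 9.0
  }
}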
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java
new file mode 100644
index 00000000..4ce0313e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java
@@ -0,0 +1,342 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.statistics.GaussianKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Outlier Detection with Kernel Density Functions.
+ *
+ * A variation of LOF which uses kernel density estimation, but in contrast to
+ * {@link SimpleKernelDensityLOF} also uses the reachability concept of LOF.
+ *
+ * Reference:
+ * <p>
+ * Outlier Detection with Kernel Density Functions.<br />
+ * L. J. Latecki, A. Lazarevic, D. Pokrajac<br />
+ * Machine Learning and Data Mining in Pattern Recognition 2007
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ * @apiviz.has KernelDensityFunction
+ *
+ * @param <O> the type of objects handled by this Algorithm
+ * @param <D> Distance type
+ */
+@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", title = "Outlier Detection with Kernel Density Functions", booktitle = "Machine Learning and Data Mining in Pattern Recognition", url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6")
+public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(LDF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Bandwidth scaling factor.
+ */
+ protected double h = 1;
+
+ /**
+ * Scaling constant, to limit value range to 1/c
+ */
+ protected double c = 0.1;
+
+ /**
+ * Kernel density function
+ */
+ private KernelDensityFunction kernel;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param distance Distance function to use
+ * @param kernel Kernel function
+ * @param h Kernel bandwidth scaling
+ * @param c Score scaling parameter
+ */
+ public LDF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel, double h, double c) {
+ super(distance);
+ this.k = k + 1;
+ this.kernel = kernel;
+ this.h = h;
+ this.c = c;
+ }
+
+ /**
+ * Run the LDF outlier detection algorithm.
+ *
+ * @param relation Data to process
+ * @return LDF outlier result
+ */
+ public OutlierResult run(Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
+
+ final int dim = RelationUtil.dimensionality(relation);
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
+ relation.getDatabase().addIndex(preproc);
+ DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LDEs (local density estimates)
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LDEs.", LOG);
+ }
+ WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
+
+ final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
+ final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist);
+ sum += kernel.density(v) / Math.pow(h * nkdist, dim);
+ count++;
+ }
+ }
+ ldes.putDouble(it, sum / count);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // Compute local density factors.
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LDFs.", LOG);
+ }
+ WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = ldes.doubleValue(it);
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += ldes.doubleValue(neighbor);
+ count++;
+ }
+ sum /= count;
+ final double div = lrdp + c * sum;
+ double ldf = (div > 0) ? sum / div : 0;
+ ldfs.putDouble(it, ldf);
+ // update minimum and maximum
+ lofminmax.put(ldf);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids);
+ OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD));
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for kernel.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("ldf.kernel", "Kernel to use for LDF.");
+
+ /**
+ * Option ID for k
+ */
+ public static final OptionID K_ID = new OptionID("ldf.k", "Number of neighbors to use for LDF.");
+
+ /**
+ * Option ID for h - kernel bandwidth scaling
+ */
+ public static final OptionID H_ID = new OptionID("ldf.h", "Kernel bandwidth multiplier for LDF.");
+
+ /**
+ * Option ID for c
+ */
+ public static final OptionID C_ID = new OptionID("ldf.c", "Score scaling parameter for LDF.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ /**
+ * Kernel density function parameter
+ */
+ KernelDensityFunction kernel;
+
+ /**
+ * Bandwidth scaling factor.
+ */
+ protected double h = 1;
+
+ /**
+ * Scaling constant, to limit value range to 1/c
+ */
+ protected double c = 0.1;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class);
+ if (config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+
+ DoubleParameter hP = new DoubleParameter(H_ID);
+ if (config.grab(hP)) {
+ h = hP.doubleValue();
+ }
+
+ DoubleParameter cP = new DoubleParameter(C_ID, 0.1);
+ if (config.grab(cP)) {
+ c = cP.doubleValue();
+ }
+ }
+
+ @Override
+ protected LDF<O, D> makeInstance() {
+ return new LDF<O, D>(k, distanceFunction, kernel, h, c);
+ }
+ }
+}
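To make the two phases of the new LDF class easier to follow: phase one estimates a kernel density per point, using each neighbor's k-distance as a reachability-style bandwidth; phase two forms the ratio of the neighbors' mean density to the point's own density, damped by c. A condensed 1-d sketch of those formulas with a Gaussian kernel and hand-picked k-distances and neighbor lists (illustrative values only, not the ELKI API):

public class LdfSketch {
  static double gauss(double u) {
    return Math.exp(-0.5 * u * u) / Math.sqrt(2.0 * Math.PI);
  }

  // Phase 1: local density estimate of point i, using each neighbor's
  // k-distance as bandwidth (dim == 1 here, so (h*kdist)^dim is just h*kdist).
  static double lde(double[] x, double[] kdist, int i, int[] nn, double h) {
    double sum = 0.0;
    for (int j : nn) {
      double v = Math.max(kdist[j], Math.abs(x[i] - x[j])) / (h * kdist[j]);
      sum += gauss(v) / (h * kdist[j]);
    }
    return sum / nn.length;
  }

  // Phase 2: ratio of the neighbors' mean density to the point's own density.
  static double ldf(double ldePoint, double ldeNeighborsMean, double c) {
    return ldeNeighborsMean / (ldePoint + c * ldeNeighborsMean);
  }

  public static void main(String[] args) {
    double[] x = { 0.0, 0.1, 0.2, 5.0 };
    double[] kdist = { 0.2, 0.1, 0.2, 4.9 }; // 2-distances, precomputed by hand
    double ldeOutlier = lde(x, kdist, 3, new int[] { 1, 2 }, 1.0);
    double ldeNeighbors = 0.5 * (lde(x, kdist, 1, new int[] { 0, 2 }, 1.0)
        + lde(x, kdist, 2, new int[] { 0, 1 }, 1.0));
    System.out.println(ldf(ldeOutlier, ldeNeighbors, 0.1)); // close to 1/c = 10 for the outlier
  }
}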
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
index 84f5dcc6..fbbfe484 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
@@ -31,13 +31,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -81,13 +82,13 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LDOF.class);
+ private static final Logging LOG = Logging.getLogger(LDOF.class);
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LDOF_SCORE, must be an integer greater than 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("ldof.k", "The number of nearest neighbors of an object to be considered for computing its LDOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("ldof.k", "The number of nearest neighbors of an object to be considered for computing its LDOF_SCORE.");
/**
* The baseline for LDOF values. The paper gives 0.5 for uniform
@@ -128,21 +129,22 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// compute LOF_SCORE of each db object
- if(logger.isVerbose()) {
- logger.verbose("Computing LDOFs");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing LDOFs");
}
- FiniteProgress progressLDOFs = logger.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), logger) : null;
+ FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), LOG) : null;
Mean dxp = new Mean(), Dxp = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
// skip the point itself
dxp.reset(); Dxp.reset();
- for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(!neighbor1.sameDBID(iditer)) {
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ if(!DBIDUtil.equal(neighbor1, iditer)) {
dxp.put(neighbor1.getDistance().doubleValue());
- for(DistanceResultPair<D> neighbor2 : neighbors) {
- if(!neighbor1.sameDBID(neighbor2) && !neighbor2.sameDBID(iditer)) {
+ for (DistanceDBIDResultIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
}
}
@@ -157,11 +159,11 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
ldofminmax.put(ldof);
if(progressLDOFs != null) {
- progressLDOFs.incrementProcessed(logger);
+ progressLDOFs.incrementProcessed(LOG);
}
}
if(progressLDOFs != null) {
- progressLDOFs.ensureCompleted(logger);
+ progressLDOFs.ensureCompleted(LOG);
}
// Build result representation.
@@ -177,7 +179,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -193,7 +195,8 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
if(config.grab(kP)) {
k = kP.getValue();
}
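The LDOF hunks switch iteration to DistanceDBIDResultIter but keep the score itself: the mean distance from the point to its k neighbors (dxp above) divided by the mean pairwise distance among those neighbors (Dxp). A toy 1-d sketch of that ratio, with no ELKI types:

public class LdofSketch {
  static double ldof(double point, double[] neighbors) {
    double dxp = 0.0;   // mean distance from the point to its neighbors
    double dpair = 0.0; // mean pairwise distance among the neighbors
    int pairs = 0;
    for (int i = 0; i < neighbors.length; i++) {
      dxp += Math.abs(point - neighbors[i]);
      for (int j = i + 1; j < neighbors.length; j++) {
        dpair += Math.abs(neighbors[i] - neighbors[j]);
        pairs++;
      }
    }
    dxp /= neighbors.length;
    dpair /= pairs;
    return dxp / dpair;
  }

  public static void main(String[] args) {
    double[] nn = { 1.0, 1.1, 1.3 };
    System.out.println(ldof(1.2, nn)); // same order as the 0.5 baseline for a cluster member
    System.out.println(ldof(9.0, nn)); // far above it for an outlier
  }
}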
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
index a04aa041..ba9ad20e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
@@ -36,13 +36,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -64,9 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
/**
* Fast Outlier Detection Using the "Local Correlation Integral".
*
- * Exact implementation only, not aLOCI.
- *
- * TODO: add aLOCI
+ * Exact implementation only, not aLOCI. See {@link ALOCI}.
*
* Outlier detection using multiple epsilon neighborhoods.
*
@@ -88,23 +87,23 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LOCI.class);
+ private static final Logging LOG = Logging.getLogger(LOCI.class);
/**
* Parameter to specify the maximum radius of the neighborhood to be
* considered, must be suitable to the distance function specified.
*/
- public static final OptionID RMAX_ID = OptionID.getOrCreateOptionID("loci.rmax", "The maximum radius of the neighborhood to be considered.");
+ public static final OptionID RMAX_ID = new OptionID("loci.rmax", "The maximum radius of the neighborhood to be considered.");
/**
* Parameter to specify the minimum neighborhood size
*/
- public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+ public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered.");
/**
* Parameter to specify the averaging neighborhood scaling.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+ public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood");
/**
* Holds the value of {@link #RMAX_ID}.
@@ -147,16 +146,16 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
- FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), logger) : null;
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), LOG) : null;
// LOCI preprocessing step
WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
// build list of critical distances
- ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() * 2);
+ ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() << 1);
{
for(int i = 0; i < neighbors.size(); i++) {
- DistanceResultPair<D> r = neighbors.get(i);
+ DistanceDBIDPair<D> r = neighbors.get(i);
if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) {
continue;
}
@@ -182,14 +181,14 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
interestingDistances.put(iditer, cdist);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
}
if(progressPreproc != null) {
- progressPreproc.ensureCompleted(logger);
+ progressPreproc.ensureCompleted(LOG);
}
// LOCI main step
- FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), logger) : null;
+ FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
@@ -204,9 +203,8 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(maxneig >= nmin) {
D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
- List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
- // Ensure the set is sorted. Should be a no-op with most indexes.
- Collections.sort(maxneighbors);
+ DistanceDBIDResult<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
+ // TODO: Ensure the set is sorted. Should be a no-op with most indexes.
// For any critical distance, compute the normalized MDEF score.
for(DoubleIntPair c : cdist) {
// Only start when minimum size is fulfilled
@@ -219,12 +217,13 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
final int n_alphar = elementsAtRadius(cdist, alpha_r);
// compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}
MeanVariance mv_n_r_alpha = new MeanVariance();
- for(DistanceResultPair<D> ne : maxneighbors) {
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Stop at radius r
- if(ne.getDistance().doubleValue() > r) {
+ if(neighbor.getDistance().doubleValue() > r) {
break;
}
- int rn_alphar = elementsAtRadius(interestingDistances.get(ne), alpha_r);
+ int rn_alphar = elementsAtRadius(interestingDistances.get(neighbor), alpha_r);
mv_n_r_alpha.put(rn_alphar);
}
// We only use the average and standard deviation
@@ -251,11 +250,11 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
mdef_radius.putDouble(iditer, maxnormr);
minmax.put(maxmdefnorm);
if(progressLOCI != null) {
- progressLOCI.incrementProcessed(logger);
+ progressLOCI.incrementProcessed(LOG);
}
}
if(progressLOCI != null) {
- progressLOCI.ensureCompleted(logger);
+ progressLOCI.ensureCompleted(LOG);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
@@ -293,7 +292,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -335,4 +334,4 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
return new LOCI<O, D>(distanceFunction, rmax, nmin, alpha);
}
}
-} \ No newline at end of file
+}
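As background for the LOCI hunks: for a sampling radius r, the code counts neighbors within the counting radius alpha*r, averages those counts over the r-neighborhood (mv_n_r_alpha above), and normalizes the resulting multi-granularity deviation factor by its standard deviation. A single-radius 1-d sketch of that normalized MDEF, without the maximization over critical distances that the algorithm above performs (toy data, not the ELKI API):

public class MdefSketch {
  static int countWithin(double[] data, double center, double radius) {
    int n = 0;
    for (double x : data) {
      if (Math.abs(x - center) <= radius) {
        n++;
      }
    }
    return n;
  }

  static double normalizedMdef(double[] data, double p, double r, double alpha) {
    double sum = 0.0, sqsum = 0.0;
    int m = 0;
    for (double q : data) {
      if (Math.abs(q - p) <= r) { // sampling neighborhood of radius r
        int nq = countWithin(data, q, alpha * r); // counting radius alpha * r
        sum += nq;
        sqsum += (double) nq * nq;
        m++;
      }
    }
    double mean = sum / m;
    double sigma = Math.sqrt(sqsum / m - mean * mean);
    double mdef = 1.0 - countWithin(data, p, alpha * r) / mean;
    return (sigma > 0) ? mdef / sigma : 0.0; // sigma_MDEF-normalized deviation
  }

  public static void main(String[] args) {
    double[] data = { 1.0, 1.1, 1.2, 1.3, 1.4, 9.0 };
    System.out.println(normalizedMdef(data, 1.2, 8.5, 0.25)); // slightly negative: denser than average
    System.out.println(normalizedMdef(data, 9.0, 8.5, 0.25)); // clearly positive: the outlier
  }
}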
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
index 5aba41ec..66bed47a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
@@ -29,29 +29,31 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -118,19 +120,19 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LOF.class);
+ private static final Logging LOG = Logging.getLogger(LOF.class);
/**
* The distance function to determine the reachability distance between
* database objects.
*/
- public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOF_SCORE, must be an integer greater than 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE.");
/**
* Holds the value of {@link #K_ID}.
@@ -189,9 +191,10 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* calling {@link #doRunInTime}.
*
* @param relation Data to process
+ * @return LOF outlier result
*/
public OutlierResult run(Relation<O> relation) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress("LOF", 3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null;
Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog);
KNNQuery<O, D> kNNRefer = pair.getFirst();
KNNQuery<O, D> kNNReach = pair.getSecond();
@@ -209,13 +212,12 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
// "HEAVY" flag for knnReach since it is used more than once
KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if(!(knnReach instanceof PreprocessorKNNQuery)) {
- if(stepprog != null) {
- if(neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
- stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", logger);
- }
- else {
- stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", logger);
+ if (!(knnReach instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ if (neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG);
+ } else {
+ stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG);
}
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k);
@@ -226,10 +228,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
// knnReach is only used once
KNNQuery<O, D> knnRefer;
- if(neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if (neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) {
knnRefer = knnReach;
- }
- else {
+ } else {
// do not materialize the first neighborhood, since it is used only once
knnRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k);
}
@@ -251,30 +252,30 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) {
// Assert we got something
- if(kNNRefer == null) {
+ if (kNNRefer == null) {
throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
}
- if(kNNReach == null) {
+ if (kNNReach == null) {
throw new AbortException("No kNN queries supported by database for reachability distance function.");
}
// Compute LRDs
- if(stepprog != null) {
- stepprog.beginStep(2, "Computing LRDs.", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing LRDs.", LOG);
}
WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach);
// compute LOF_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing LOFs.", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing LOFs.", LOG);
}
Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer);
WritableDoubleDataStore lofs = lofsAndMax.getFirst();
// track the maximum value for normalization.
DoubleMinMax lofminmax = lofsAndMax.getSecond();
- if(stepprog != null) {
- stepprog.setCompleted(logger);
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
}
// Build result representation.
@@ -295,26 +296,44 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- FiniteProgress lrdsProgress = logger.isVerbose() ? new FiniteProgress("LRD", ids.size(), logger) : null;
- Mean mean = new Mean();
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- mean.reset();
- KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
- for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.sameDBID(iter)) {
- KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
- mean.put(Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()));
+ FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("LRD", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ final double nkdist;
+ if (neighborsNeighbors instanceof DoubleDistanceKNNList) {
+ nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance();
+ } else {
+ nkdist = neighborsNeighbors.getKNNDistance().doubleValue();
+ }
+ sum += Math.max(neighbor.doubleDistance(), nkdist);
+ count++;
+ }
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ count++;
+ }
}
}
// Avoid division by 0
- final double lrd = (mean.getCount() > 0) ? 1 / mean.getMean() : 0.0;
+ final double lrd = (sum > 0) ? (count / sum) : 0;
lrds.putDouble(iter, lrd);
- if(lrdsProgress != null) {
- lrdsProgress.incrementProcessed(logger);
+ if (lrdsProgress != null) {
+ lrdsProgress.incrementProcessed(LOG);
}
}
- if(lrdsProgress != null) {
- lrdsProgress.ensureCompleted(logger);
+ if (lrdsProgress != null) {
+ lrdsProgress.ensureCompleted(LOG);
}
return lrds;
}
@@ -328,40 +347,40 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* reference distance
* @return the LOFs of the objects and the maximum LOF
*/
- protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DataStore<Double> lrds, KNNQuery<O, D> knnRefer) {
+ protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) {
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
- FiniteProgress progressLOFs = logger.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), logger) : null;
- Mean mean = new Mean();
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- double lrdp = lrds.get(iter);
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ final double lrdp = lrds.doubleValue(iter);
final double lof;
- if(lrdp > 0) {
+ if (lrdp > 0) {
final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k);
- mean.reset();
- for(DistanceResultPair<D> neighbor : neighbors) {
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
- if(objectIsInKNN || !neighbor.sameDBID(iter)) {
- mean.put(lrds.get(neighbor));
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) {
+ sum += lrds.doubleValue(neighbor);
+ count++;
}
}
- lof = mean.getMean() / lrdp;
- }
- else {
+ lof = sum / (count * lrdp);
+ } else {
lof = 1.0;
}
lofs.putDouble(iter, lof);
// update minimum and maximum
lofminmax.put(lof);
- if(progressLOFs != null) {
- progressLOFs.incrementProcessed(logger);
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
}
}
- if(progressLOFs != null) {
- progressLOFs.ensureCompleted(logger);
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
}
return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax);
}
@@ -369,10 +388,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if(reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) {
+ if (reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
- }
- else {
+ } else {
type = new CombinedTypeInformation(neighborhoodDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
@@ -380,7 +398,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -442,6 +460,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the knn query for the reference set.
+ *
* @return the kNN query w.r.t. the reference neighborhood distance
*/
public KNNQuery<O, D> getKNNRefer() {
@@ -449,6 +469,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the knn query for the reachability set.
+ *
* @return the kNN query w.r.t. the reachability distance
*/
public KNNQuery<O, D> getKNNReach() {
@@ -456,6 +478,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the LRD data store.
+ *
* @return the LRD values of the objects
*/
public WritableDoubleDataStore getLrds() {
@@ -463,6 +487,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the LOF data store.
+ *
* @return the LOF values of the objects
*/
public WritableDoubleDataStore getLofs() {
@@ -470,6 +496,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the outlier result.
+ *
* @return the result of the run of the {@link LOF} algorithm
*/
public OutlierResult getResult() {
@@ -486,6 +514,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the RkNN query for the reference set.
+ *
* @return the RkNN query w.r.t. the reference neighborhood distance
*/
public RKNNQuery<O, D> getRkNNRefer() {
@@ -493,6 +523,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
}
/**
+ * Get the RkNN query for the reachability set.
+ *
* @return the RkNN query w.r.t. the reachability distance
*/
public RKNNQuery<O, D> getRkNNReach() {
@@ -518,7 +550,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
/**
- * The neighborhood size to use
+ * The neighborhood size to use.
*/
protected int k = 2;
@@ -536,13 +568,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
- if(config.grab(pK)) {
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
k = pK.getValue();
}
final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if(config.grab(reachDistP)) {
+ if (config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
}
@@ -554,4 +587,4 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
return new LOF<O, D>(k, distanceFunction, rdist);
}
}
-}
\ No newline at end of file
+}
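
The LRD and LOF loops above drop the shared Mean accumulator in favor of a local sum/count pair and add a fast path for DoubleDistanceKNNList, but the formulas are unchanged: the reachability distance of p w.r.t. neighbor o is max(dist(p, o), k-distance(o)), lrd(p) is count divided by the sum of those reachability distances (0 if the sum is 0), and LOF(p) is the mean lrd of the reference neighbors divided by lrd(p) (1.0 if lrd(p) is 0). A minimal standalone sketch of those two steps on plain arrays, with hypothetical names and none of the ELKI data-store machinery:

    // Standalone sketch of the LRD/LOF arithmetic used above; plain arrays
    // stand in for ELKI's KNNQuery and data stores, and the query point is
    // assumed to be already excluded from its own neighbor lists.
    public class LofSketch {
      /** lrd(p): count over the sum of reachability distances. */
      static double lrd(double[] neighborDists, double[] neighborKDists) {
        double sum = 0.0;
        int count = 0;
        for (int i = 0; i < neighborDists.length; i++) {
          // reachability distance: max(dist(p, o), k-distance(o))
          sum += Math.max(neighborDists[i], neighborKDists[i]);
          count++;
        }
        return (sum > 0) ? count / sum : 0.0; // avoid division by zero
      }

      /** LOF(p): mean neighbor lrd divided by the point's own lrd. */
      static double lof(double ownLrd, double[] neighborLrds) {
        if (!(ownLrd > 0)) {
          return 1.0; // same fallback as in the code above
        }
        double sum = 0.0;
        for (double l : neighborLrds) {
          sum += l;
        }
        return sum / (neighborLrds.length * ownLrd);
      }

      public static void main(String[] args) {
        double ownLrd = lrd(new double[] { 1.0, 1.5, 2.0 }, new double[] { 1.2, 1.4, 1.8 });
        System.out.println("lrd = " + ownLrd);
        System.out.println("lof = " + lof(ownLrd, new double[] { 0.7, 0.8, 0.75 }));
      }
    }
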
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
index dc0d26a4..5da06983 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
@@ -33,15 +33,18 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -76,7 +79,8 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @apiviz.has KNNQuery
*
- * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <O> type of objects handled by this algorithm
+ * @param <D> type of distances used
*/
@Title("LoOP: Local Outlier Probabilities")
@Description("Variant of the LOF algorithm normalized using statistical values.")
@@ -85,37 +89,37 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LoOP.class);
+ private static final Logging LOG = Logging.getLogger(LoOP.class);
/**
* The distance function to determine the reachability distance between
* database objects.
*/
- public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");
+ public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object.");
/**
* The distance function to determine the reachability distance between
* database objects.
*/
- public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");
+ public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOOP_SCORE, must be an integer greater than 1.
*/
- public static final OptionID KREACH_ID = OptionID.getOrCreateOptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");
+ public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOOP_SCORE, must be an integer greater than 1.
*/
- public static final OptionID KCOMP_ID = OptionID.getOrCreateOptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");
+ public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE.");
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LOOP_SCORE, must be an integer greater than 1.
*/
- public static final OptionID LAMBDA_ID = OptionID.getOrCreateOptionID("loop.lambda", "The number of standard deviations to consider for density computation.");
+ public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation.");
/**
* Holds the value of {@link #KREACH_ID}.
@@ -133,12 +137,12 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
double lambda;
/**
- * Preprocessor Step 1
+ * Preprocessor Step 1.
*/
protected DistanceFunction<? super O, D> reachabilityDistanceFunction;
/**
- * Preprocessor Step 2
+ * Preprocessor Step 2.
*/
protected DistanceFunction<? super O, D> comparisonDistanceFunction;
@@ -150,11 +154,11 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* Constructor with parameters.
*
- * @param kreach
- * @param kcomp
- * @param reachabilityDistanceFunction
- * @param comparisonDistanceFunction
- * @param lambda
+ * @param kreach k for reachability
+ * @param kcomp k for comparison
+ * @param reachabilityDistanceFunction distance function for reachability
+ * @param comparisonDistanceFunction distance function for comparison
+ * @param lambda Lambda parameter
*/
public LoOP(int kreach, int kcomp, DistanceFunction<? super O, D> reachabilityDistanceFunction, DistanceFunction<? super O, D> comparisonDistanceFunction, double lambda) {
super();
@@ -168,36 +172,35 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* Get the kNN queries for the algorithm.
*
- * @param database Database
- * @param stepprog Progress logger
+ * @param database Database to analyze
+ * @param relation Relation to analyze
+ * @param stepprog Progress logger, may be {@code null}
* @return result
*/
protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
KNNQuery<O, D> knnComp;
KNNQuery<O, D> knnReach;
- if(comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
+ if (comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) {
// We need each neighborhood twice - use "HEAVY" flag.
knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query - use a preprocessor!
- if(knnComp == null) {
- if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", logger);
+ if (knnComp == null) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG);
}
MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, comparisonDistanceFunction, kcomp);
database.addIndex(preproc);
DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction);
knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
- }
- else {
- if(stepprog != null) {
- stepprog.beginStep(1, "Optimized neighborhoods provided by database.", logger);
+ } else {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Optimized neighborhoods provided by database.", LOG);
}
}
knnReach = knnComp;
- }
- else {
- if(stepprog != null) {
- stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", logger);
+ } else {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", LOG);
}
knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach);
knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp);
@@ -215,17 +218,17 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
public OutlierResult run(Database database, Relation<O> relation) {
final double sqrt2 = Math.sqrt(2.0);
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(5) : null;
Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog);
KNNQuery<O, D> knnComp = pair.getFirst();
KNNQuery<O, D> knnReach = pair.getSecond();
// Assert we got something
- if(knnComp == null) {
+ if (knnComp == null) {
throw new AbortException("No kNN queries supported by database for comparison distance function.");
}
- if(knnReach == null) {
+ if (knnReach == null) {
throw new AbortException("No kNN queries supported by database for density estimation distance function.");
}
@@ -233,29 +236,43 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
Mean mean = new Mean();
{// computing PRDs
- if(stepprog != null) {
- stepprog.beginStep(3, "Computing pdists", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing pdists", LOG);
}
- FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ FiniteProgress prdsProgress = LOG.isVerbose() ? new FiniteProgress("pdists", relation.size(), LOG) : null;
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final KNNResult<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
mean.reset();
// use first kref neighbors as reference set
int ks = 0;
- for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.sameDBID(iditer)) {
- double d = neighbor.getDistance().doubleValue();
- mean.put(d * d);
- ks++;
- if(ks >= kreach) {
- break;
+ // TODO: optimize for double distances
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ final double d = neighbor.doubleDistance();
+ mean.put(d * d);
+ ks++;
+ if (ks >= kreach) {
+ break;
+ }
+ }
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ double d = neighbor.getDistance().doubleValue();
+ mean.put(d * d);
+ ks++;
+ if (ks >= kreach) {
+ break;
+ }
}
}
}
double pdist = lambda * Math.sqrt(mean.getMean());
pdists.putDouble(iditer, pdist);
- if(prdsProgress != null) {
- prdsProgress.incrementProcessed(logger);
+ if (prdsProgress != null) {
+ prdsProgress.incrementProcessed(LOG);
}
}
}
@@ -263,63 +280,63 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
MeanVariance mvplof = new MeanVariance();
{// compute LOOP_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(4, "Computing PLOF", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(4, "Computing PLOF", LOG);
}
- FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
+ FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
MeanVariance mv = new MeanVariance();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final KNNResult<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
mv.reset();
// use first kref neighbors as comparison set.
int ks = 0;
- for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(objectIsInKNN || !neighbor1.sameDBID(iditer)) {
- mv.put(pdists.doubleValue(neighbor1));
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) {
+ mv.put(pdists.doubleValue(neighbor));
ks++;
- if(ks >= kcomp) {
+ if (ks >= kcomp) {
break;
}
}
}
double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0);
- if(Double.isNaN(plof) || Double.isInfinite(plof)) {
+ if (Double.isNaN(plof) || Double.isInfinite(plof)) {
plof = 1.0;
}
plofs.putDouble(iditer, plof);
mvplof.put((plof - 1.0) * (plof - 1.0));
- if(progressPLOFs != null) {
- progressPLOFs.incrementProcessed(logger);
+ if (progressPLOFs != null) {
+ progressPLOFs.incrementProcessed(LOG);
}
}
}
double nplof = lambda * Math.sqrt(mvplof.getMean());
- if(logger.isDebugging()) {
- logger.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
+ if (LOG.isDebugging()) {
+ LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev());
}
// Compute final LoOP values.
WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
{// compute LOOP_SCORE of each db object
- if(stepprog != null) {
- stepprog.beginStep(5, "Computing LoOP scores", logger);
+ if (stepprog != null) {
+ stepprog.beginStep(5, "Computing LoOP scores", LOG);
}
- FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2)));
- if(progressLOOPs != null) {
- progressLOOPs.incrementProcessed(logger);
+ if (progressLOOPs != null) {
+ progressLOOPs.incrementProcessed(LOG);
}
}
}
- if(stepprog != null) {
- stepprog.setCompleted(logger);
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
}
// Build result representation.
@@ -331,10 +348,9 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation type;
- if(reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
+ if (reachabilityDistanceFunction.equals(comparisonDistanceFunction)) {
type = reachabilityDistanceFunction.getInputTypeRestriction();
- }
- else {
+ } else {
type = new CombinedTypeInformation(reachabilityDistanceFunction.getInputTypeRestriction(), comparisonDistanceFunction.getInputTypeRestriction());
}
return TypeUtil.array(type);
@@ -342,7 +358,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -369,45 +385,48 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
double lambda = 2.0;
/**
- * Preprocessor Step 1
+ * Preprocessor Step 1.
*/
protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
/**
- * Preprocessor Step 2
+ * Preprocessor Step 2.
*/
protected DistanceFunction<O, D> comparisonDistanceFunction = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kcompP = new IntParameter(KCOMP_ID, new GreaterConstraint(1));
- if(config.grab(kcompP)) {
- kcomp = kcompP.getValue();
+ final IntParameter kcompP = new IntParameter(KCOMP_ID);
+ kcompP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kcompP)) {
+ kcomp = kcompP.intValue();
}
final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<DistanceFunction<O, D>>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
- if(config.grab(compDistP)) {
+ if (config.grab(compDistP)) {
comparisonDistanceFunction = compDistP.instantiateClass(config);
}
- final IntParameter kreachP = new IntParameter(KREACH_ID, new GreaterConstraint(1), true);
- if(config.grab(kreachP)) {
- kreach = kreachP.getValue();
- }
- else {
+ final IntParameter kreachP = new IntParameter(KREACH_ID);
+ kreachP.addConstraint(new GreaterConstraint(1));
+ kreachP.setOptional(true);
+ if (config.grab(kreachP)) {
+ kreach = kreachP.intValue();
+ } else {
kreach = kcomp;
}
final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if(config.grab(reachDistP)) {
+ if (config.grab(reachDistP)) {
reachabilityDistanceFunction = reachDistP.instantiateClass(config);
}
// TODO: make default 1.0?
- final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, new GreaterConstraint(0.0), 2.0);
- if(config.grab(lambdaP)) {
- lambda = lambdaP.getValue();
+ final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 2.0);
+ lambdaP.addConstraint(new GreaterConstraint(0.0));
+ if (config.grab(lambdaP)) {
+ lambda = lambdaP.doubleValue();
}
}
@@ -417,4 +436,4 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
return new LoOP<O, D>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda);
}
}
-}
\ No newline at end of file
+}
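
The LoOP hunks above apply the same mechanical migration (the LOG constant, DBIDUtil.equal, a double-distance fast path, addConstraint-style parameters); the scoring pipeline itself is untouched: pdist(o) = lambda * sqrt(mean squared kNN distance), PLOF(o) = pdist(o) / mean pdist of the comparison neighbors, clamped to at least 1, nPLOF = lambda * sqrt(mean of (PLOF - 1)^2), and the final score is erf((PLOF(o) - 1) / (nPLOF * sqrt(2))). A standalone sketch of that arithmetic, with plain arrays in place of ELKI's data stores and a textbook Abramowitz-Stegun approximation in place of NormalDistribution.erf (all names hypothetical):

    // Standalone sketch of the LoOP scoring arithmetic computed above.
    public class LoopSketch {
      /** pdist: lambda times the quadratic mean of the kNN distances. */
      static double pdist(double lambda, double[] neighborDists) {
        double sumsq = 0.0;
        for (double d : neighborDists) {
          sumsq += d * d;
        }
        return lambda * Math.sqrt(sumsq / neighborDists.length);
      }

      /** PLOF: own pdist over the mean pdist of the comparison set, at least 1. */
      static double plof(double ownPdist, double[] neighborPdists) {
        double mean = 0.0;
        for (double p : neighborPdists) {
          mean += p;
        }
        mean /= neighborPdists.length;
        double plof = ownPdist / mean;
        return (Double.isNaN(plof) || Double.isInfinite(plof)) ? 1.0 : Math.max(plof, 1.0);
      }

      /** Final LoOP score: erf((plof - 1) / (nplof * sqrt(2))). */
      static double loop(double plof, double nplof) {
        return erf((plof - 1.0) / (nplof * Math.sqrt(2.0)));
      }

      /** Abramowitz-Stegun 7.1.26 approximation of the error function. */
      static double erf(double x) {
        double sign = (x < 0) ? -1.0 : 1.0;
        x = Math.abs(x);
        double t = 1.0 / (1.0 + 0.3275911 * x);
        double poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t;
        return sign * (1.0 - poly * Math.exp(-x * x));
      }

      public static void main(String[] args) {
        double lambda = 2.0;
        double pd = pdist(lambda, new double[] { 1.0, 1.2, 1.5 });
        double pl = plof(pd, new double[] { 1.1, 1.3, 1.2 });
        double nplof = lambda * Math.sqrt(0.04); // stands in for lambda * sqrt(mean((plof-1)^2))
        System.out.println("LoOP = " + loop(pl, nplof));
      }
    }
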
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
index b3d24463..bed27a33 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
@@ -37,14 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -83,7 +83,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OPTICSOF.class);
+ private static final Logging LOG = Logging.getLogger(OPTICSOF.class);
/**
* Parameter to specify the threshold MinPts.
@@ -136,9 +136,10 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
List<Double> core = new ArrayList<Double>();
double lrd = 0;
- for(DistanceResultPair<D> neighPair : nMinPts.get(iditer)) {
- double coreDist = coreDistance.doubleValue(neighPair);
- double dist = distQuery.distance(iditer, neighPair).doubleValue();
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ double coreDist = coreDistance.doubleValue(neighbor);
+ double dist = distQuery.distance(iditer, neighbor).doubleValue();
double rd = Math.max(coreDist, dist);
lrd = rd + lrd;
core.add(rd);
@@ -153,9 +154,9 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double of = 0;
- for(DistanceResultPair<D> pair : nMinPts.get(iditer)) {
+ for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double lrd = lrds.doubleValue(iditer);
- double lrdN = lrds.doubleValue(pair);
+ double lrdN = lrds.doubleValue(neighbor);
of = of + lrdN / lrd;
}
of = of / minPtsNeighborhoodSize.intValue(iditer);
@@ -176,7 +177,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -192,7 +193,8 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter param = new IntParameter(OPTICS.MINPTS_ID, new GreaterConstraint(1));
+ final IntParameter param = new IntParameter(OPTICS.MINPTS_ID);
+ param.addConstraint(new GreaterConstraint(1));
if(config.grab(param)) {
minpts = param.getValue();
}
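
Two API migrations recur in every file touched by this patch: OptionID constants are created with new OptionID(...) instead of OptionID.getOrCreateOptionID(...), and parameter constraints are attached via addConstraint(...) after construction instead of being passed to the IntParameter or DoubleParameter constructor. The resulting parameterizer shape, shown here as a fragment mirroring the OPTICSOF hunk above (it assumes the surrounding Parameterizer class and its ELKI imports, and is not compilable on its own):

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      // Constraint is added after construction rather than in the constructor.
      final IntParameter param = new IntParameter(OPTICS.MINPTS_ID);
      param.addConstraint(new GreaterConstraint(1));
      if (config.grab(param)) {
        minpts = param.getValue();
      }
      // Optional values and defaults follow the same pattern, e.g. in LoOP above:
      // kreachP.setOptional(true); new DoubleParameter(LAMBDA_ID, 2.0).
    }
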
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
index 9b974ad9..bac5db36 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
@@ -34,14 +34,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
@@ -73,7 +73,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
/**
* The logger for this class.
*/
- static final Logging logger = Logging.getLogger(OnlineLOF.class);
+ private static final Logging LOG = Logging.getLogger(OnlineLOF.class);
/**
* Constructor.
@@ -93,7 +93,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
*/
@Override
public OutlierResult run(Relation<O> relation) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(relation, stepprog);
KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
@@ -128,7 +128,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// No optimized kNN query or RkNN query - use a preprocessor!
if(kNNRefer == null || rkNNRefer == null) {
if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", logger);
+ stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<O, D>(relation, neighborhoodDistanceFunction, k);
DistanceQuery<O, D> ndq = relation.getDatabase().getDistanceQuery(relation, neighborhoodDistanceFunction);
@@ -139,7 +139,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
}
else {
if(stepprog != null) {
- stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", logger);
+ stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
@@ -147,7 +147,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
if(kNNReach == null || rkNNReach == null) {
if(stepprog != null) {
- stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", logger);
+ stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
@@ -261,14 +261,14 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @param lofResult the result of the former LOF run
*/
private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// recompute lrds
if(stepprog != null) {
- stepprog.beginStep(1, "Recompute LRDs.", logger);
+ stepprog.beginStep(1, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
- List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
@@ -283,20 +283,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// recompute lofs
if(stepprog != null) {
- stepprog.beginStep(2, "Recompute LOFS.", logger);
+ stepprog.beginStep(2, "Recompute LOFS.", LOG);
}
- List<List<DistanceResultPair<D>>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
if(stepprog != null) {
- stepprog.beginStep(3, "Inform listeners.", logger);
+ stepprog.beginStep(3, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
}
@@ -311,11 +311,11 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @param lofResult the result of the former LOF run
*/
private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(4) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
// delete lrds and lofs
if(stepprog != null) {
- stepprog.beginStep(1, "Delete old LRDs and LOFs.", logger);
+ stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
}
for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
lofResult.getLrds().delete(iter);
@@ -324,10 +324,10 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// recompute lrds
if(stepprog != null) {
- stepprog.beginStep(2, "Recompute LRDs.", logger);
+ stepprog.beginStep(2, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
- List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
@@ -342,20 +342,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
// recompute lofs
if(stepprog != null) {
- stepprog.beginStep(3, "Recompute LOFS.", logger);
+ stepprog.beginStep(3, "Recompute LOFS.", LOG);
}
- List<List<DistanceResultPair<D>>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
if(stepprog != null) {
- stepprog.beginStep(4, "Inform listeners.", logger);
+ stepprog.beginStep(4, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
}
@@ -367,15 +367,13 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
* @return a set containing the ids of the query result and the specified
* ids
*/
- private ArrayModifiableDBIDs mergeIDs(List<List<DistanceResultPair<D>>> queryResults, DBIDs... ids) {
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDResult<D>> queryResults, DBIDs... ids) {
ModifiableDBIDs result = DBIDUtil.newHashSet();
for(DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
- for(List<DistanceResultPair<D>> queryResult : queryResults) {
- for(DistanceResultPair<D> qr : queryResult) {
- result.add(qr);
- }
+ for(DistanceDBIDResult<D> queryResult : queryResults) {
+ result.addDBIDs(queryResult);
}
return DBIDUtil.newArray(result);
}
@@ -410,7 +408,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -440,7 +438,8 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
if(config.grab(pK)) {
k = pK.getValue();
}
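
In OnlineLOF the bulk RkNN results are now typed as List<? extends DistanceDBIDResult<D>>, and since a DistanceDBIDResult can be passed directly to addDBIDs, mergeIDs adds each query result wholesale instead of copying pair by pair. A plain-collections analogy of that simplification (java.util types, not the ELKI API):

    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Analogy only: once each query result is itself a collection of ids,
    // the element-by-element inner loop collapses into one addAll per result.
    public class MergeIdsSketch {
      static Set<Integer> mergeIDs(List<Set<Integer>> queryResults, List<Set<Integer>> idSets) {
        Set<Integer> result = new HashSet<Integer>();
        for (Set<Integer> ids : idSets) {
          result.addAll(ids);
        }
        for (Set<Integer> queryResult : queryResults) {
          result.addAll(queryResult); // previously: add each pair individually
        }
        return result;
      }
    }
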
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
index d8322d8b..00c4a8ec 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
@@ -31,6 +31,8 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
* Generic super interface for outlier detection algorithms.
*
* @author Erich Schubert
+ *
+ * @apiviz.landmark
*
* @apiviz.has OutlierResult
*/
@@ -39,4 +41,4 @@ public interface OutlierAlgorithm extends Algorithm {
// Use the magic in AbstractAlgorithm and just implement a run method for your input data
@Override
OutlierResult run(Database database);
-}
\ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
index dd1d37a3..93eca7db 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
@@ -23,11 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Iterator;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
@@ -39,12 +36,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -88,23 +86,23 @@ import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic;
@Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06), Hong Kong, China, 2006", url = "http://dx.doi.org/10.1109/ICDM.2006.17")
-public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ReferenceBasedOutlierDetection.class);
+ private static final Logging LOG = Logging.getLogger(ReferenceBasedOutlierDetection.class);
/**
* Parameter for the reference points heuristic.
*/
- public static final OptionID REFP_ID = OptionID.getOrCreateOptionID("refod.refp", "The heuristic for finding reference points.");
+ public static final OptionID REFP_ID = new OptionID("refod.refp", "The heuristic for finding reference points.");
/**
* Parameter to specify the number of nearest neighbors of an object, to be
* considered for computing its REFOD_SCORE, must be an integer greater than
* 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("refod.k", "The number of nearest neighbors");
+ public static final OptionID K_ID = new OptionID("refod.k", "The number of nearest neighbors");
/**
* Holds the value of {@link #K_ID}.
@@ -160,7 +158,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
}
V firstRef = iter.next();
// compute distance vector for the first reference point
- List<DistanceResultPair<D>> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
+ DistanceDBIDResult<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
for(int l = 0; l < firstReferenceDists.size(); l++) {
double density = computeDensity(firstReferenceDists, l);
// Initial value
@@ -169,7 +167,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
// compute density values for all remaining reference points
while(iter.hasNext()) {
V refPoint = iter.next();
- List<DistanceResultPair<D>> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
+ DistanceDBIDResult<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
// compute density value for each object
for(int l = 0; l < referenceDists.size(); l++) {
double density = computeDensity(referenceDists, l);
@@ -215,14 +213,13 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
* @return array containing the distance to one reference point for each
* database object and the object id
*/
- protected List<DistanceResultPair<D>> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
+ protected DistanceDBIDResult<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
// TODO: optimize for double distances?
- List<DistanceResultPair<D>> referenceDists = new ArrayList<DistanceResultPair<D>>(database.size());
+ GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<D>(database.size());
for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- final D distance = distFunc.distance(iditer, refPoint);
- referenceDists.add(new GenericDistanceResultPair<D>(distance, iditer.getDBID()));
+ referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
}
- Collections.sort(referenceDists);
+ referenceDists.sort();
return referenceDists;
}
@@ -238,8 +235,8 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
* @param index index of the current object
* @return density for one object and reference point
*/
- protected double computeDensity(List<DistanceResultPair<D>> referenceDists, int index) {
- final DistanceResultPair<D> x = referenceDists.get(index);
+ protected double computeDensity(DistanceDBIDResult<D> referenceDists, int index) {
+ final DistanceDBIDPair<D> x = referenceDists.get(index);
final double xDist = x.getDistance().doubleValue();
int lef = index - 1;
@@ -295,7 +292,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -305,7 +302,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
/**
* Holds the value of {@link #K_ID}.
*/
@@ -319,7 +316,8 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
if(config.grab(pK)) {
k = pK.getValue();
}
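
computeDistanceVector above now fills a GenericDistanceDBIDList and sorts it in place rather than building and Collections.sort-ing a list of GenericDistanceResultPair objects; the underlying idea is unchanged: the distance of every object to a single reference point yields a sorted one-dimensional ordering, from which computeDensity reads off an approximate neighborhood density per object. A standalone sketch of just the distance-vector step, with plain arrays, Euclidean distance for concreteness, and hypothetical names:

    import java.util.Arrays;
    import java.util.Comparator;

    // Standalone sketch of the reference-point distance vector built by
    // computeDistanceVector above: distance of every object to one reference
    // point, sorted ascending. Plain arrays replace ELKI's relations and
    // distance queries.
    public class ReferenceDistanceSketch {
      /** Object id plus its distance to the reference point. */
      static class RefDist {
        final int id;
        final double dist;
        RefDist(int id, double dist) { this.id = id; this.dist = dist; }
      }

      static RefDist[] distanceVector(double[][] data, double[] ref) {
        RefDist[] dists = new RefDist[data.length];
        for (int i = 0; i < data.length; i++) {
          dists[i] = new RefDist(i, euclidean(data[i], ref));
        }
        // Sorted ascending, like referenceDists.sort() in the patch above.
        Arrays.sort(dists, new Comparator<RefDist>() {
          @Override
          public int compare(RefDist a, RefDist b) {
            return Double.compare(a.dist, b.dist);
          }
        });
        return dists;
      }

      static double euclidean(double[] a, double[] b) {
        double sum = 0.0;
        for (int i = 0; i < a.length; i++) {
          final double d = a[i] - b[i];
          sum += d * d;
        }
        return Math.sqrt(sum);
      }

      public static void main(String[] args) {
        double[][] data = { { 0, 0 }, { 1, 1 }, { 5, 5 }, { 0.5, 0.2 } };
        for (RefDist rd : distanceVector(data, new double[] { 0, 0 })) {
          System.out.println(rd.id + " -> " + rd.dist);
        }
      }
    }
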
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
new file mode 100644
index 00000000..e8077819
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java
@@ -0,0 +1,236 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Algorithm to compute local correlation outlier probability.
+ *
+ * This is the simpler, original version of COP, as published in
+ * <p>
+ * Arthur Zimek<br />
+ * Correlation Clustering.<br />
+ * PhD thesis, Chapter 18
+ * </p>
+ * which was later refined into the method published as {@link COP}.
+ *
+ * @author Erich Schubert
+ * @param <V> the type of NumberVector handled by this Algorithm
+ */
+@Title("Simple COP: Correlation Outlier Probability")
+@Reference(authors = "Arthur Zimek", title = "Correlation Clustering. PhD thesis, Chapter 18", booktitle = "")
+public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleCOP.class);
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+   * Holds the object performing the dependency derivation.
+ */
+ private DependencyDerivator<V, D> dependencyDerivator;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param k k Parameter
+ * @param pca PCA runner
+ */
+ public SimpleCOP(DistanceFunction<? super V, D> distanceFunction, int k, PCAFilteredRunner<V> pca) {
+ super(distanceFunction);
+ this.k = k;
+ this.dependencyDerivator = new DependencyDerivator<V, D>(null, FormatUtil.NF8, pca, 0, false);
+ }
+
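+ /**
+ * Run the Simple COP algorithm on the given relation.
+ *
+ * @param database Database to process
+ * @param data Relation to process
+ * @return Correlation outlier probabilities
+ */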
+ public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
+ KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
+
+ DBIDs ids = data.getDBIDs();
+
+ WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDataStore<Vector> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class);
+ WritableDataStore<Matrix> cop_datav = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Matrix.class);
+ WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
+ WritableDataStore<CorrelationAnalysisSolution<?>> cop_sol = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class);
+ { // compute the correlation outlier probability of each database object
+ FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
+ double sqrt2 = Math.sqrt(2.0);
+ for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
+ nids.remove(id);
+
+ // TODO: do we want to use the query point as centroid?
+ CorrelationAnalysisSolution<V> depsol = dependencyDerivator.generateModel(data, nids);
+
+ double stddev = depsol.getStandardDeviation();
+ double distance = depsol.distance(data.get(id));
+ double prob = NormalDistribution.erf(distance / (stddev * sqrt2));
+
+ cop_score.putDouble(id, prob);
+
+ Vector errv = depsol.errorVector(data.get(id)).timesEquals(-1);
+ cop_err_v.put(id, errv);
+
+ Matrix datav = depsol.dataProjections(data.get(id));
+ cop_datav.put(id, datav);
+
+ cop_dim.putInt(id, depsol.getCorrelationDimensionality());
+
+ cop_sol.put(id, depsol);
+
+ if (progressLocalPCA != null) {
+ progressLocalPCA.incrementProcessed(LOG);
+ }
+ }
+ if (progressLocalPCA != null) {
+ progressLocalPCA.ensureCompleted(LOG);
+ }
+ }
+ // combine results.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids);
+ OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ // extra results
+ result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
+ result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP.COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids));
+ result.addChildResult(new MaterializedRelation<Matrix>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
+ result.addChildResult(new MaterializedRelation<CorrelationAnalysisSolution<?>>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ /**
+ * Parameter to specify the number of nearest neighbors of an object to be
+ * considered for computing its COP_SCORE, must be an integer greater than
+ * 0.
+ * <p>
+ * Key: {@code -cop.k}
+ * </p>
+ */
+ public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE.");
+
+ /**
+ * Parameter for the PCA runner class.
+ *
+ * <p>
+ * Key: {@code -cop.pcarunner}
+ * </p>
+ */
+ public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA.");
+
+ /**
+ * Number of neighbors to be considered.
+ */
+ int k;
+
+ /**
+ * Holds the PCA runner used for the dependency derivation.
+ */
+ protected PCAFilteredRunner<V> pca;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(kP)) {
+ k = kP.intValue();
+ }
+ ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<PCAFilteredRunner<V>>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class);
+ if (config.grab(pcaP)) {
+ pca = pcaP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected SimpleCOP<V, D> makeInstance() {
+ return new SimpleCOP<V, D>(distanceFunction, k, pca);
+ }
+ }
+}
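To make the score above concrete, here is a minimal standalone sketch (plain JDK, no ELKI dependencies) of the formula applied per object in run(): the distance of an object to the correlation hyperplane of its neighbors is turned into an outlier probability via the Gaussian error function. The erf approximation (Abramowitz & Stegun 7.1.26) stands in for NormalDistribution.erf, and the distance and standard deviation values are made-up inputs.

// Standalone sketch: the probability used as the Simple COP score.
public class SimpleCopScoreSketch {
  // Abramowitz & Stegun 7.1.26 approximation of erf(x), max error about 1.5e-7.
  static double erf(double x) {
    double sign = (x < 0) ? -1.0 : 1.0;
    x = Math.abs(x);
    double t = 1.0 / (1.0 + 0.3275911 * x);
    double y = 1.0 - (((((1.061405429 * t - 1.453152027) * t) + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t * Math.exp(-x * x);
    return sign * y;
  }

  public static void main(String[] args) {
    double stddev = 1.0;   // standard deviation of the neighbors' errors (hypothetical)
    double distance = 2.5; // distance of the object to the local correlation hyperplane (hypothetical)
    // Same formula as in SimpleCOP.run(): erf(distance / (stddev * sqrt(2)))
    double prob = erf(distance / (stddev * Math.sqrt(2.0)));
    System.out.println("COP score = " + prob); // roughly 0.99: unlikely under the local correlation model
  }
}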
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java
new file mode 100644
index 00000000..1c104c08
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java
@@ -0,0 +1,284 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction;
+import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * A simple variant of the LOF algorithm, which uses a simple kernel density
+ * estimation instead of the local reachability density.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ * @apiviz.has KernelDensityFunction
+ *
+ * @param <O> the type of objects handled by this Algorithm
+ * @param <D> Distance type
+ */
+public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleKernelDensityLOF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Kernel density function
+ */
+ private KernelDensityFunction kernel;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param distance the distance function to use
+ * @param kernel Kernel function
+ */
+ public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) {
+ super(distance);
+ this.k = k + 1;
+ this.kernel = kernel;
+ }
+
+ /**
+ * Run the naive kernel density LOF algorithm.
+ *
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
+
+ final int dim = RelationUtil.dimensionality(relation);
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
+ relation.getDatabase().addIndex(preproc);
+ DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute kernel densities
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing densities.", LOG);
+ }
+ WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ int count = 0;
+ double sum = 0.0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance();
+ final double v = neighbor.doubleDistance() / max;
+ sum += kernel.density(v) / Math.pow(max, dim);
+ count++;
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue();
+ final double v = neighbor.getDistance().doubleValue() / max;
+ sum += kernel.density(v) / Math.pow(max, dim);
+ count++;
+ }
+ }
+ final double density = sum / count;
+ dens.putDouble(it, density);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing KLOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = dens.doubleValue(it);
+ final double lof;
+ if (lrdp > 0) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += dens.doubleValue(neighbor);
+ count++;
+ }
+ lof = sum / (count * lrdp);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(it, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD));
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * Option ID for kernel density LOF kernel.
+ */
+ public static final OptionID KERNEL_ID = new OptionID("kernellof.kernel", "Kernel to use for kernel density LOF.");
+
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ /**
+ * Kernel density function to use.
+ */
+ KernelDensityFunction kernel;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(LOF.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+
+ ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class);
+ if (config.grab(kernelP)) {
+ kernel = kernelP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected SimpleKernelDensityLOF<O, D> makeInstance() {
+ return new SimpleKernelDensityLOF<O, D>(k, distanceFunction, kernel);
+ }
+ }
+}
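A standalone sketch of the density estimate and score computed above, on a made-up one-dimensional data set: each neighbor p contributes K(d(o,p)/h)/h^dim, where h is p's own k-distance, and the KLOF score of o is the average density of its neighbors divided by its own density. A Gaussian kernel is substituted here for simplicity (it has no compact support, so the toy densities never collapse to zero), whereas the class above defaults to an Epanechnikov kernel; k here counts neighbors excluding the point itself.

// Standalone sketch (plain JDK, dim = 1): kernel-density LOF as computed above.
public class KernelDensityLofSketch {
  static final int K = 2; // neighbors per point, excluding the point itself

  // Gaussian kernel, substituted for ELKI's KernelDensityFunction.
  static double kernel(double u) {
    return Math.exp(-0.5 * u * u) / Math.sqrt(2.0 * Math.PI);
  }

  // Indices of the K nearest neighbors of point i (excluding i itself).
  static int[] neighbors(double[] data, int i) {
    int[] nn = new int[K];
    boolean[] used = new boolean[data.length];
    used[i] = true; // never pick the point itself
    for (int c = 0; c < K; c++) {
      int best = -1;
      for (int j = 0; j < data.length; j++) {
        if (!used[j] && (best < 0 || Math.abs(data[j] - data[i]) < Math.abs(data[best] - data[i]))) {
          best = j;
        }
      }
      nn[c] = best;
      used[best] = true;
    }
    return nn;
  }

  // k-distance of point i: distance to its K-th nearest neighbor.
  static double kdist(double[] data, int i) {
    int[] nn = neighbors(data, i);
    return Math.abs(data[nn[K - 1]] - data[i]);
  }

  // Density at point i: average kernel contribution of its neighbors,
  // each scaled by that neighbor's k-distance as bandwidth (dim = 1).
  static double density(double[] data, int i) {
    double sum = 0.0;
    for (int n : neighbors(data, i)) {
      double h = kdist(data, n);
      sum += kernel(Math.abs(data[n] - data[i]) / h) / h;
    }
    return sum / K;
  }

  public static void main(String[] args) {
    double[] data = { 1.0, 1.1, 1.2, 1.3, 2.0 }; // the last value is comparatively isolated
    for (int i = 0; i < data.length; i++) {
      double own = density(data, i);
      double nsum = 0.0;
      for (int n : neighbors(data, i)) {
        nsum += density(data, n);
      }
      double klof = (own > 0) ? nsum / (K * own) : 1.0; // same zero-density guard as above
      System.out.println("x = " + data[i] + "  KLOF = " + klof);
    }
  }
}

The isolated point at 2.0 receives a score orders of magnitude above 1, while the clustered points score close to 1, which is the intended behavior of the LOF family.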
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java
new file mode 100644
index 00000000..48505ed5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java
@@ -0,0 +1,249 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * A simplified version of the original LOF algorithm, which does not use the
+ * reachability distance, yielding less stable results on inliers.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has KNNQuery
+ *
+ * @param <O> the type of DatabaseObjects handled by this Algorithm
+ * @param <D> Distance type
+ */
+public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleLOF.class);
+
+ /**
+ * Parameter k.
+ */
+ protected int k;
+
+ /**
+ * Constructor.
+ *
+ * @param k the value of k
+ * @param distance the distance function to use
+ */
+ public SimpleLOF(int k, DistanceFunction<? super O, D> distance) {
+ super(distance);
+ this.k = k + 1;
+ }
+
+ /**
+ * Run the Simple LOF algorithm.
+ *
+ * @param relation Data to process
+ * @return LOF outlier result
+ */
+ public OutlierResult run(Relation<O> relation) {
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null;
+
+ DBIDs ids = relation.getDBIDs();
+
+ // "HEAVY" flag for KNN Query since it is used more than once
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ // No optimized kNN query - use a preprocessor!
+ if (!(knnq instanceof PreprocessorKNNQuery)) {
+ if (stepprog != null) {
+ stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG);
+ }
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k);
+ relation.getDatabase().addIndex(preproc);
+ DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction());
+ knnq = preproc.getKNNQuery(rdq, k);
+ }
+
+ // Compute LRDs
+ if (stepprog != null) {
+ stepprog.beginStep(2, "Computing densities.", LOG);
+ }
+ WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ if (neighbors instanceof DoubleDistanceKNNList) {
+ // Fast version for double distances
+ for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += neighbor.doubleDistance();
+ count++;
+ }
+ } else {
+ for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += neighbor.getDistance().doubleValue();
+ count++;
+ }
+ }
+ // Avoid division by 0
+ final double lrd = (sum > 0) ? (count / sum) : 0;
+ dens.putDouble(it, lrd);
+ if (densProgress != null) {
+ densProgress.incrementProcessed(LOG);
+ }
+ }
+ if (densProgress != null) {
+ densProgress.ensureCompleted(LOG);
+ }
+
+ // compute LOF_SCORE of each db object
+ if (stepprog != null) {
+ stepprog.beginStep(3, "Computing SLOFs.", LOG);
+ }
+ WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ // track the maximum value for normalization.
+ DoubleMinMax lofminmax = new DoubleMinMax();
+
+ FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null;
+ for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ final double lrdp = dens.doubleValue(it);
+ final double lof;
+ if (lrdp > 0) {
+ final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k);
+ double sum = 0.0;
+ int count = 0;
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ // skip the point itself
+ if (DBIDUtil.equal(neighbor, it)) {
+ continue;
+ }
+ sum += dens.doubleValue(neighbor);
+ count++;
+ }
+ lof = sum / (count * lrdp);
+ } else {
+ lof = 1.0;
+ }
+ lofs.putDouble(it, lof);
+ // update minimum and maximum
+ lofminmax.put(lof);
+
+ if (progressLOFs != null) {
+ progressLOFs.incrementProcessed(LOG);
+ }
+ }
+ if (progressLOFs != null) {
+ progressLOFs.ensureCompleted(LOG);
+ }
+
+ if (stepprog != null) {
+ stepprog.setCompleted(LOG);
+ }
+
+ // Build result representation.
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+
+ return result;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> vector type
+ * @param <D> distance type
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ /**
+ * The neighborhood size to use.
+ */
+ protected int k = 2;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter pK = new IntParameter(LOF.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
+ k = pK.getValue();
+ }
+ }
+
+ @Override
+ protected SimpleLOF<O, D> makeInstance() {
+ return new SimpleLOF<O, D>(k, distanceFunction);
+ }
+ }
+}
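And a matching standalone sketch of the simplified variant above, on the same kind of made-up one-dimensional data: the density of an object is the number of neighbors divided by the sum of their distances (the inverse mean neighbor distance, with no reachability smoothing), and the SLOF score is again the average neighbor density over the object's own density.

// Standalone sketch (plain JDK, 1-d data): simplified LOF as computed above.
public class SimpleLofSketch {
  static final int K = 2; // neighbors per point, excluding the point itself

  // Indices of the K nearest neighbors of point i (excluding i itself).
  static int[] neighbors(double[] data, int i) {
    int[] nn = new int[K];
    boolean[] used = new boolean[data.length];
    used[i] = true;
    for (int c = 0; c < K; c++) {
      int best = -1;
      for (int j = 0; j < data.length; j++) {
        if (!used[j] && (best < 0 || Math.abs(data[j] - data[i]) < Math.abs(data[best] - data[i]))) {
          best = j;
        }
      }
      nn[c] = best;
      used[best] = true;
    }
    return nn;
  }

  // Simplified density: K divided by the sum of distances to the K nearest neighbors.
  static double density(double[] data, int i) {
    double sum = 0.0;
    for (int n : neighbors(data, i)) {
      sum += Math.abs(data[n] - data[i]);
    }
    return (sum > 0) ? K / sum : 0.0; // same division-by-zero guard as above
  }

  public static void main(String[] args) {
    double[] data = { 1.0, 1.1, 1.2, 1.3, 2.0 };
    for (int i = 0; i < data.length; i++) {
      double own = density(data, i);
      double nsum = 0.0;
      for (int n : neighbors(data, i)) {
        nsum += density(data, n);
      }
      double slof = (own > 0) ? nsum / (K * own) : 1.0;
      System.out.println("x = " + data[i] + "  SLOF = " + slof); // the isolated point gets the largest score
    }
  }
}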
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
index 1542b8e3..f230fd3b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
@@ -77,7 +77,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ExternalDoubleOutlierScore.class);
+ private static final Logging LOG = Logging.getLogger(ExternalDoubleOutlierScore.class);
/**
* The comment character.
@@ -183,7 +183,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
minmax.put(score);
}
else if(id == null && Double.isNaN(score)) {
- logger.warning("Line did not match either ID nor score nor comment: " + line);
+ LOG.warning("Line did not match either ID nor score nor comment: " + line);
}
else {
throw new AbortException("Line matched only ID or only SCORE patterns: " + line);
@@ -224,7 +224,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -246,7 +246,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Key: {@code -externaloutlier.file}
* </p>
*/
- public static final OptionID FILE_ID = OptionID.getOrCreateOptionID("externaloutlier.file", "The file name containing the (external) outlier scores.");
+ public static final OptionID FILE_ID = new OptionID("externaloutlier.file", "The file name containing the (external) outlier scores.");
/**
* Parameter that specifies the object ID pattern
@@ -255,7 +255,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Default: ^ID=
* </p>
*/
- public static final OptionID ID_ID = OptionID.getOrCreateOptionID("externaloutlier.idpattern", "The pattern to match object ID prefix");
+ public static final OptionID ID_ID = new OptionID("externaloutlier.idpattern", "The pattern to match object ID prefix");
/**
* Parameter that specifies the object score pattern
@@ -263,7 +263,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Key: {@code -externaloutlier.scorepattern}<br />
* </p>
*/
- public static final OptionID SCORE_ID = OptionID.getOrCreateOptionID("externaloutlier.scorepattern", "The pattern to match object score prefix");
+ public static final OptionID SCORE_ID = new OptionID("externaloutlier.scorepattern", "The pattern to match object score prefix");
/**
* Parameter to specify a scaling function to use.
@@ -271,12 +271,12 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
* Key: {@code -externaloutlier.scaling}
* </p>
*/
- public static final OptionID SCALING_ID = OptionID.getOrCreateOptionID("externaloutlier.scaling", "Class to use as scaling function.");
+ public static final OptionID SCALING_ID = new OptionID("externaloutlier.scaling", "Class to use as scaling function.");
/**
* Flag parameter for inverted scores.
*/
- public static final OptionID INVERTED_ID = OptionID.getOrCreateOptionID("externaloutlier.inverted", "Flag to signal an inverted outlier score.");
+ public static final OptionID INVERTED_ID = new OptionID("externaloutlier.inverted", "Flag to signal an inverted outlier score.");
/**
* The file to be reparsed
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index 407b7400..b53a0942 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -47,7 +48,7 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -57,7 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -85,22 +86,22 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(FeatureBagging.class);
+ private static final Logging LOG = Logging.getLogger(FeatureBagging.class);
/**
- * Number of instances to use
+ * Number of instances to use.
*/
protected int num = 1;
/**
- * Cumulative sum or breadth first combinations
+ * Cumulative sum or breadth first combinations.
*/
protected boolean breadth = false;
/**
- * Random number generator for subspace choice
+ * Random number generator for subspace choice.
*/
- private Random RANDOM;
+ private RandomFactory rnd;
/**
* The parameters k for LOF.
@@ -113,18 +114,14 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param k k Parameter for LOF
* @param num Number of subspaces to use
* @param breadth Flag for breadth-first merging
+ * @param rnd Random generator
*/
- public FeatureBagging(int k, int num, boolean breadth, Long seed) {
+ public FeatureBagging(int k, int num, boolean breadth, RandomFactory rnd) {
super();
this.k = k;
this.num = num;
this.breadth = breadth;
- if(seed != null) {
- this.RANDOM = new Random(seed);
- }
- else {
- this.RANDOM = new Random();
- }
+ this.rnd = rnd;
}
/**
@@ -133,80 +130,79 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param relation Relation to use
* @return Outlier detection result
*/
- public OutlierResult run(Relation<NumberVector<?, ?>> relation) {
- final int dbdim = DatabaseUtil.dimensionality(relation);
- final int mindim = dbdim / 2;
+ public OutlierResult run(Relation<NumberVector<?>> relation) {
+ final int dbdim = RelationUtil.dimensionality(relation);
+ final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
+ final Random rand = rnd.getRandom();
ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num);
{
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("LOF iterations", num, logger) : null;
- for(int i = 0; i < num; i++) {
- BitSet dimset = randomSubspace(dbdim, mindim, maxdim);
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
+ for (int i = 0; i < num; i++) {
+ BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
- LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df);
+ LOF<NumberVector<?>, DoubleDistance> lof = new LOF<NumberVector<?>, DoubleDistance>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(relation);
results.add(result);
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- if(breadth) {
- FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
+ if (breadth) {
+ FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
- for(OutlierResult r : results) {
+ for (OutlierResult r : results) {
IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
// Iterating over the *lines* of the AS_t(i)-matrix.
- for(int i = 0; i < relation.size(); i++) {
+ for (int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ for (Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
DBIDIter iter = pair.first;
// Always true if every algorithm returns a complete result (one score
// for every DBID).
- if(iter.valid()) {
+ if (iter.valid()) {
double score = pair.second.get(iter);
- if(Double.isNaN(scores.doubleValue(iter))) {
+ if (Double.isNaN(scores.doubleValue(iter))) {
scores.putDouble(iter, score);
minmax.put(score);
}
iter.advance();
- }
- else {
- logger.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
+ } else {
+ LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
}
}
// Progress does not take the initial mapping into account.
- if(cprog != null) {
- cprog.incrementProcessed(logger);
+ if (cprog != null) {
+ cprog.incrementProcessed(LOG);
}
}
- if(cprog != null) {
- cprog.ensureCompleted(logger);
+ if (cprog != null) {
+ cprog.ensureCompleted(LOG);
}
- }
- else {
- FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ } else {
+ FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
+ for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
- for(OutlierResult r : results) {
+ for (OutlierResult r : results) {
final Double s = r.getScores().get(iter);
if (s != null && !Double.isNaN(s)) {
sum += s;
@@ -214,12 +210,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
scores.putDouble(iter, sum);
minmax.put(sum);
- if(cprog != null) {
- cprog.incrementProcessed(logger);
+ if (cprog != null) {
+ cprog.incrementProcessed(LOG);
}
}
- if(cprog != null) {
- cprog.ensureCompleted(logger);
+ if (cprog != null) {
+ cprog.ensureCompleted(LOG);
}
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
@@ -228,36 +224,34 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
/**
- * Choose a random subspace
+ * Choose a random subspace.
*
* @param alldim Number of total dimensions
* @param mindim Minimum number to choose
* @param maxdim Maximum number to choose
* @return Subspace as bits.
*/
- private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim) {
+ private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) {
BitSet dimset = new BitSet();
- {
- // Fill with all dimensions
- int[] dims = new int[alldim];
- for(int d = 0; d < alldim; d++) {
- dims[d] = d;
- }
- // Target dimensionality:
- int subdim = mindim + RANDOM.nextInt(maxdim - mindim);
- // Shrink the subspace to the destination size
- for(int d = 0; d < alldim - subdim; d++) {
- int s = RANDOM.nextInt(alldim - d);
- dimset.set(dims[s]);
- dims[s] = dims[alldim - d - 1];
- }
+ // Fill with all dimensions
+ int[] dims = new int[alldim];
+ for (int d = 0; d < alldim; d++) {
+ dims[d] = d;
+ }
+ // Target dimensionality:
+ int subdim = mindim + rand.nextInt(maxdim - mindim);
+ // Shrink the subspace to the destination size
+ for (int d = 0; d < alldim - subdim; d++) {
+ int s = rand.nextInt(alldim - d);
+ dimset.set(dims[s]);
+ dims[s] = dims[alldim - d - 1];
}
return dimset;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -279,69 +273,71 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* Key: {@code -fbagging.num}
* </p>
*/
- public static final OptionID NUM_ID = OptionID.getOrCreateOptionID("fbagging.num", "The number of instances to use in the ensemble.");
+ public static final OptionID NUM_ID = new OptionID("fbagging.num", "The number of instances to use in the ensemble.");
/**
- * The flag for using the breadth first approach
+ * The flag for using the breadth first approach.
* <p>
* Key: {@code -fbagging.breadth}
* </p>
*/
- public static final OptionID BREADTH_ID = OptionID.getOrCreateOptionID("fbagging.breadth", "Use the breadth first combinations instead of the cumulative sum approach");
+ public static final OptionID BREADTH_ID = new OptionID("fbagging.breadth", "Use the breadth first combinations instead of the cumulative sum approach");
/**
- * The parameter to specify the random seed
+ * The parameter to specify the random seed.
* <p>
* Key: {@code -fbagging.seed}
* </p>
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("fbagging.seed", "Specify a particular random seed.");
+ public static final OptionID SEED_ID = new OptionID("fbagging.seed", "Specify a particular random seed.");
/**
- * The neighborhood size to use
+ * The neighborhood size to use.
*/
protected int k = 2;
/**
- * Number of instances to use
+ * Number of instances to use.
*/
protected int num = 1;
/**
- * Cumulative sum or breadth first combinations
+ * Cumulative sum or breadth first combinations.
*/
protected boolean breadth = false;
/**
- * Random generator seed
+ * Random generator.
*/
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(LOF.K_ID, new GreaterConstraint(1));
- if(config.grab(pK)) {
+ final IntParameter pK = new IntParameter(LOF.K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
+ if (config.grab(pK)) {
k = pK.getValue();
}
- IntParameter NUM_PARAM = new IntParameter(NUM_ID, new GreaterEqualConstraint(1));
- if(config.grab(NUM_PARAM)) {
- num = NUM_PARAM.getValue();
+ IntParameter numP = new IntParameter(NUM_ID);
+ numP.addConstraint(new GreaterEqualConstraint(1));
+ if (config.grab(numP)) {
+ num = numP.getValue();
}
- Flag BREADTH_FLAG = new Flag(BREADTH_ID);
- if(config.grab(BREADTH_FLAG)) {
- breadth = BREADTH_FLAG.getValue();
+ Flag breadthF = new Flag(BREADTH_ID);
+ if (config.grab(breadthF)) {
+ breadth = breadthF.getValue();
}
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected FeatureBagging makeInstance() {
// Default is to re-use the same distance
- return new FeatureBagging(k, num, breadth, seed);
+ return new FeatureBagging(k, num, breadth, rnd);
}
}
-} \ No newline at end of file
+}
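A standalone sketch of the breadth-first combination used above when -fbagging.breadth is set: each ensemble member contributes a ranking of the objects (highest score first), the rankings are walked rank by rank, and every object keeps the first score it is encountered with. The rankings and scores below are made up; in FeatureBagging they come from LOF runs on random subspaces. Without the flag, the scores are instead simply summed per object.

import java.util.Arrays;

// Standalone sketch: breadth-first merging of ensemble outlier scores.
public class BreadthFirstMergeSketch {
  public static void main(String[] args) {
    // Two hypothetical ensemble members over 4 objects (ids 0..3):
    // rank[m][r] = id of the object at rank r in member m (best first),
    // score[m][id] = outlier score member m assigned to that object.
    int[][] rank = { { 2, 0, 1, 3 }, { 3, 2, 1, 0 } };
    double[][] score = { { 1.1, 1.0, 2.3, 0.9 }, { 1.0, 1.2, 1.9, 2.5 } };

    int n = 4;
    double[] combined = new double[n];
    Arrays.fill(combined, Double.NaN); // NaN = not yet assigned, as in FeatureBagging
    // Walk the rankings rank by rank (the "lines of the AS_t(i) matrix"),
    // breadth-first over the members within each rank.
    for (int r = 0; r < n; r++) {
      for (int m = 0; m < rank.length; m++) {
        int id = rank[m][r];
        if (Double.isNaN(combined[id])) {
          combined[id] = score[m][id]; // keep the first, i.e. highest-ranked, score seen
        }
      }
    }
    System.out.println(Arrays.toString(combined)); // [1.1, 1.0, 2.3, 2.5]
    // Objects 2 and 3 keep the top scores of members 0 and 1 respectively;
    // objects 0 and 1 keep the first scores they are reached with.
  }
}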
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
index 73d4156a..15b94322 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
@@ -48,12 +48,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProjectedView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -63,7 +65,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -74,8 +76,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstrain
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Algorithm to compute High Contrast Subspaces for Density-Based Outlier
@@ -99,12 +101,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*/
@Title("HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
@Description("Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
-@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)")
-public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)", url = "http://dx.doi.org/10.1109/ICDE.2012.88")
+public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
- * The Logger for this class
+ * The Logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HiCS.class);
+ private static final Logging LOG = Logging.getLogger(HiCS.class);
/**
* Maximum number of retries.
@@ -112,57 +114,57 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
private static final int MAX_RETRIES = 100;
/**
- * Monte-Carlo iterations
+ * Monte-Carlo iterations.
*/
private int m;
/**
- * Alpha threshold
+ * Alpha threshold.
*/
private double alpha;
/**
- * Outlier detection algorithm
+ * Outlier detection algorithm.
*/
private OutlierAlgorithm outlierAlgorithm;
/**
- * Statistical test to use
+ * Statistical test to use.
*/
private GoodnessOfFitTest statTest;
/**
- * Candidates limit
+ * Candidates limit.
*/
private int cutoff;
-
+
/**
- * Random generator
+ * Random generator.
*/
- private Random random;
+ private RandomFactory rnd;
/**
- * Constructor
+ * Constructor.
*
* @param m value of m
* @param alpha value of alpha
* @param outlierAlgorithm Inner outlier detection algorithm
* @param statTest Test to use
* @param cutoff Candidate limit
- * @param seed Random seed
+ * @param rnd Random generator
*/
- public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, Long seed) {
+ public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, RandomFactory rnd) {
super();
this.m = m;
this.alpha = alpha;
this.outlierAlgorithm = outlierAlgorithm;
this.statTest = statTest;
this.cutoff = cutoff;
- this.random = (seed != null) ? new Random(seed) : new Random();
+ this.rnd = rnd;
}
/**
- * Perform HiCS on a given database
+ * Perform HiCS on a given database.
*
* @param relation the database
* @return The aggregated resulting scores that were assigned by the given
@@ -170,23 +172,23 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
*/
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
- Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex);
+ Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getRandom());
- if(logger.isVerbose()) {
- logger.verbose("Number of high-contrast subspaces: " + subspaces.size());
+ if (LOG.isVerbose()) {
+ LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
List<Relation<Double>> results = new ArrayList<Relation<Double>>();
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), logger) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// run outlier detection and collect the result
// TODO extend so that any outlierAlgorithm can be used (use materialized
// relation instead of SubspaceEuclideanDistanceFunction?)
- for(HiCSSubspace dimset : subspaces) {
- if(logger.isVerbose()) {
- logger.verbose("Performing outlier detection in subspace " + dimset);
+ for (HiCSSubspace dimset : subspaces) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("Performing outlier detection in subspace " + dimset);
}
ProxyDatabase pdb = new ProxyDatabase(ids);
@@ -196,22 +198,22 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
// run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
results.add(result.getScores());
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0.0;
- for(Relation<Double> r : results) {
+ for (Relation<Double> r : results) {
final Double s = r.get(iditer);
- if(s != null && !Double.isNaN(s)) {
+ if (s != null && !Double.isNaN(s)) {
sum += s;
}
}
@@ -232,12 +234,12 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
* @param relation Relation to index
* @return List of sorted objects
*/
- private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?, ?>> relation) {
- final int dim = DatabaseUtil.dimensionality(relation);
+ private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
- for(int i = 1; i <= dim; i++) {
+ for (int i = 0; i < dim; i++) {
ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(relation.getDBIDs());
comp.setDimension(i);
amDBIDs.sort(comp);
@@ -248,140 +250,143 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
}
/**
- * Identifies high contrast subspaces in a given full-dimensional database
+ * Identifies high contrast subspaces in a given full-dimensional database.
*
* @param relation the relation the HiCS should be evaluated for
* @param subspaceIndex Subspace indexes
* @return a set of high contrast subspaces
*/
- private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?, ?>> relation, ArrayList<ArrayDBIDs> subspaceIndex) {
- final int dbdim = DatabaseUtil.dimensionality(relation);
+ private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?>> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
+ final int dbdim = RelationUtil.dimensionality(relation);
- FiniteProgress dprog = logger.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, logger) : null;
- if(dprog != null) {
- dprog.setProcessed(2, logger);
+ FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
+ if (dprog != null) {
+ dprog.setProcessed(2, LOG);
}
TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<HiCSSubspace>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Generating two-element subsets", dbdim * (dbdim - 1) / 2, logger) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
// compute two-element sets of subspaces
- for(int i = 0; i < dbdim; i++) {
- for(int j = i + 1; j < dbdim; j++) {
+ for (int i = 0; i < dbdim; i++) {
+ for (int j = i + 1; j < dbdim; j++) {
HiCSSubspace ts = new HiCSSubspace();
ts.set(i);
ts.set(j);
- calculateContrast(relation, ts, subspaceIndex);
+ calculateContrast(relation, ts, subspaceIndex, random);
dDimensionalList.add(ts);
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
- IndefiniteProgress qprog = logger.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", logger) : null;
- for(int d = 3; !dDimensionalList.isEmpty(); d++) {
- if(dprog != null) {
- dprog.setProcessed(d, logger);
+ IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
+ for (int d = 3; !dDimensionalList.isEmpty(); d++) {
+ if (dprog != null) {
+ dprog.setProcessed(d, LOG);
}
- subspaceList.addAll(dDimensionalList);
// result now contains all d-dimensional sets of subspaces
- ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList);
+ ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList.size());
+ for (HiCSSubspace sub : dDimensionalList) {
+ subspaceList.add(sub);
+ candidateList.add(sub);
+ }
dDimensionalList.clear();
// candidateList now contains the *m* best d-dimensional sets
Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
// TODO: optimize APRIORI style, by not even computing the bit set or?
- for(int i = 0; i < candidateList.size() - 1; i++) {
- for(int j = i + 1; j < candidateList.size(); j++) {
+ for (int i = 0; i < candidateList.size() - 1; i++) {
+ for (int j = i + 1; j < candidateList.size(); j++) {
HiCSSubspace set1 = candidateList.get(i);
HiCSSubspace set2 = candidateList.get(j);
HiCSSubspace joinedSet = new HiCSSubspace();
joinedSet.or(set1);
joinedSet.or(set2);
- if(joinedSet.cardinality() != d) {
+ if (joinedSet.cardinality() != d) {
continue;
}
- calculateContrast(relation, joinedSet, subspaceIndex);
+ calculateContrast(relation, joinedSet, subspaceIndex, random);
dDimensionalList.add(joinedSet);
- if(qprog != null) {
- qprog.incrementProcessed(logger);
+ if (qprog != null) {
+ qprog.incrementProcessed(LOG);
}
}
}
// Prune
- for(HiCSSubspace cand : candidateList) {
- for(HiCSSubspace nextSet : dDimensionalList) {
- if(nextSet.contrast > cand.contrast) {
+ for (HiCSSubspace cand : candidateList) {
+ for (HiCSSubspace nextSet : dDimensionalList) {
+ if (nextSet.contrast > cand.contrast) {
subspaceList.remove(cand);
break;
}
}
}
}
- if(qprog != null) {
- qprog.setCompleted(logger);
+ if (qprog != null) {
+ qprog.setCompleted(LOG);
}
- if(dprog != null) {
- dprog.setProcessed(dbdim, logger);
- dprog.ensureCompleted(logger);
+ if (dprog != null) {
+ dprog.setProcessed(dbdim, LOG);
+ dprog.ensureCompleted(LOG);
}
return subspaceList;
}
/**
- * Calculates the actual contrast of a given subspace
+ * Calculates the actual contrast of a given subspace.
*
- * @param relation
- * @param subspace
+ * @param relation Relation to process
+ * @param subspace Subspace
* @param subspaceIndex Subspace indexes
*/
- private void calculateContrast(Relation<? extends NumberVector<?, ?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex) {
+ private void calculateContrast(Relation<? extends NumberVector<?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int card = subspace.cardinality();
final double alpha1 = Math.pow(alpha, (1.0 / card));
final int windowsize = (int) (relation.size() * alpha1);
- final FiniteProgress prog = logger.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, logger) : null;
+ final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
int retries = 0;
double deviationSum = 0.0;
- for(int i = 0; i < m; i++) {
+ for (int i = 0; i < m; i++) {
// Choose a random set bit.
int chosen = -1;
- for(int tmp = random.nextInt(card); tmp >= 0; tmp--) {
+ for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
chosen = subspace.nextSetBit(chosen + 1);
}
// initialize sample
DBIDs conditionalSample = relation.getDBIDs();
- for(int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
- if(j == chosen) {
+ for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
+ if (j == chosen) {
continue;
}
ArrayDBIDs sortedIndices = subspaceIndex.get(j);
- ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray();
+ ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
// initialize index block
- int start = random.nextInt(relation.size() - windowsize);
- for(int k = start; k < start + windowsize; k++) {
- indexBlock.add(sortedIndices.get(k)); // select index block
+ DBIDArrayIter iter = sortedIndices.iter();
+ iter.seek(random.nextInt(relation.size() - windowsize));
+ for (int k = 0; k < windowsize; k++, iter.advance()) {
+ indexBlock.add(iter); // select index block
}
conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
}
- if(conditionalSample.size() < 10) {
+ if (conditionalSample.size() < 10) {
retries++;
- if(logger.isDebugging()) {
- logger.debug("Sample size very small. Retry no. " + retries);
+ if (LOG.isDebugging()) {
+ LOG.debug("Sample size very small. Retry no. " + retries);
}
- if(retries >= MAX_RETRIES) {
- logger.warning("Too many retries, for small samples: " + retries);
- }
- else {
+ if (retries >= MAX_RETRIES) {
+ LOG.warning("Too many retries, for small samples: " + retries);
+ } else {
i--;
continue;
}
@@ -391,7 +396,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
{
int l = 0;
for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
- sampleValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ sampleValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
@@ -400,23 +405,23 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
{
int l = 0;
for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
- fullValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ fullValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
double contrast = statTest.deviation(fullValues, sampleValues);
- if(Double.isNaN(contrast)) {
+ if (Double.isNaN(contrast)) {
i--;
- logger.warning("Contrast was NaN");
+ LOG.warning("Contrast was NaN");
continue;
}
deviationSum += contrast;
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
subspace.contrast = deviationSum / m;
}
@@ -428,7 +433,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -441,12 +446,12 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
*/
public static class HiCSSubspace extends BitSet {
/**
- * Serial version
+ * Serial version.
*/
private static final long serialVersionUID = 1L;
/**
- * The HiCS contrast value
+ * The HiCS contrast value.
*/
protected double contrast;
@@ -459,22 +464,22 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
@Override
public String toString() {
- StringBuffer buf = new StringBuffer();
+ StringBuilder buf = new StringBuilder();
buf.append("[contrast=").append(contrast);
- for(int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
- buf.append(" ").append(i + 1);
+ for (int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
+ buf.append(' ').append(i + 1);
}
- buf.append("]");
+ buf.append(']');
return buf.toString();
}
/**
* Sort subspaces by their actual subspace.
*/
- public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if(o1.contrast == o2.contrast) {
+ if (o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast > o2.contrast ? 1 : -1;
@@ -484,10 +489,10 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
/**
* Sort subspaces by their actual subspace.
*/
- public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if(o1.contrast == o2.contrast) {
+ if (o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast < o2.contrast ? 1 : -1;
@@ -497,16 +502,15 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
/**
* Sort subspaces by their actual subspace.
*/
- public static Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
int dim1 = o1.nextSetBit(0);
int dim2 = o2.nextSetBit(0);
- while(dim1 >= 0 && dim2 >= 0) {
- if(dim1 < dim2) {
+ while (dim1 >= 0 && dim2 >= 0) {
+ if (dim1 < dim2) {
return -1;
- }
- else if(dim1 > dim2) {
+ } else if (dim1 > dim2) {
return 1;
}
dim1 = o1.nextSetBit(dim1 + 1);
@@ -518,7 +522,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Jan Brusis
*
@@ -526,40 +530,40 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
*
* @param <V> vector type
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* Parameter that specifies the number of iterations in the Monte-Carlo
- * process of identifying high contrast subspaces
+ * process of identifying high contrast subspaces.
*/
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
+ public static final OptionID M_ID = new OptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
/**
* Parameter that determines the size of the test statistic during the
- * Monte-Carlo iteration
+ * Monte-Carlo iteration.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
+ public static final OptionID ALPHA_ID = new OptionID("hics.alpha", "The discriminance value that determines the size of the test statistic.");
/**
* Parameter that specifies which outlier detection algorithm to use on the
- * resulting set of high contrast subspaces
+ * resulting set of high contrast subspaces.
*/
- public static final OptionID ALGO_ID = OptionID.getOrCreateOptionID("hics.algo", "The Algorithm that performs the actual outlier detection on the resulting set of subspace");
+ public static final OptionID ALGO_ID = new OptionID("hics.algo", "The algorithm that performs the actual outlier detection on the resulting set of subspaces");
/**
* Parameter that specifies which statistical test to use in order to
- * calculate the deviation of two given data samples
+ * calculate the deviation of two given data samples.
*/
- public static final OptionID TEST_ID = OptionID.getOrCreateOptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
+ public static final OptionID TEST_ID = new OptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
/**
- * Parameter that specifies the candidate_cutoff
+ * Parameter that specifies the candidate_cutoff.
*/
- public static final OptionID LIMIT_ID = OptionID.getOrCreateOptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
+ public static final OptionID LIMIT_ID = new OptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
/**
- * Parameter that specifies the random seed
+ * Parameter that specifies the random seed.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("hics.seed", "The random seed.");
+ public static final OptionID SEED_ID = new OptionID("hics.seed", "The random seed.");
/**
* Holds the value of {@link #M_ID}.
@@ -582,52 +586,55 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie
private GoodnessOfFitTest statTest;
/**
- * Holds the value of {@link #LIMIT_ID}
+ * Holds the value of {@link #LIMIT_ID}.
*/
private int cutoff = 400;
-
+
/**
- * Random seed (optional)
+ * Random generator.
*/
- private Long seed = null;
+ private RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter mP = new IntParameter(M_ID, new GreaterConstraint(1), 50);
- if(config.grab(mP)) {
- m = mP.getValue();
+ final IntParameter mP = new IntParameter(M_ID, 50);
+ mP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(mP)) {
+ m = mP.intValue();
}
- final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.1);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.1);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<OutlierAlgorithm>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
- if(config.grab(algoP)) {
+ if (config.grab(algoP)) {
outlierAlgorithm = algoP.instantiateClass(config);
}
final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
- if(config.grab(testP)) {
+ if (config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
- final IntParameter cutoffP = new IntParameter(LIMIT_ID, new GreaterConstraint(1), 100);
- if(config.grab(cutoffP)) {
- cutoff = cutoffP.getValue();
+ final IntParameter cutoffP = new IntParameter(LIMIT_ID, 100);
+ cutoffP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(cutoffP)) {
+ cutoff = cutoffP.intValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
-}
+ }
@Override
protected HiCS<V> makeInstance() {
- return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, seed);
+ return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, rnd);
}
}
-}
\ No newline at end of file
+}
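The Monte-Carlo contrast estimate changed in the hunks above can be hard to follow inside a diff, so the following is a minimal standalone sketch of the same idea in plain Java rather than the ELKI implementation: a double[][] matrix and per-attribute sorted index arrays stand in for the Relation and subspaceIndex, and a simple two-sample Kolmogorov-Smirnov statistic stands in for the pluggable GoodnessOfFitTest. The class and method names are illustrative only.

import java.util.*;

public class HiCSContrastSketch {
  // Monte-Carlo contrast of a subspace given as attribute indexes (assumes at least 2 attributes).
  static double contrast(double[][] data, int[] subspace, int m, double alpha, Random rnd) {
    int n = data.length, card = subspace.length;
    int window = (int) (n * Math.pow(alpha, 1.0 / card));
    // Per attribute: object indexes sorted by that attribute (the "subspaceIndex" above).
    Integer[][] sorted = new Integer[card][];
    for (int s = 0; s < card; s++) {
      final int dim = subspace[s];
      Integer[] idx = new Integer[n];
      for (int i = 0; i < n; i++) idx[i] = i;
      Arrays.sort(idx, (a, b) -> Double.compare(data[a][dim], data[b][dim]));
      sorted[s] = idx;
    }
    double deviationSum = 0.0;
    for (int i = 0; i < m; i++) {
      int chosen = rnd.nextInt(card);         // attribute whose marginal distribution is compared
      Set<Integer> sample = null;             // conditional sample
      for (int s = 0; s < card; s++) {
        if (s == chosen) {
          continue;
        }
        int start = rnd.nextInt(n - window);  // random index block in the other attribute
        Set<Integer> block = new HashSet<>(Arrays.asList(sorted[s]).subList(start, start + window));
        if (sample == null) {
          sample = block;
        } else {
          sample.retainAll(block);            // intersection, as in DBIDUtil.intersection above
        }
      }
      // The real code retries when the conditional sample becomes too small; omitted here.
      double[] full = new double[n], cond = new double[sample.size()];
      for (int j = 0; j < n; j++) full[j] = data[j][subspace[chosen]];
      int l = 0;
      for (int id : sample) cond[l++] = data[id][subspace[chosen]];
      deviationSum += ksStatistic(full, cond);
    }
    return deviationSum / m;                  // average deviation = contrast
  }

  // Two-sample Kolmogorov-Smirnov statistic: sup |F1(x) - F2(x)|.
  static double ksStatistic(double[] a, double[] b) {
    double[] x = a.clone(), y = b.clone();
    Arrays.sort(x);
    Arrays.sort(y);
    int i = 0, j = 0;
    double d = 0.0;
    while (i < x.length && j < y.length) {
      double v = Math.min(x[i], y[j]);
      while (i < x.length && x[i] == v) i++;  // step past ties in both samples
      while (j < y.length && y[j] == v) j++;
      d = Math.max(d, Math.abs((double) i / x.length - (double) j / y.length));
    }
    return d;
  }
}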
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
index a4db7e3d..387041da 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
@@ -62,7 +62,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(RescaleMetaOutlierAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(RescaleMetaOutlierAlgorithm.class);
/**
* Parameter to specify a scaling function to use.
@@ -70,7 +70,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
* Key: {@code -comphist.scaling}
* </p>
*/
- public static final OptionID SCALING_ID = OptionID.getOrCreateOptionID("metaoutlier.scaling", "Class to use as scaling function.");
+ public static final OptionID SCALING_ID = new OptionID("metaoutlier.scaling", "Class to use as scaling function.");
/**
* Holds the algorithm to run.
@@ -137,7 +137,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
new file mode 100644
index 00000000..b7791fc4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java
@@ -0,0 +1,222 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.Algorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.ensemble.EnsembleVoting;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Simple outlier ensemble method.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf EnsembleVoting
+ * @apiviz.uses OutlierResult oneway - - reads
+ * @apiviz.uses OutlierResult oneway - - «create»
+ */
+public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging LOG = Logging.getLogger(SimpleOutlierEnsemble.class);
+
+ /**
+ * The algorithms to run.
+ */
+ private List<OutlierAlgorithm> algorithms;
+
+ /**
+ * The voting in use.
+ */
+ private EnsembleVoting voting;
+
+ /**
+ * Constructor.
+ *
+ * @param algorithms Algorithms to run
+ * @param voting Voting method
+ */
+ public SimpleOutlierEnsemble(List<OutlierAlgorithm> algorithms, EnsembleVoting voting) {
+ this.algorithms = algorithms;
+ this.voting = voting;
+ }
+
+ @Override
+ public OutlierResult run(Database database) throws IllegalStateException {
+ int num = algorithms.size();
+ // Run inner outlier algorithms
+ ModifiableDBIDs ids = DBIDUtil.newHashSet();
+ ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num);
+ {
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
+ for (Algorithm alg : algorithms) {
+ Result res = alg.run(database);
+ List<OutlierResult> ors = ResultUtil.getOutlierResults(res);
+ for (OutlierResult or : ors) {
+ results.add(or);
+ ids.addDBIDs(or.getScores().getDBIDs());
+ }
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
+ }
+ }
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
+ }
+ }
+ // Combine
+ WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmax = new DoubleMinMax();
+ {
+ FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", ids.size(), LOG) : null;
+ for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
+ double[] scores = new double[num];
+ int i = 0;
+ for (OutlierResult r : results) {
+ Double score = r.getScores().get(id);
+ if (score != null) {
+ scores[i] = score;
+ i++;
+ } else {
+ LOG.warning("DBID " + id + " was not given a score by result " + r);
+ }
+ }
+ if (i > 0) {
+ // Shrink array if necessary.
+ if (i < scores.length) {
+ scores = Arrays.copyOf(scores, i);
+ }
+ double combined = voting.combine(scores);
+ sumscore.putDouble(id, combined);
+ minmax.put(combined);
+ } else {
+ LOG.warning("DBID " + id + " was not given any score at all.");
+ }
+ if (cprog != null) {
+ cprog.incrementProcessed(LOG);
+ }
+ }
+ if (cprog != null) {
+ cprog.ensureCompleted(LOG);
+ }
+ }
+ OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
+ Relation<Double> scores = new MaterializedRelation<Double>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids);
+ return new OutlierResult(meta, scores);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ TypeInformation[] trs = new TypeInformation[algorithms.size()];
+ for (int i = 0; i < trs.length; i++) {
+ // FIXME: what if an algorithm needs more than one input data source?
+ trs[i] = algorithms.get(i).getInputTypeRestriction()[0];
+ }
+ return TypeUtil.array(new CombinedTypeInformation(trs));
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Voting strategy to use in the ensemble.
+ */
+ public static final OptionID VOTING_ID = new OptionID("ensemble.voting", "Voting strategy to use in the ensemble.");
+
+ /**
+ * The algorithms to run.
+ */
+ private List<OutlierAlgorithm> algorithms;
+
+ /**
+ * The voting in use.
+ */
+ private EnsembleVoting voting;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectListParameter<OutlierAlgorithm> algP = new ObjectListParameter<OutlierAlgorithm>(OptionID.ALGORITHM, OutlierAlgorithm.class);
+ if (config.grab(algP)) {
+ ListParameterization subconfig = new ListParameterization();
+ ChainedParameterization chain = new ChainedParameterization(subconfig, config);
+ chain.errorsTo(config);
+ algorithms = algP.instantiateClasses(chain);
+ subconfig.logAndClearReportedErrors();
+ }
+ ObjectParameter<EnsembleVoting> votingP = new ObjectParameter<EnsembleVoting>(VOTING_ID, EnsembleVoting.class);
+ if (config.grab(votingP)) {
+ voting = votingP.instantiateClass(config);
+ }
+ }
+
+ @Override
+ protected SimpleOutlierEnsemble makeInstance() {
+ return new SimpleOutlierEnsemble(algorithms, voting);
+ }
+ }
+}
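The combine step of the new SimpleOutlierEnsemble reduces to the following standalone sketch, with plain maps in place of ELKI relations and a mean vote standing in for the pluggable EnsembleVoting strategy; all names here are illustrative, not part of the ELKI API.

import java.util.*;

public class EnsembleCombineSketch {
  // Combine per-algorithm score maps into one ensemble score per object id.
  static Map<Integer, Double> combine(List<Map<Integer, Double>> results) {
    Set<Integer> ids = new HashSet<>();
    for (Map<Integer, Double> r : results) ids.addAll(r.keySet());
    Map<Integer, Double> combined = new HashMap<>();
    for (int id : ids) {
      double sum = 0.0;
      int count = 0;
      for (Map<Integer, Double> r : results) {
        Double s = r.get(id);                 // an algorithm may not have scored every object
        if (s != null) { sum += s; count++; }
      }
      if (count > 0) {
        combined.put(id, sum / count);        // mean voting over the scores that exist
      }
    }
    return combined;
  }
}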
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
index d7e78281..7c5dd8b0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java
@@ -1,5 +1,8 @@
/**
* <p>Meta outlier detection algorithms: external scores, score rescaling.</p>
+ *
+ * @apiviz.exclude java.io.File
+ * @apiviz.exclude algorithm.AbstractAlgorithm
*/
/*
This file is part of ELKI:
@@ -23,4 +26,4 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
\ No newline at end of file
+package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
index ea5d3ec4..eca0d876 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java
@@ -4,6 +4,11 @@
* @see de.lmu.ifi.dbs.elki.algorithm
*
* @apiviz.exclude database.query
+ * @apiviz.exclude java.lang.Comparable
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.Algorithm
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm
+ * @apiviz.exclude AggarwalYuEvolutionary.Individuum
*/
/*
This file is part of ELKI:
@@ -27,4 +32,4 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package de.lmu.ifi.dbs.elki.algorithm.outlier; \ No newline at end of file
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
index 1caf7582..f37ee182 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java
@@ -45,7 +45,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
/**
* Parameter to specify the non spatial distance function to use
*/
- public static final OptionID NON_SPATIAL_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("spatialoutlier.nonspatialdistance", "The distance function to use for non spatial attributes");
+ public static final OptionID NON_SPATIAL_DISTANCE_FUNCTION_ID = new OptionID("spatialoutlier.nonspatialdistance", "The distance function to use for non spatial attributes");
/**
* The distance function to use
@@ -84,7 +84,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number
* @param <O> Non-spatial object type
* @param <D> Distance value type
*/
- public static abstract class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public abstract static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
/**
* The distance function to use on the non-spatial attributes.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
index f0c05e1e..d3770504 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java
@@ -44,7 +44,7 @@ public abstract class AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O
/**
* Parameter to specify the neighborhood predicate to use.
*/
- public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("neighborhood", "The neighborhood predicate to use in comparison step.");
+ public static final OptionID NEIGHBORHOOD_ID = new OptionID("neighborhood", "The neighborhood predicate to use in comparison step.");
/**
* Our predicate to obtain the neighbors
@@ -79,7 +79,7 @@ public abstract class AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O
*
* @param <O> Object type
*/
- public static abstract class Parameterizer<O> extends AbstractParameterizer {
+ public abstract static class Parameterizer<O> extends AbstractParameterizer {
/**
* The predicate to obtain the neighbors.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
index 7f3bac29..cd5670f7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
@@ -37,13 +37,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProxyView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -52,7 +52,6 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -85,11 +84,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*/
@Title("GLS-Backward Search")
@Reference(authors = "F. Chen and C.-T. Lu and A. P. Boedihardjo", title = "GLS-SOD: A Generalized Local Statistical Approach for Spatial Outlier Detection", booktitle = "Proc. 16th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145/1835804.1835939")
-public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
+public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuGLSBackwardSearchAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(CTLuGLSBackwardSearchAlgorithm.class);
/**
* Parameter Alpha - significance niveau
@@ -121,7 +120,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
* @param relationy Attribute relation
* @return Algorithm result
*/
- public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) {
+ public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax mm = new DoubleMinMax(0.0, 0.0);
@@ -130,7 +129,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs());
ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx);
- double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha / 2);
+ double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha *.5);
// Detect outliers while significant.
while(true) {
Pair<DBID, Double> candidate = singleIteration(proxy, relationy);
@@ -138,15 +137,15 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
break;
}
scores.putDouble(candidate.first, candidate.second);
- if (!Double.isNaN(candidate.second)) {
+ if(!Double.isNaN(candidate.second)) {
mm.put(candidate.second);
}
idview.remove(candidate.first);
}
// Remaining objects are inliers
- for (DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) {
- scores.putDouble(iter.getDBID(), 0.0);
+ for(DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) {
+ scores.putDouble(iter, 0.0);
}
}
@@ -162,9 +161,9 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
* @param relationy Attribute relation
* @return Top outlier and associated score
*/
- private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) {
- final int dim = DatabaseUtil.dimensionality(relationx);
- final int dimy = DatabaseUtil.dimensionality(relationy);
+ private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) {
+ final int dim = RelationUtil.dimensionality(relationx);
+ final int dimy = RelationUtil.dimensionality(relationy);
assert (dim == 2);
KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
@@ -177,47 +176,51 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
Matrix X = new Matrix(ids.size(), 6);
Matrix F = new Matrix(ids.size(), ids.size());
Matrix Y = new Matrix(ids.size(), dimy);
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
-
- // Fill the data matrix
- {
- V vec = relationx.get(id);
- double la = vec.doubleValue(1);
- double lo = vec.doubleValue(2);
- X.set(i, 0, 1.0);
- X.set(i, 1, la);
- X.set(i, 2, lo);
- X.set(i, 3, la * lo);
- X.set(i, 4, la * la);
- X.set(i, 5, lo * lo);
- }
- {
- for(int d = 0; d < dimy; d++) {
- double idy = relationy.get(id).doubleValue(d + 1);
- Y.set(i, d, idy);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ // Fill the data matrix
+ {
+ V vec = relationx.get(id);
+ double la = vec.doubleValue(0);
+ double lo = vec.doubleValue(1);
+ X.set(i, 0, 1.0);
+ X.set(i, 1, la);
+ X.set(i, 2, lo);
+ X.set(i, 3, la * lo);
+ X.set(i, 4, la * la);
+ X.set(i, 5, lo * lo);
}
- }
- // Fill the neighborhood matrix F:
- {
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
- ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
- for(DistanceResultPair<D> dpair : neighbors) {
- if(id.sameDBID(dpair.getDBID())) {
- continue;
+ {
+ final NumberVector<?> vecy = relationy.get(id);
+ for(int d = 0; d < dimy; d++) {
+ double idy = vecy.doubleValue(d);
+ Y.set(i, d, idy);
}
- neighborhood.add(dpair.getDBID());
}
- // Weight object itself positively.
- F.set(i, i, 1.0);
- final int nweight = -1 / neighborhood.size();
- // We need to find the index positions of the neighbors, unfortunately.
- for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
- int pos = ids.binarySearch(iter.getDBID());
- assert (pos >= 0);
- F.set(pos, i, nweight);
+
+ // Fill the neighborhood matrix F:
+ {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
+ ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
+ for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ if(DBIDUtil.equal(id, neighbor)) {
+ continue;
+ }
+ neighborhood.add(neighbor);
+ }
+ // Weight object itself positively.
+ F.set(i, i, 1.0);
+ final int nweight = -1 / neighborhood.size();
+ // We need to find the index positions of the neighbors,
+ // unfortunately.
+ for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ int pos = ids.binarySearch(iter);
+ assert (pos >= 0);
+ F.set(pos, i, nweight);
+ }
}
}
}
@@ -236,13 +239,13 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
DBID worstid = null;
double worstscore = Double.NEGATIVE_INFINITY;
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
double err = E.getRow(i).euclideanLength();
// double err = Math.abs(E.get(i, 0));
if(err > worstscore) {
worstscore = err;
- worstid = id;
+ worstid = DBIDUtil.deref(id);
}
}
@@ -256,7 +259,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -269,16 +272,16 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
* @param <V> Input vector type
* @param <D> Distance type
*/
- public static class Parameterizer<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
/**
* Holds the alpha value - significance niveau
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("glsbs.alpha", "Significance niveau");
+ public static final OptionID ALPHA_ID = new OptionID("glsbs.alpha", "Significance niveau");
/**
* Parameter to specify the k nearest neighbors
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("glsbs.k", "k nearest neighbors to use");
+ public static final OptionID K_ID = new OptionID("glsbs.k", "k nearest neighbors to use");
/**
* Parameter Alpha - significance niveau
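The design matrix filled in the hunk above gives each object one quadratic trend-surface row over its two spatial coordinates. A minimal sketch of that row, assuming latitude/longitude-like coordinates la and lo as in the code, is:

public class GLSDesignSketch {
  static double[] designRow(double la, double lo) {
    // one row of X per object: intercept, both coordinates, and their quadratic terms
    return new double[] { 1.0, la, lo, la * lo, la * la, lo * lo };
  }
}

The GLS fit against these rows models a smooth spatial trend, and the object whose residual row has the largest length is removed as the next outlier candidate.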
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
index a0c09057..2caee128 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
@@ -31,11 +31,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
@@ -45,7 +45,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
@@ -72,11 +71,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* @param <O> Attribute Vector
*/
@Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179")
-public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier<N> {
+public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> {
/**
* logger
*/
- public static final Logging logger = Logging.getLogger(CTLuMeanMultipleAttributes.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMeanMultipleAttributes.class);
/**
* Constructor
@@ -89,28 +88,27 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
public OutlierResult run(Relation<N> spatial, Relation<O> attributes) {
- if(logger.isDebugging()) {
- logger.debug("Dimensionality: " + DatabaseUtil.dimensionality(attributes));
+ if(LOG.isDebugging()) {
+ LOG.debug("Dimensionality: " + RelationUtil.dimensionality(attributes));
}
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
- CovarianceMatrix covmaker = new CovarianceMatrix(DatabaseUtil.dimensionality(attributes));
+ CovarianceMatrix covmaker = new CovarianceMatrix(RelationUtil.dimensionality(attributes));
WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- final O obj = attributes.get(id);
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final O obj = attributes.get(iditer);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// TODO: remove object itself from neighbors?
// Mean vector "g"
Vector mean = Centroid.make(attributes, neighbors);
// Delta vector "h"
- Vector delta = obj.getColumnVector().minus(mean);
- deltas.put(id, delta);
+ Vector delta = obj.getColumnVector().minusEquals(mean);
+ deltas.put(iditer, delta);
covmaker.put(delta);
}
// Finalize covariance matrix:
@@ -120,11 +118,10 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- Vector temp = deltas.get(id).minus(mean);
+ Vector temp = deltas.get(iditer).minus(mean);
final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
@@ -149,7 +146,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
* @param <N> Neighborhood type
* @param <O> Attribute object type
*/
- public static class Parameterizer<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMeanMultipleAttributes<N, O> makeInstance() {
return new CTLuMeanMultipleAttributes<N, O>(npredf);
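The scoring step in this file (and in the median variant further below) is a Mahalanobis distance of each neighborhood deviation vector under the covariance of all deviations. A minimal standalone sketch, assuming the inverse covariance matrix and the mean deviation have already been computed, is:

public class MahalanobisScoreSketch {
  static double score(double[] delta, double[] meanDelta, double[][] invCov) {
    int d = delta.length;
    double[] t = new double[d];
    for (int i = 0; i < d; i++) t[i] = delta[i] - meanDelta[i];
    double score = 0.0;
    for (int i = 0; i < d; i++) {
      for (int j = 0; j < d; j++) {
        score += t[i] * invCov[i][j] * t[j];  // t^T * Cov^-1 * t
      }
    }
    return score;
  }
}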
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
index 20ab9a00..7755a459 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -30,8 +31,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -60,22 +61,22 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
* The Difference e = non-spatial-Attribute-Value - Median (Neighborhood) is
* computed.<br>
* The Spatial Objects with the highest standardized e value are Spatial
- * Outliers. </p>
+ * Outliers.
*
* @author Ahmed Hettab
*
* @param <N> Neighborhood type
*/
@Title("Median Algorithm for Spatial Outlier Detection")
-@Reference(authors = "C.-T. Lu and D. Chen and Y. Kou", title = "Algorithms for Spatial Outlier Detection", booktitle = "Proc. 3rd IEEE International Conference on Data Mining", url="http://dx.doi.org/10.1109/ICDM.2003.1250986")
+@Reference(authors = "C.-T. Lu and D. Chen and Y. Kou", title = "Algorithms for Spatial Outlier Detection", booktitle = "Proc. 3rd IEEE International Conference on Data Mining", url = "http://dx.doi.org/10.1109/ICDM.2003.1250986")
public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuMedianAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMedianAlgorithm.class);
/**
- * Constructor
+ * Constructor.
*
* @param npredf Neighborhood predicate
*/
@@ -84,42 +85,40 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
}
/**
- * Main method
+ * Main method.
*
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
final double median;
{
double[] fi = new double[neighbors.size()];
// calculate and store Median of neighborhood
int c = 0;
- for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- if(id.sameDBID(iter)) {
+ for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(iditer, iter)) {
continue;
}
- fi[c] = relation.get(iter).doubleValue(1);
+ fi[c] = relation.get(iter).doubleValue(0);
c++;
}
- if(c > 0) {
+ if (c > 0) {
median = QuickSelect.median(fi, 0, c);
- }
- else {
- median = relation.get(id).doubleValue(1);
+ } else {
+ median = relation.get(iditer).doubleValue(0);
}
}
- double h = relation.get(id).doubleValue(1) - median;
- scores.putDouble(id, h);
+ double h = relation.get(iditer).doubleValue(0) - median;
+ scores.putDouble(iditer, h);
mv.put(h);
}
@@ -127,11 +126,10 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
final double mean = mv.getMean();
final double stddev = mv.getNaiveStddev();
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs((scores.doubleValue(id) - mean) / stddev);
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
@@ -143,16 +141,16 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
@@ -166,4 +164,4 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
return new CTLuMedianAlgorithm<N>(npredf);
}
}
-}
\ No newline at end of file
+}
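The two passes of CTLuMedianAlgorithm above reduce to the following standalone sketch, with plain arrays in place of the ELKI relation and neighborhood predicate: first the raw deviation h = value - median(neighbor values), then its standardization. Names are illustrative.

import java.util.*;

public class MedianOutlierSketch {
  static double[] scores(double[] value, int[][] neighbors) {
    int n = value.length;
    double[] h = new double[n];
    double mean = 0.0;
    for (int i = 0; i < n; i++) {
      double[] f = new double[neighbors[i].length];
      int c = 0;
      for (int nb : neighbors[i]) {
        if (nb == i) continue;                // skip the object itself
        f[c++] = value[nb];
      }
      double median = c > 0 ? median(Arrays.copyOf(f, c)) : value[i];
      h[i] = value[i] - median;
      mean += h[i];
    }
    mean /= n;
    double variance = 0.0;
    for (int i = 0; i < n; i++) variance += (h[i] - mean) * (h[i] - mean);
    double stddev = Math.sqrt(variance / n);  // naive (population) stddev, as above
    double[] score = new double[n];
    for (int i = 0; i < n; i++) score[i] = Math.abs((h[i] - mean) / stddev);
    return score;
  }

  static double median(double[] a) {
    Arrays.sort(a);
    int mid = a.length >> 1;
    return (a.length & 1) == 1 ? a[mid] : 0.5 * (a[mid - 1] + a[mid]);
  }
}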
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
index c8bcba74..0d515ac7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
@@ -31,11 +31,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
@@ -44,7 +44,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -73,11 +72,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* @param <O> Non Spatial Vector
*/
@Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179")
-public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier<N> {
+public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> {
/**
* logger
*/
- public static final Logging logger = Logging.getLogger(CTLuMedianMultipleAttributes.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMedianMultipleAttributes.class);
/**
* Constructor
@@ -90,7 +89,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -101,18 +100,17 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
* @return Outlier detection result
*/
public OutlierResult run(Relation<N> spatial, Relation<O> attributes) {
- final int dim = DatabaseUtil.dimensionality(attributes);
- if(logger.isDebugging()) {
- logger.debug("Dimensionality: " + dim);
+ final int dim = RelationUtil.dimensionality(attributes);
+ if(LOG.isDebugging()) {
+ LOG.debug("Dimensionality: " + dim);
}
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
CovarianceMatrix covmaker = new CovarianceMatrix(dim);
WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- final O obj = attributes.get(id);
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final O obj = attributes.get(iditer);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// Compute the median vector
final Vector median;
{
@@ -123,7 +121,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
// TODO: skip object itself within neighbors?
O nobj = attributes.get(iter);
for(int d = 0; d < dim; d++) {
- data[d][i] = nobj.doubleValue(d + 1);
+ data[d][i] = nobj.doubleValue(d);
}
i++;
}
@@ -135,8 +133,8 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
}
// Delta vector "h"
- Vector delta = obj.getColumnVector().minus(median);
- deltas.put(id, delta);
+ Vector delta = obj.getColumnVector().minusEquals(median);
+ deltas.put(iditer, delta);
covmaker.put(delta);
}
// Finalize covariance matrix:
@@ -146,11 +144,10 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- Vector temp = deltas.get(id).minus(mean);
+ Vector temp = deltas.get(iditer).minus(mean);
final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs());
@@ -175,7 +172,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
* @param <N> Neighborhood type
* @param <O> Attributes vector type
*/
- public static class Parameterizer<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
+ public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMedianMultipleAttributes<N, O> makeInstance() {
return new CTLuMedianMultipleAttributes<N, O>(npredf);
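The neighborhood median vector built above collects neighbor values column-wise and takes a per-attribute median; a minimal sketch, using a full sort where ELKI uses QuickSelect, follows. The resulting vector plays the role of the mean in the previous file, and the deviations are then scored with the same Mahalanobis step.

import java.util.Arrays;

public class MedianVectorSketch {
  static double[] medianVector(double[][] neighborAttrs) { // [neighbor][attribute]
    int dim = neighborAttrs[0].length;
    double[] median = new double[dim];
    for (int d = 0; d < dim; d++) {
      double[] row = new double[neighborAttrs.length];
      for (int i = 0; i < neighborAttrs.length; i++) {
        row[i] = neighborAttrs[i][d];         // collect attribute d over all neighbors
      }
      Arrays.sort(row);                       // a sort is enough for a sketch
      int mid = row.length >> 1;
      median[d] = (row.length & 1) == 1 ? row[mid] : 0.5 * (row[mid - 1] + row[mid]);
    }
    return median;
  }
}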
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
index 7b88ae66..3b876bba 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
@@ -32,8 +32,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -76,10 +76,10 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuMoranScatterplotOutlier.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMoranScatterplotOutlier.class);
/**
- * Constructor
+ * Constructor.
*
* @param npredf Neighborhood
*/
@@ -88,20 +88,19 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
}
/**
- * Main method
+ * Main method.
*
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
// Compute the global mean and variance
MeanVariance globalmv = new MeanVariance();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- globalmv.put(relation.get(id).doubleValue(1));
+ globalmv.put(relation.get(iditer).doubleValue(0));
}
DoubleMinMax minmax = new DoubleMinMax();
@@ -110,17 +109,15 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
// calculate normalized attribute values
// calculate neighborhood average of normalized attribute values.
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
// Compute global z score
- final double globalZ = (relation.get(id).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev();
+ final double globalZ = (relation.get(iditer).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev();
// Compute local average z score
Mean localm = new Mean();
- for(DBIDIter iter = npred.getNeighborDBIDs(id).iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ for(DBIDIter iter = npred.getNeighborDBIDs(iditer).iter(); iter.valid(); iter.advance()) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- localm.put((relation.get(n).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev());
+ localm.put((relation.get(iter).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev());
}
// if neighors.size == 0
final double localZ;
@@ -136,7 +133,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
// Note: in the original moran scatterplot, any object with a score < 0 would be an outlier.
final double score = Math.max(-globalZ * localZ, 0);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
@@ -148,16 +145,16 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
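The Moran scatterplot score computed above is the negated product of an object's global z-score and the average z-score of its neighbors, clamped at zero so that only disagreement between an object and its neighborhood counts. A minimal sketch follows; the fallback for an empty neighborhood is an assumption, since the corresponding branch lies outside this excerpt.

public class MoranScoreSketch {
  static double score(double value, double[] neighborValues, double mean, double stddev) {
    double globalZ = (value - mean) / stddev;
    double localZ = globalZ;                  // assumed fallback when there are no neighbors
    if (neighborValues.length > 0) {
      double sum = 0.0;
      for (double nv : neighborValues) {
        sum += (nv - mean) / stddev;          // neighbor z-scores under the global mean/stddev
      }
      localZ = sum / neighborValues.length;
    }
    return Math.max(-globalZ * localZ, 0);    // positive products (agreement) score zero
  }
}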
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
index 852c4be4..ec92afd7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -33,7 +34,6 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -42,6 +42,8 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -51,7 +53,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -82,30 +83,30 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*/
@Title("Random Walk on Exhaustive Combination")
@Description("Spatial Outlier Detection using Random Walk on Exhaustive Combination")
-@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url="http://dx.doi.org/10.1145/1869790.1869841")
+@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url = "http://dx.doi.org/10.1145/1869790.1869841")
public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<N, D, OutlierResult> implements OutlierAlgorithm {
/**
- * Logger
+ * Logger.
*/
- private static final Logging logger = Logging.getLogger(CTLuRandomWalkEC.class);
+ private static final Logging LOG = Logging.getLogger(CTLuRandomWalkEC.class);
/**
- * Parameter alpha: Attribute difference exponent
+ * Parameter alpha: Attribute difference exponent.
*/
private double alpha;
/**
- * Parameter c: damping factor
+ * Parameter c: damping factor.
*/
private double c;
/**
- * Parameter k
+ * Parameter k.
*/
private int k;
/**
- * Constructor
+ * Constructor.
*
* @param distanceFunction Distance function
* @param alpha Alpha parameter
@@ -120,13 +121,13 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
/**
- * Run the algorithm
+ * Run the algorithm.
*
* @param spatial Spatial neighborhood relation
* @param relation Attribute value relation
* @return Outlier result
*/
- public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?>> relation) {
DistanceQuery<N, D> distFunc = getDistanceFunction().instantiate(spatial);
WritableDataStore<Vector> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
@@ -136,39 +137,41 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
// construct the relation Matrix of the ec-graph
Matrix E = new Matrix(ids.size(), ids.size());
- KNNHeap<D> heap = new KNNHeap<D>(k);
- for(int i = 0; i < ids.size(); i++) {
- final DBID id = ids.get(i);
- final double val = relation.get(id).doubleValue(1);
- assert (heap.size() == 0);
- for(int j = 0; j < ids.size(); j++) {
- if(i == j) {
- continue;
- }
- final DBID n = ids.get(j);
- final double e;
- final D distance = distFunc.distance(id, n);
- heap.add(distance, n);
- double dist = distance.doubleValue();
- if(dist == 0) {
- logger.warning("Zero distances are not supported - skipping: " + id + " " + n);
- e = 0;
+ KNNHeap<D> heap = KNNUtil.newHeap(distFunc.getDistanceFactory(), k);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ final double val = relation.get(id).doubleValue(0);
+ assert (heap.size() == 0);
+ int j = 0;
+ for(DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
+ if(i == j) {
+ continue;
+ }
+ final double e;
+ final D distance = distFunc.distance(id, n);
+ heap.add(distance, n);
+ double dist = distance.doubleValue();
+ if(dist == 0) {
+ LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
+ e = 0;
+ }
+ else {
+ double diff = Math.abs(val - relation.get(n).doubleValue(0));
+ double exp = Math.exp(Math.pow(diff, alpha));
+ // Implementation note: not inverting exp worked a lot better.
+ // Therefore we diverge from the article here.
+ e = exp / dist;
+ }
+ E.set(j, i, e);
}
- else {
- double diff = Math.abs(val - relation.get(n).doubleValue(1));
- double exp = Math.exp(Math.pow(diff, alpha));
- // Implementation note: not inverting exp worked a lot better.
- // Therefore we diverge from the article here.
- e = exp / dist;
+ // Convert kNN Heap into DBID array
+ ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
+ while(heap.size() > 0) {
+ nids.add(heap.poll());
}
- E.set(j, i, e);
- }
- // Convert kNN Heap into DBID array
- ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
- while(!heap.isEmpty()) {
- nids.add(heap.poll().getDBID());
+ neighbors.put(id, nids);
}
- neighbors.put(id, nids);
}
    // normalize the adjacency matrix
    // Sum-based normalization - don't use E.normalizeColumns()
@@ -195,26 +198,26 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
E = E.inverse().timesEquals(1 - c);
// Split the matrix into columns
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
- // Note: matrix times ith unit vector = ith column
- Vector sim = E.getCol(i);
- similarityVectors.put(id, sim);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ // Note: matrix times ith unit vector = ith column
+ Vector sim = E.getCol(i);
+ similarityVectors.put(id, sim);
+ }
}
E = null;
// compute the relevance scores between specified Object and its neighbors
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
double gmean = 1.0;
int cnt = 0;
for(DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ if(DBIDUtil.equal(id, iter)) {
continue;
}
- double sim = MathUtil.angle(similarityVectors.get(id), similarityVectors.get(n));
+ double sim = MathUtil.angle(similarityVectors.get(id), similarityVectors.get(iter));
gmean *= sim;
cnt++;
}
@@ -230,12 +233,12 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -250,32 +253,32 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
*/
public static class Parameterizer<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<N, D> {
/**
- * Parameter to specify the number of neighbors
+ * Parameter to specify the number of neighbors.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("randomwalkec.k", "Number of nearest neighbors to use.");
+ public static final OptionID K_ID = new OptionID("randomwalkec.k", "Number of nearest neighbors to use.");
/**
- * Parameter to specify alpha
+ * Parameter to specify alpha.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("randomwalkec.alpha", "Scaling exponent for value differences.");
+ public static final OptionID ALPHA_ID = new OptionID("randomwalkec.alpha", "Scaling exponent for value differences.");
/**
- * Parameter to specify the c
+ * Parameter to specify the c.
*/
- public static final OptionID C_ID = OptionID.getOrCreateOptionID("randomwalkec.c", "The damping parameter c.");
+ public static final OptionID C_ID = new OptionID("randomwalkec.c", "The damping parameter c.");
/**
- * Parameter alpha: scaling
+ * Parameter alpha: scaling.
*/
double alpha = 0.5;
/**
- * Parameter c: damping coefficient
+ * Parameter c: damping coefficient.
*/
double c = 0.9;
/**
- * Parameter for kNN
+ * Parameter for kNN.
*/
int k;
@@ -288,19 +291,20 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
/**
- * Get the kNN parameter
+ * Get the kNN parameter.
*
* @param config Parameterization
*/
protected void configK(Parameterization config) {
- final IntParameter param = new IntParameter(K_ID, new GreaterEqualConstraint(1));
+ final IntParameter param = new IntParameter(K_ID);
+ param.addConstraint(new GreaterEqualConstraint(1));
if(config.grab(param)) {
k = param.getValue();
}
}
/**
- * Get the alpha parameter
+ * Get the alpha parameter.
*
* @param config Parameterization
*/
@@ -312,9 +316,9 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
}
/**
- * get the c parameter
+ * get the c parameter.
*
- * @param config
+ * @param config Parameterization
*/
protected void configC(Parameterization config) {
final DoubleParameter param = new DoubleParameter(C_ID);
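
The edge weight that fills the EC-graph matrix E in the hunk above can be summarized by this small sketch (illustrative names, not the ELKI API); diff is the absolute attribute difference and dist the spatial distance, which the code requires to be non-zero:

final class EcEdgeWeightSketch {
  static double weight(double diff, double dist, double alpha) {
    // As the in-code note says, exp(diff^alpha) is deliberately not inverted,
    // diverging from the Liu/Lu/Chen article.
    return Math.exp(Math.pow(diff, alpha)) / dist;
  }
}
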
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
index 4f11cb38..295c7414 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
@@ -31,8 +31,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -78,10 +78,10 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuScatterplotOutlier.class);
+ private static final Logging LOG = Logging.getLogger(CTLuScatterplotOutlier.class);
/**
- * Constructor
+ * Constructor.
*
* @param npredf Neighborhood predicate
*/
@@ -90,13 +90,13 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
}
/**
- * Main method
+ * Main method.
*
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
@@ -104,17 +104,15 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
// regression using the covariance matrix
CovarianceMatrix covm = new CovarianceMatrix(2);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- final double local = relation.get(id).doubleValue(1);
+ final double local = relation.get(iditer).doubleValue(0);
// Compute mean of neighbors
Mean mean = new Mean();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- mean.put(relation.get(n).doubleValue(1));
+ mean.put(relation.get(iter).doubleValue(0));
}
final double m;
if(mean.getCount() > 0) {
@@ -125,7 +123,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
m = local;
}
// Store the mean for the score calculation
- means.putDouble(id, m);
+ means.putDouble(iditer, m);
covm.put(new double[] { local, m });
}
// Finalize covariance matrix, compute linear regression
@@ -143,11 +141,10 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
// Compute the error from the linear regression
- double y_i = relation.get(id).doubleValue(1);
- double e = means.doubleValue(id) - (slope * y_i + inter);
- scores.putDouble(id, e);
+ double y_i = relation.get(iditer).doubleValue(0);
+ double e = means.doubleValue(iditer) - (slope * y_i + inter);
+ scores.putDouble(iditer, e);
mv.put(e);
}
@@ -157,10 +154,9 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
final double mean = mv.getMean();
final double variance = mv.getNaiveStddev();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs((scores.doubleValue(id) - mean) / variance);
+ double score = Math.abs((scores.doubleValue(iditer) - mean) / variance);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
}
// build representation
@@ -173,16 +169,16 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
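
The raw score computed by CTLuScatterplotOutlier above is the residual of the neighborhood mean under a linear regression on the object's own value; a minimal sketch with illustrative names, assuming slope and intercept were fitted from the 2d covariance matrix as in the code:

final class ScatterplotErrorSketch {
  static double error(double local, double neighborMean, double slope, double intercept) {
    // Deviation of the observed neighborhood mean from its regression prediction.
    return neighborMean - (slope * local + intercept);
  }
}

The final score is then the absolute z-score of this error over all objects.
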
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
index 05729481..02573a06 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
@@ -32,8 +32,8 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -79,60 +79,57 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuZTestOutlier.class);
+ private static final Logging LOG = Logging.getLogger(CTLuZTestOutlier.class);
/**
- * Constructor
+ * Constructor.
*
- * @param npredf
+ * @param npredf Neighbor predicate
*/
public CTLuZTestOutlier(NeighborSetPredicate.Factory<N> npredf) {
super(npredf);
}
/**
- * Main method
+ * Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance zmv = new MeanVariance();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// Compute Mean of neighborhood
Mean localmean = new Mean();
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- localmean.put(relation.get(n).doubleValue(1));
+ localmean.put(relation.get(iter).doubleValue(0));
}
final double localdiff;
if(localmean.getCount() > 0) {
- localdiff = relation.get(id).doubleValue(1) - localmean.getMean();
+ localdiff = relation.get(iditer).doubleValue(0) - localmean.getMean();
}
else {
localdiff = 0.0;
}
- scores.putDouble(id, localdiff);
+ scores.putDouble(iditer, localdiff);
zmv.put(localdiff);
}
// Normalize scores using mean and variance
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs(scores.doubleValue(id) - zmv.getMean()) / zmv.getSampleStddev();
+ double score = Math.abs(scores.doubleValue(iditer) - zmv.getMean()) / zmv.getSampleStddev();
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
// Wrap result
@@ -145,16 +142,16 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
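
The normalization step of CTLuZTestOutlier above amounts to a plain z-test on the local differences; a minimal sketch with illustrative names, assuming meanDiff and sampleStddev were aggregated over all localdiff values in the first loop:

final class ZTestScoreSketch {
  static double score(double localdiff, double meanDiff, double sampleStddev) {
    return Math.abs(localdiff - meanDiff) / sampleStddev;
  }
}
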
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
index 8ae23229..720fa39f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
@@ -30,8 +30,8 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -74,7 +74,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SLOM.class);
+ private static final Logging LOG = Logging.getLogger(SLOM.class);
/**
* Constructor.
@@ -100,29 +100,27 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// calculate D-Tilde
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
double sum = 0;
double maxDist = 0;
int cnt = 0;
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID neighbor = iter.getDBID();
- if(id.equals(neighbor)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- double dist = distFunc.distance(id, neighbor).doubleValue();
+ double dist = distFunc.distance(iditer, iter).doubleValue();
sum += dist;
cnt++;
maxDist = Math.max(maxDist, dist);
}
if(cnt > 1) {
- modifiedDistance.putDouble(id, ((sum - maxDist) / (cnt - 1)));
+ modifiedDistance.putDouble(iditer, ((sum - maxDist) / (cnt - 1)));
}
else {
// Use regular distance when the d-tilde trick is undefined.
// Note: this can be 0 when there were no neighbors.
- modifiedDistance.putDouble(id, maxDist);
+ modifiedDistance.putDouble(iditer, maxDist);
}
}
@@ -131,29 +129,26 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
double sum = 0;
int cnt = 0;
- final DBIDs neighbors = npred.getNeighborDBIDs(id);
+ final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID neighbor = iter.getDBID();
- if(neighbor.equals(id)) {
+ if(DBIDUtil.equal(iditer, iter)) {
continue;
}
- sum += modifiedDistance.doubleValue(neighbor);
+ sum += modifiedDistance.doubleValue(iter);
cnt++;
}
double slom;
if(cnt > 0) {
// With and without the object itself:
- double avgPlus = (sum + modifiedDistance.doubleValue(id)) / (cnt + 1);
+ double avgPlus = (sum + modifiedDistance.doubleValue(iditer)) / (cnt + 1);
double avg = sum / cnt;
double beta = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID neighbor = iter.getDBID();
- final double dist = modifiedDistance.doubleValue(neighbor);
+ final double dist = modifiedDistance.doubleValue(iter);
if(dist > avgPlus) {
beta += 1;
}
@@ -162,8 +157,8 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
}
}
// Include object itself
- if(!neighbors.contains(id)) {
- final double dist = modifiedDistance.doubleValue(id);
+ if(!neighbors.contains(iditer)) {
+ final double dist = modifiedDistance.doubleValue(iditer);
if(dist > avgPlus) {
beta += 1;
}
@@ -182,13 +177,13 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
}
beta = beta / (1 + avg);
- slom = beta * modifiedDistance.doubleValue(id);
+ slom = beta * modifiedDistance.doubleValue(iditer);
}
else {
// No neighbors to compare to - no score.
slom = 0.0;
}
- sloms.putDouble(id, slom);
+ sloms.putDouble(iditer, slom);
slomminmax.put(slom);
}
@@ -201,7 +196,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
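
The "d-tilde" value that SLOM stores per object above is a trimmed average distance; a minimal sketch with illustrative names (sum and maxDist are the total and largest distance to the spatial neighbors, cnt the neighbor count):

final class SlomDTildeSketch {
  static double dTilde(double sum, double maxDist, int cnt) {
    // Drop the single largest distance before averaging; fall back to the plain
    // maximum (possibly 0) when there are not enough neighbors.
    return (cnt > 1) ? (sum - maxDist) / (cnt - 1) : maxDist;
  }
}
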
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
index e9987bf0..a6f39a60 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
@@ -29,7 +29,6 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -74,7 +73,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SOF.class);
+ private static final Logging LOG = Logging.getLogger(SOF.class);
/**
* Constructor.
@@ -89,7 +88,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -110,33 +109,31 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
// Compute densities
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += distFunc.distance(id, iter.getDBID()).doubleValue();
+ avg += distFunc.distance(iditer, iter).doubleValue();
}
double lrd = 1 / (avg / neighbors.size());
if (Double.isNaN(lrd)) {
lrd = 0;
}
- lrds.putDouble(id, lrd);
+ lrds.putDouble(iditer, lrd);
}
// Compute density quotients
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += lrds.doubleValue(iter.getDBID());
+ avg += lrds.doubleValue(iter);
}
- final double lrd = (avg / neighbors.size()) / lrds.doubleValue(id);
+ final double lrd = (avg / neighbors.size()) / lrds.doubleValue(iditer);
if (!Double.isNaN(lrd)) {
- lofs.putDouble(id, lrd);
+ lofs.putDouble(iditer, lrd);
lofminmax.put(lrd);
} else {
- lofs.putDouble(id, 0.0);
+ lofs.putDouble(iditer, 0.0);
}
}
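
The two passes of SOF above compute a density per object and then a LOF-style quotient; a minimal sketch with illustrative names, mirroring the NaN handling of the code:

final class SofSketch {
  /** Density: inverse of the average distance to the spatial neighbors. */
  static double density(double sumDist, int numNeighbors) {
    double lrd = 1 / (sumDist / numNeighbors);
    return Double.isNaN(lrd) ? 0 : lrd;
  }

  /** Score: average neighbor density divided by the object's own density. */
  static double score(double sumNeighborDensity, int numNeighbors, double ownDensity) {
    double sof = (sumNeighborDensity / numNeighbors) / ownDensity;
    return Double.isNaN(sof) ? 0 : sof;
  }
}
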
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
index 41022414..9aa21b66 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
@@ -33,11 +33,11 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -45,14 +45,13 @@ import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -83,15 +82,15 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(TrimmedMeanApproach.class);
+ private static final Logging LOG = Logging.getLogger(TrimmedMeanApproach.class);
/**
- * the parameter p
+ * the parameter p.
*/
private double p;
/**
- * Constructor
+ * Constructor.
*
* @param p Parameter p
* @param npredf Neighborhood factory.
@@ -102,29 +101,28 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
}
/**
- * Run the algorithm
+ * Run the algorithm.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data Relation (1 dimensional!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
- assert (DatabaseUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
int num = 0;
double[] values = new double[neighbors.size()];
// calculate trimmedMean
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- values[num] = relation.get(iter).doubleValue(1);
+ values[num] = relation.get(iter).doubleValue(0);
num++;
}
@@ -141,21 +139,21 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
tm = mean.getMean();
}
else {
- tm = relation.get(id).doubleValue(1);
+ tm = relation.get(iditer).doubleValue(0);
}
// Error: deviation from trimmed mean
- errors.putDouble(id, relation.get(id).doubleValue(1) - tm);
+ errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
- if(logger.isVerbose()) {
- logger.verbose("Computing median error.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing median error.");
}
double median_dev_from_median;
{
@@ -164,8 +162,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
{
int i = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- ei[i] = errors.doubleValue(id);
+ ei[i] = errors.doubleValue(iditer);
i++;
}
}
@@ -178,15 +175,14 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
median_dev_from_median = QuickSelect.median(ei);
}
- if(logger.isVerbose()) {
- logger.verbose("Normalizing scores.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Normalizing scores.");
}
// calculate score
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- double score = Math.abs(errors.doubleValue(id)) * 0.6745 / median_dev_from_median;
- scores.putDouble(id, score);
+ double score = Math.abs(errors.doubleValue(iditer)) * 0.6745 / median_dev_from_median;
+ scores.putDouble(iditer, score);
minmax.put(score);
}
//
@@ -199,17 +195,17 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
// Get one dimensional attribute for analysis.
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterizer
+ * Parameterizer.
*
* @author Ahmed Hettab
*
@@ -219,19 +215,21 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
*/
public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
/**
- * Parameter for the percentile value p
+ * Parameter for the percentile value p.
*/
- public static final OptionID P_ID = OptionID.getOrCreateOptionID("tma.p", "the percentile parameter");
+ public static final OptionID P_ID = new OptionID("tma.p", "the percentile parameter");
/**
- * Percentile parameter p
+ * Percentile parameter p.
*/
protected double p = 0.2;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter pP = new DoubleParameter(P_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 0.5, IntervalBoundary.OPEN));
+ DoubleParameter pP = new DoubleParameter(P_ID);
+ pP.addConstraint(new GreaterConstraint(0.0));
+ pP.addConstraint(new LessConstraint(0.5));
if(config.grab(pP)) {
p = pP.getValue();
}
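
The final score of TrimmedMeanApproach above is a robust standardization of the deviation from the neighborhood's trimmed mean; a minimal sketch with illustrative names, where medianAbsDev stands for the median_dev_from_median computed in the partially elided block above:

final class TrimmedMeanScoreSketch {
  static double score(double error, double medianAbsDev) {
    // 0.6745 is the usual consistency factor that makes a median absolute
    // deviation comparable to a standard deviation under a normal model.
    return Math.abs(error) * 0.6745 / medianAbsDev;
  }
}
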
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
index 5898b053..2c706ce0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java
@@ -24,7 +24,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
*/
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -50,7 +51,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred
}
@Override
- public DBIDs getNeighborDBIDs(DBID reference) {
+ public DBIDs getNeighborDBIDs(DBIDRef reference) {
DBIDs neighbors = store.get(reference);
if(neighbors != null) {
return neighbors;
@@ -60,7 +61,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred
if(getLogger().isDebugging()) {
getLogger().warning("No neighbors for object " + reference);
}
- return reference;
+ return DBIDUtil.deref(reference);
}
}
@@ -69,7 +70,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred
*
* @return Logger
*/
- abstract protected Logging getLogger();
+ protected abstract Logging getLogger();
/**
* Factory class.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
index 7a2fda52..4aa96b25 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
@@ -28,7 +28,6 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -54,7 +53,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* The logger to use.
*/
- static final Logging logger = Logging.getLogger(ExtendedNeighborhood.class);
+ private static final Logging LOG = Logging.getLogger(ExtendedNeighborhood.class);
/**
* Constructor.
@@ -67,7 +66,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -132,23 +131,22 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class);
// Expand multiple steps
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), LOG) : null;
for(DBIDIter iter = database.iterDBIDs(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- HashSetModifiableDBIDs res = DBIDUtil.newHashSet(id);
- DBIDs todo = id;
+ HashSetModifiableDBIDs res = DBIDUtil.newHashSet();
+ res.add(iter);
+ DBIDs todo = DBIDUtil.deref(iter);
for(int i = 0; i < steps; i++) {
ModifiableDBIDs ntodo = DBIDUtil.newHashSet();
for(DBIDIter iter2 = todo.iter(); iter2.valid(); iter2.advance()) {
- DBIDs add = innerinst.getNeighborDBIDs(iter2.getDBID());
+ DBIDs add = innerinst.getNeighborDBIDs(iter2);
if(add != null) {
- for(DBIDIter iter3 = add.iter(); iter.valid(); iter.advance()) {
- DBID nid = iter3.getDBID();
- if(res.contains(nid)) {
+ for(DBIDIter iter3 = add.iter(); iter3.valid(); iter3.advance()) {
+ if(res.contains(iter3)) {
continue;
}
- ntodo.add(nid);
- res.add(nid);
+ ntodo.add(iter3);
+ res.add(iter3);
}
}
}
@@ -157,13 +155,13 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
}
todo = ntodo;
}
- store.put(id, res);
+ store.put(iter, res);
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
return store;
@@ -180,12 +178,12 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* Parameter to specify the neighborhood predicate to use.
*/
- public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
+ public static final OptionID NEIGHBORHOOD_ID = new OptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
/**
* Parameter to specify the number of steps allowed
*/
- public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
+ public static final OptionID STEPS_ID = new OptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
/**
* The number of steps to do.
@@ -225,7 +223,8 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
* @return number of steps, default 1
*/
public static int getParameterSteps(Parameterization config) {
- final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1));
+ final IntParameter param = new IntParameter(STEPS_ID);
+ param.addConstraint(new GreaterEqualConstraint(1));
if(config.grab(param)) {
return param.getValue();
}
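
The rewritten loop in ExtendedNeighborhood above performs a steps-bounded closure of the inner neighborhood predicate; a minimal sketch of the same idea using plain java.util collections and integer ids (illustrative only, not the ELKI DBID API):

import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

final class ExpansionSketch {
  static Set<Integer> expand(int start, Map<Integer, Set<Integer>> inner, int steps) {
    Set<Integer> result = new HashSet<>(Collections.singleton(start));
    Set<Integer> todo = Collections.singleton(start);
    for (int i = 0; i < steps; i++) {
      Set<Integer> next = new HashSet<>();
      for (int cur : todo) {
        for (int n : inner.getOrDefault(cur, Collections.<Integer>emptySet())) {
          if (result.add(n)) { // true only for newly discovered ids
            next.add(n);
          }
        }
      }
      if (next.isEmpty()) {
        break; // neighborhood saturated before using all steps
      }
      todo = next;
    }
    return result;
  }
}
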
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
index 74e5bbcf..01052c1f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
@@ -63,12 +63,12 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
/**
* Logger
*/
- static final Logging logger = Logging.getLogger(ExternalNeighborhood.class);
+ private static final Logging LOG = Logging.getLogger(ExternalNeighborhood.class);
/**
* Parameter to specify the neighborhood file
*/
- public static final OptionID NEIGHBORHOOD_FILE_ID = OptionID.getOrCreateOptionID("externalneighbors.file", "The file listing the neighbors.");
+ public static final OptionID NEIGHBORHOOD_FILE_ID = new OptionID("externalneighbors.file", "The file listing the neighbors.");
/**
* Constructor.
@@ -91,7 +91,7 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -136,33 +136,32 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
private DataStore<DBIDs> loadNeighbors(Relation<?> database) {
final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class);
- if(logger.isVerbose()) {
- logger.verbose("Loading external neighborhoods.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Loading external neighborhoods.");
}
- if(logger.isDebugging()) {
- logger.verbose("Building reverse label index...");
+ if(LOG.isDebugging()) {
+ LOG.verbose("Building reverse label index...");
}
// Build a map label/ExternalId -> DBID
// (i.e. a reverse index!)
// TODO: move this into the database layer to share?
- Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() * 2);
+ Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() << 1);
{
Relation<LabelList> olq = database.getDatabase().getRelation(TypeUtil.LABELLIST);
Relation<ExternalID> eidq = database.getDatabase().getRelation(TypeUtil.EXTERNALID);
for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
if(eidq != null) {
- ExternalID eid = eidq.get(id);
+ ExternalID eid = eidq.get(iditer);
if(eid != null) {
- lblmap.put(eid.toString(), id);
+ lblmap.put(eid.toString(), DBIDUtil.deref(iditer));
}
}
if(olq != null) {
- LabelList label = olq.get(id);
+ LabelList label = olq.get(iditer);
if(label != null) {
for(String lbl : label) {
- lblmap.put(lbl, id);
+ lblmap.put(lbl, DBIDUtil.deref(iditer));
}
}
}
@@ -170,8 +169,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
}
try {
- if(logger.isDebugging()) {
- logger.verbose("Loading neighborhood file.");
+ if(LOG.isDebugging()) {
+ LOG.verbose("Loading neighborhood file.");
}
InputStream in = new FileInputStream(file);
in = FileUtil.tryGzipInput(in);
@@ -187,16 +186,16 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
neighbours.add(neigh);
}
else {
- if(logger.isDebugging()) {
- logger.debug("No object found for label " + entries[i]);
+ if(LOG.isDebugging()) {
+ LOG.debug("No object found for label " + entries[i]);
}
}
}
store.put(id, neighbours);
}
else {
- if(logger.isDebugging()) {
- logger.warning("No object found for label " + entries[0]);
+ if(LOG.isDebugging()) {
+ LOG.warning("No object found for label " + entries[0]);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
index 3a6d0e28..b52f8e91 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java
@@ -24,7 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood;
*/
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -42,7 +42,7 @@ public interface NeighborSetPredicate extends Result {
* @param reference Reference object
* @return Neighborhood
*/
- public DBIDs getNeighborDBIDs(DBID reference);
+ public DBIDs getNeighborDBIDs(DBIDRef reference);
/**
* Factory interface to produce instances.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
index 9dd2dee1..f6000ef0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
@@ -29,15 +29,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -57,7 +55,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
/**
* Logger
*/
- private static final Logging logger = Logging.getLogger(PrecomputedKNearestNeighborNeighborhood.class);
+ private static final Logging LOG = Logging.getLogger(PrecomputedKNearestNeighborNeighborhood.class);
/**
* Constructor.
@@ -80,7 +78,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -121,13 +119,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
// TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- DBID id = iditer.getDBID();
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
- for(DistanceResultPair<D> dpair : neighbors) {
- neighbours.add(dpair.getDBID());
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ neighbours.add(neighbor);
}
- s.put(id, neighbours);
+ s.put(iditer, neighbours);
}
return new PrecomputedKNearestNeighborNeighborhood<D>(s);
}
@@ -151,12 +148,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
/**
* Parameter k
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("neighborhood.k", "the number of neighbors");
+ public static final OptionID K_ID = new OptionID("neighborhood.k", "the number of neighbors");
/**
* Parameter to specify the distance function to use
*/
- public static final OptionID DISTANCEFUNCTION_ID = OptionID.getOrCreateOptionID("neighborhood.distancefunction", "the distance function to use");
+ public static final OptionID DISTANCEFUNCTION_ID = new OptionID("neighborhood.distancefunction", "the distance function to use");
/**
* Parameter k
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
index d170571f..f1c68577 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
@@ -29,10 +29,11 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -41,7 +42,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Neighborhood obtained by computing the k-fold closure of an existing
@@ -87,29 +87,27 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
}
@Override
- public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) {
+ public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
ModifiableDBIDs seen = DBIDUtil.newHashSet();
- List<DoubleObjPair<DBID>> result = new ArrayList<DoubleObjPair<DBID>>();
+ List<DoubleDBIDPair> result = new ArrayList<DoubleDBIDPair>();
// Add starting object
- result.add(new DoubleObjPair<DBID>(computeWeight(0), reference));
+ result.add(DBIDUtil.newPair(computeWeight(0), reference));
seen.add(reference);
// Extend.
- DBIDs cur = reference;
+ DBIDs cur = DBIDUtil.deref(reference);
for(int i = 1; i <= steps; i++) {
final double weight = computeWeight(i);
// Collect newly discovered IDs
ModifiableDBIDs add = DBIDUtil.newHashSet();
for(DBIDIter iter = cur.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- for(DBIDIter iter2 = inner.getNeighborDBIDs(id).iter(); iter2.valid(); iter2.advance()) {
- DBID nid = iter2.getDBID();
+ for(DBIDIter iter2 = inner.getNeighborDBIDs(iter).iter(); iter2.valid(); iter2.advance()) {
// Seen before?
- if(seen.contains(nid)) {
+ if(seen.contains(iter2)) {
continue;
}
- add.add(nid);
- result.add(new DoubleObjPair<DBID>(weight, nid));
+ add.add(iter2);
+ result.add(DBIDUtil.newPair(weight, iter2));
}
}
if(add.size() == 0) {
@@ -172,12 +170,12 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
/**
* Parameter to specify the neighborhood predicate to use.
*/
- public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
+ public static final OptionID NEIGHBORHOOD_ID = new OptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use.");
/**
* Parameter to specify the number of steps allowed
*/
- public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
+ public static final OptionID STEPS_ID = new OptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph.");
/**
* The number of steps to do.
@@ -217,7 +215,8 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
* @return number of steps, default 1
*/
public static int getParameterSteps(Parameterization config) {
- final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1));
+ final IntParameter param = new IntParameter(STEPS_ID);
+ param.addConstraint(new GreaterEqualConstraint(1));
if(config.grab(param)) {
return param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
index ce0666df..c179d81f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
@@ -28,15 +28,16 @@ import java.util.Collection;
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Adapter to use unweighted neighborhoods in an algorithm that requires
@@ -61,12 +62,11 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
}
@Override
- public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) {
+ public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) {
DBIDs neighbors = inner.getNeighborDBIDs(reference);
- ArrayList<DoubleObjPair<DBID>> adapted = new ArrayList<DoubleObjPair<DBID>>(neighbors.size());
+ ArrayList<DoubleDBIDPair> adapted = new ArrayList<DoubleDBIDPair>(neighbors.size());
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- adapted.add(new DoubleObjPair<DBID>(1.0, id));
+ adapted.add(DBIDUtil.newPair(1.0, iter));
}
return adapted;
}
@@ -120,7 +120,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
/**
* The parameter to give the non-weighted neighborhood to use.
*/
- public static final OptionID INNER_ID = OptionID.getOrCreateOptionID("neighborhood.inner", "Parameter for the non-weighted neighborhood to use.");
+ public static final OptionID INNER_ID = new OptionID("neighborhood.inner", "Parameter for the non-weighted neighborhood to use.");
/**
* The actual predicate.
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
index b147935a..16d37587 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java
@@ -26,10 +26,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted;
import java.util.Collection;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
* Neighbor predicate with weight support.
@@ -43,7 +43,7 @@ public interface WeightedNeighborSetPredicate {
* @param reference Reference object
* @return Weighted Neighborhood
*/
- public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference);
+ public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference);
/**
* Factory interface to produce instances.
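
With this change, getWeightedNeighbors accepts any DBIDRef and returns Collection<DoubleDBIDPair> instead of Collection<DoubleObjPair<DBID>>. A caller-side sketch of consuming the new return type follows; it assumes DoubleDBIDPair exposes its weight component via doubleValue(), which is not shown in this diff.

import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted.WeightedNeighborSetPredicate;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;

public class WeightSumSketch {
  /**
   * Sum up the neighbor weights for one reference object.
   */
  public static double totalWeight(WeightedNeighborSetPredicate predicate, DBIDRef reference) {
    double sum = 0.;
    for(DoubleDBIDPair pair : predicate.getWeightedNeighbors(reference)) {
      // doubleValue() is assumed to return the weight stored in the pair.
      sum += pair.doubleValue();
    }
    return sum;
  }
}
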
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
index 573233a7..1965914d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
@@ -23,10 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -37,16 +35,20 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -58,7 +60,6 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -89,11 +90,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
* @param <V> vector type
*/
@Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management")
-public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OUTRES.class);
+ private static final Logging LOG = Logging.getLogger(OUTRES.class);
/**
* The epsilon (in 2d) parameter
@@ -128,7 +129,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
KernelDensityEstimator kernel = new KernelDensityEstimator(relation);
BitSet subspace = new BitSet(kernel.dim);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("OutRank scores", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
subspace.clear();
@@ -136,11 +137,11 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
ranks.putDouble(iditer, score);
minmax.put(score);
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.);
@@ -159,33 +160,34 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
*/
public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) {
double score = 1.0; // Initial score is 1.0
+ final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
+ MeanVariance meanv = new MeanVariance();
for(int i = s; i < kernel.dim; i++) {
if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0?
continue;
}
subspace.set(i);
- final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
+ df.setSelectedDimensions(subspace);
final double adjustedEps = kernel.adjustedEps(kernel.dim);
// Query with a larger window, to also get neighbors of neighbors
// Subspace euclidean is metric!
- final DoubleDistance range = new DoubleDistance(adjustedEps * 2);
+ final DoubleDistance range = new DoubleDistance(adjustedEps * 2.);
RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
- List<DistanceResultPair<DoubleDistance>> neighc = rq.getRangeForDBID(id, range);
- List<DoubleDistanceResultPair> neigh = refineRange(neighc, adjustedEps);
+ DistanceDBIDResult<DoubleDistance> neighc = rq.getRangeForDBID(id, range);
+ DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps);
if(neigh.size() > 2) {
// Relevance test
if(relevantSubspace(subspace, neigh, kernel)) {
final double density = kernel.subspaceDensity(subspace, neigh);
- final double deviation;
// Compute mean and standard deviation for densities of neighbors.
- MeanVariance meanv = new MeanVariance();
- for(DoubleDistanceResultPair pair : neigh) {
- List<DoubleDistanceResultPair> n2 = subsetNeighborhoodQuery(neighc, pair.getDBID(), df, adjustedEps, kernel);
+ meanv.reset();
+ for (DoubleDistanceDBIDResultIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
meanv.put(kernel.subspaceDensity(subspace, n2));
}
- deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
+ final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
// High deviation:
if(deviation >= 1) {
score *= (density / deviation);
@@ -206,19 +208,20 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param adjustedEps New epsilon
* @return refined list
*/
- private List<DoubleDistanceResultPair> refineRange(List<DistanceResultPair<DoubleDistance>> neighc, double adjustedEps) {
- List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size());
+ private DoubleDistanceDBIDList refineRange(DistanceDBIDResult<DoubleDistance> neighc, double adjustedEps) {
+ DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size());
// We don't have a guarantee for this list to be sorted
- for(DistanceResultPair<DoubleDistance> p : neighc) {
- if(p instanceof DoubleDistanceResultPair) {
- if(((DoubleDistanceResultPair) p).getDoubleDistance() <= adjustedEps) {
- n.add((DoubleDistanceResultPair) p);
+ for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
+ if(p instanceof DoubleDistanceDBIDPair) {
+ if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) {
+ n.add((DoubleDistanceDBIDPair) p);
}
}
else {
double dist = p.getDistance().doubleValue();
if(dist <= adjustedEps) {
- n.add(new DoubleDistanceResultPair(dist, p.getDBID()));
+ n.add(dist, p);
}
}
}
@@ -235,13 +238,14 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param kernel Kernel
* @return Neighbors of neighbor object
*/
- private List<DoubleDistanceResultPair> subsetNeighborhoodQuery(List<DistanceResultPair<DoubleDistance>> neighc, DBID dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
- List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size());
+ private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDResult<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
+ DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size());
V query = kernel.relation.get(dbid);
- for(DistanceResultPair<DoubleDistance> p : neighc) {
+ for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
double dist = df.doubleDistance(query, kernel.relation.get(p));
if(dist <= adjustedEps) {
- n.add(new DoubleDistanceResultPair(dist, p.getDBID()));
+ n.add(dist, p);
}
}
return n;
@@ -255,7 +259,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param kernel Kernel density estimator
* @return relevance test result
*/
- protected boolean relevantSubspace(BitSet subspace, List<DoubleDistanceResultPair> neigh, KernelDensityEstimator kernel) {
+ protected boolean relevantSubspace(BitSet subspace, DoubleDistanceDBIDList neigh, KernelDensityEstimator kernel) {
Relation<V> relation = kernel.relation;
final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size());
@@ -264,9 +268,9 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
double[] data = new double[neigh.size()];
{
int count = 0;
- for(DoubleDistanceResultPair object : neigh) {
- V vector = relation.get(object.getDBID());
- data[count] = vector.doubleValue(dim + 1);
+ for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ V vector = relation.get(neighbor);
+ data[count] = vector.doubleValue(dim);
count++;
}
assert (count == neigh.size());
@@ -278,7 +282,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
// Kolmogorow-Smirnow-Test against uniform distribution:
for(int j = 1; j < data.length - 2; j++) {
- double delta = (j / (data.length - 1)) - ((data[j] - min) / norm);
+ double delta = (j / (data.length - 1.)) - ((data[j] - min) / norm);
if(Math.abs(delta) > crit) {
return false;
}
@@ -326,7 +330,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
public KernelDensityEstimator(Relation<V> relation) {
super();
this.relation = relation;
- dim = DatabaseUtil.dimensionality(relation);
+ dim = RelationUtil.dimensionality(relation);
hopttwo = optimalBandwidth(2);
epsilons = new double[dim + 1];
Arrays.fill(epsilons, Double.NEGATIVE_INFINITY);
@@ -337,15 +341,15 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* Compute density in the given subspace.
*
* @param subspace Subspace
- * @param neighbours Neighbor distance list
+ * @param neighbors Neighbor distance list
* @return Density
*/
- protected double subspaceDensity(BitSet subspace, List<DoubleDistanceResultPair> neighbours) {
+ protected double subspaceDensity(BitSet subspace, DoubleDistanceDBIDList neighbors) {
final double bandwidth = optimalBandwidth(subspace.cardinality());
double density = 0;
- for(DoubleDistanceResultPair pair : neighbours) {
- double v = pair.getDoubleDistance() / bandwidth;
+ for (DoubleDistanceDBIDResultIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ double v = neighbor.doubleDistance() / bandwidth;
if(v < 1) {
density += 1 - (v * v);
}
@@ -363,7 +367,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
protected double optimalBandwidth(int dim) {
// Pi in the publication is redundant and cancels out!
double hopt = 8 * GammaDistribution.gamma(dim / 2.0 + 1) * (dim + 4) * Math.pow(2, dim);
- return hopt * Math.pow(relation.size(), (-1 / (dim + 4)));
+ return hopt * Math.pow(relation.size(), (-1. / (dim + 4)));
}
/**
@@ -385,7 +389,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -400,11 +404,11 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
/**
* Option ID for Epsilon parameter
*/
- public static final OptionID D_ID = OptionID.getOrCreateOptionID("outres.epsilon", "Range value for OUTRES in 2 dimensions.");
+ public static final OptionID D_ID = new OptionID("outres.epsilon", "Range value for OUTRES in 2 dimensions.");
/**
* Query radius
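
Besides the API migration, the OUTRES diff contains one behavioral fix: in the Kolmogorov-Smirnov test, j / (data.length - 1) was evaluated in integer arithmetic and truncated to 0 for every j below data.length - 1, so the empirical-CDF term vanished from delta; the patched expression divides by data.length - 1. as a double. A tiny self-contained illustration, not part of the patch, where 0.5 stands in for the normalized data value:

public class KSDeltaSketch {
  public static void main(String[] args) {
    final int length = 10; // e.g. 10 neighbors
    final int j = 4;
    // Old expression: integer division truncates j / (length - 1) to 0 for every j < length - 1.
    double broken = (j / (length - 1)) - 0.5;
    // Patched expression: floating-point division keeps the empirical CDF step.
    double fixed = (j / (length - 1.)) - 0.5;
    System.out.println(broken); // -0.5
    System.out.println(fixed);  // about -0.056
  }
}
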
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
index e370d2bf..79243213 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
@@ -78,7 +78,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OutRankS1.class);
+ private static final Logging LOG = Logging.getLogger(OutRankS1.class);
/**
* Clustering algorithm to run.
@@ -110,23 +110,23 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
Clustering<? extends SubspaceModel<?>> clustering = clusteralg.run(database);
WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
- for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
score.putDouble(iter, 0);
}
int maxdim = 0, maxsize = 0;
// Find maximum dimensionality and cluster size
- for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
maxsize = Math.max(maxsize, cluster.size());
maxdim = Math.max(maxdim, cluster.getModel().getDimensions().cardinality());
}
// Iterate over all clusters:
DoubleMinMax minmax = new DoubleMinMax();
- for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
double relsize = cluster.size() / (double) maxsize;
double reldim = cluster.getModel().getDimensions().cardinality() / (double) maxdim;
// Process objects in the cluster
- for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
score.putDouble(iter, newscore);
minmax.put(newscore);
@@ -147,7 +147,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -161,12 +161,12 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
/**
* Clustering algorithm to use.
*/
- public static final OptionID ALGORITHM_ID = OptionID.getOrCreateOptionID("outrank.algorithm", "Subspace clustering algorithm to use.");
+ public static final OptionID ALGORITHM_ID = new OptionID("outrank.algorithm", "Subspace clustering algorithm to use.");
/**
* Alpha parameter for S1
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("outrank.s1.alpha", "Alpha parameter for S1 score.");
+ public static final OptionID ALPHA_ID = new OptionID("outrank.s1.alpha", "Alpha parameter for S1 score.");
/**
* Clustering algorithm to run.
@@ -182,12 +182,13 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class);
- if(config.grab(algP)) {
+ if (config.grab(algP)) {
algorithm = algP.instantiateClass(config);
}
- DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.25);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.25);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
}
@@ -196,4 +197,4 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli
return new OutRankS1(algorithm, alpha);
}
}
-}
\ No newline at end of file
+}
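
The Parameterizer hunks across these files follow one pattern: OptionIDs are constructed directly with new OptionID(name, description) instead of OptionID.getOrCreateOptionID, numeric constraints are attached after construction via addConstraint, and values are read with doubleValue() or getValue(). A minimal sketch of that pattern as a static helper, mirroring the style of getParameterSteps above; the option name "example.alpha" and its default are illustrative, and the constraints package for GreaterConstraint is assumed.

import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;

public final class ParamSketch {
  // Illustrative option; "example.alpha" is not an option introduced by this patch.
  public static final OptionID ALPHA_ID = new OptionID("example.alpha", "Example weighting parameter.");

  public static double getAlpha(Parameterization config) {
    DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.25);
    // Constraints are now attached after construction instead of via the constructor.
    alphaP.addConstraint(new GreaterConstraint(0));
    if (config.grab(alphaP)) {
      return alphaP.doubleValue();
    }
    return 0.25; // fall back to the default when the parameter could not be grabbed
  }
}
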
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
index 7fef95e0..35a780cd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
@@ -36,14 +36,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction;
@@ -57,7 +58,6 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -70,10 +70,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
- * Subspace Outlier Degree. Outlier detection method for axis-parallel subspaces.
+ * Subspace Outlier Degree. Outlier detection method for axis-parallel
+ * subspaces.
*
* Reference:
* <p>
@@ -89,34 +89,35 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
* @apiviz.has SharedNearestNeighborSimilarityFunction
*
* @param <V> the type of NumberVector handled by this Algorithm
+ * @param <D> distance type
*/
// todo arthur comment
@Title("SOD: Subspace outlier degree")
@Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data")
@Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2")
-public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SOD.class);
+ private static final Logging LOG = Logging.getLogger(SOD.class);
/**
* Parameter to specify the number of shared nearest neighbors to be
* considered for learning the subspace properties, must be an integer
* greater than 0.
*/
- public static final OptionID KNN_ID = OptionID.getOrCreateOptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
+ public static final OptionID KNN_ID = new OptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties.");
/**
* Parameter to indicate the multiplier for the discriminance value for
* discerning small from large variances.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
+ public static final OptionID ALPHA_ID = new OptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances.");
/**
* Parameter for the similarity function.
*/
- public static final OptionID SIM_ID = OptionID.getOrCreateOptionID("sod.similarity", "The similarity function used for the neighborhood set.");
+ public static final OptionID SIM_ID = new OptionID("sod.similarity", "The similarity function used for the neighborhood set.");
/**
* Holds the value of {@link #KNN_ID}.
@@ -155,20 +156,20 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
*/
public OutlierResult run(Relation<V> relation) {
SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null;
WritableDataStore<SODModel<?>> sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- if(progress != null) {
- progress.incrementProcessed(logger);
+ for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if (progress != null) {
+ progress.incrementProcessed(LOG);
}
DBIDs knnList = getNearestNeighbors(relation, snnInstance, iter);
SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(iter));
sod_models.put(iter, model);
minmax.put(model.getSod());
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
// combine results.
Relation<SODModel<?>> models = new MaterializedRelation<SODModel<?>>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs());
@@ -193,20 +194,19 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
*/
private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) {
// similarityFunction.getPreprocessor().getParameters();
- Heap<DoubleObjPair<DBID>> nearestNeighbors = new TiedTopBoundedHeap<DoubleObjPair<DBID>>(knn);
- for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
- if(!iter.sameDBID(queryObject)) {
+ Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<DoubleDBIDPair>(knn);
+ for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if (!DBIDUtil.equal(iter, queryObject)) {
double sim = simQ.similarity(queryObject, iter).doubleValue();
- if(sim > 0) {
- nearestNeighbors.add(new DoubleObjPair<DBID>(sim, iter.getDBID()));
+ if (sim > 0) {
+ nearestNeighbors.add(DBIDUtil.newPair(sim, iter));
}
}
}
// Collect DBIDs
ArrayModifiableDBIDs dbids = DBIDUtil.newArray(nearestNeighbors.size());
- while(nearestNeighbors.size() > 0) {
- final DoubleObjPair<DBID> next = nearestNeighbors.poll();
- dbids.add(next.second);
+ while (nearestNeighbors.size() > 0) {
+ dbids.add(nearestNeighbors.poll());
}
return dbids;
}
@@ -218,17 +218,17 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- *
+ * SOD Model class
*
* @author Arthur Zimek
* @param <V> the type of DatabaseObjects handled by this Result
*/
// TODO: arthur comment
- public static class SODModel<V extends NumberVector<V, ?>> implements TextWriteable, Comparable<SODModel<?>> {
+ public static class SODModel<V extends NumberVector<?>> implements TextWriteable, Comparable<SODModel<?>> {
private double[] centerValues;
private V center;
@@ -250,61 +250,60 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
* @param queryObject Query object
*/
public SODModel(Relation<V> relation, DBIDs neighborhood, double alpha, V queryObject) {
- if(neighborhood.size() > 0) {
+ if (neighborhood.size() > 0) {
// TODO: store database link?
- centerValues = new double[DatabaseUtil.dimensionality(relation)];
+ centerValues = new double[RelationUtil.dimensionality(relation)];
variances = new double[centerValues.length];
- for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
V databaseObject = relation.get(iter);
- for(int d = 0; d < centerValues.length; d++) {
- centerValues[d] += databaseObject.doubleValue(d + 1);
+ for (int d = 0; d < centerValues.length; d++) {
+ centerValues[d] += databaseObject.doubleValue(d);
}
}
- for(int d = 0; d < centerValues.length; d++) {
+ for (int d = 0; d < centerValues.length; d++) {
centerValues[d] /= neighborhood.size();
}
- for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
V databaseObject = relation.get(iter);
- for(int d = 0; d < centerValues.length; d++) {
+ for (int d = 0; d < centerValues.length; d++) {
// distance
- double distance = centerValues[d] - databaseObject.doubleValue(d + 1);
+ double distance = centerValues[d] - databaseObject.doubleValue(d);
// variance
variances[d] += distance * distance;
}
}
expectationOfVariance = 0;
- for(int d = 0; d < variances.length; d++) {
+ for (int d = 0; d < variances.length; d++) {
variances[d] /= neighborhood.size();
expectationOfVariance += variances[d];
}
expectationOfVariance /= variances.length;
weightVector = new BitSet(variances.length);
- for(int d = 0; d < variances.length; d++) {
- if(variances[d] < alpha * expectationOfVariance) {
+ for (int d = 0; d < variances.length; d++) {
+ if (variances[d] < alpha * expectationOfVariance) {
weightVector.set(d, true);
}
}
- center = DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(centerValues);
+ center = RelationUtil.getNumberVectorFactory(relation).newNumberVector(centerValues);
sod = subspaceOutlierDegree(queryObject, center, weightVector);
- }
- else {
+ } else {
center = queryObject;
sod = 0.0;
}
}
/**
- * Compute SOD score
+ * Compute SOD score.
*
- * @param queryObject
- * @param center
- * @param weightVector
- * @return sod value
+ * @param queryObject Query object
+ * @param center Center vector
+ * @param weightVector Weight vector
+ * @return sod score
*/
private double subspaceOutlierDegree(V queryObject, V center, BitSet weightVector) {
final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector);
final int card = weightVector.cardinality();
- if(card == 0) {
+ if (card == 0) {
return 0;
}
double distance = df.distance(queryObject, center).doubleValue();
@@ -352,7 +351,7 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
Relation<SODModel<?>> models;
/**
- * The IDs we are defined for
+ * The IDs we are defined for.
*/
DBIDs dbids;
@@ -436,7 +435,7 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
* Holds the value of {@link #KNN_ID}.
*/
@@ -456,18 +455,20 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<SimilarityFunction<V, D>>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class);
- if(config.grab(simP)) {
+ if (config.grab(simP)) {
similarityFunction = simP.instantiateClass(config);
}
- final IntParameter knnP = new IntParameter(KNN_ID, new GreaterConstraint(0));
- if(config.grab(knnP)) {
+ final IntParameter knnP = new IntParameter(KNN_ID);
+ knnP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(knnP)) {
knn = knnP.getValue();
}
- final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 1.1);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 1.1);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
}
@@ -476,4 +477,4 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
return new SOD<V, D>(knn, alpha, similarityFunction);
}
}
-}
\ No newline at end of file
+}
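
The SOD hunks combine several of the recurring changes: a TiedTopBoundedHeap of DoubleDBIDPair replaces the heap of DoubleObjPair<DBID>, IDs are compared with DBIDUtil.equal instead of sameDBID, and heap entries are appended to the result list directly because the pair is itself a DBIDRef. A condensed sketch of that k-most-similar lookup, using only calls visible in this diff; class and method names are illustrative.

import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;

public class SnnNeighborsSketch<V, D extends NumberDistance<D, ?>> {
  public DBIDs kMostSimilar(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef query, int k) {
    Heap<DoubleDBIDPair> heap = new TiedTopBoundedHeap<DoubleDBIDPair>(k);
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
      // Compare by reference, without materializing DBID objects.
      if (DBIDUtil.equal(iter, query)) {
        continue;
      }
      double sim = simQ.similarity(query, iter).doubleValue();
      if (sim > 0) {
        heap.add(DBIDUtil.newPair(sim, iter));
      }
    }
    ArrayModifiableDBIDs dbids = DBIDUtil.newArray(heap.size());
    while (heap.size() > 0) {
      // The pair is itself a DBIDRef, so it can be added to the ID list directly.
      dbids.add(heap.poll());
    }
    return dbids;
  }
}
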
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
index 66a89cf5..ae95abfa 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
@@ -57,7 +57,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
/**
* Our logger.
*/
- private static final Logging logger = Logging.getLogger(ByLabelOutlier.class);
+ private static final Logging LOG = Logging.getLogger(ByLabelOutlier.class);
/**
* The default pattern to use.
@@ -124,7 +124,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -145,7 +145,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
* Key: {@code -outlier.pattern}
* </p>
*/
- public static final OptionID OUTLIER_PATTERN_ID = OptionID.getOrCreateOptionID("outlier.pattern", "Label pattern to match outliers.");
+ public static final OptionID OUTLIER_PATTERN_ID = new OptionID("outlier.pattern", "Label pattern to match outliers.");
/**
* Stores the "outlier" class.
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
index b50226f1..35a85d51 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
@@ -48,7 +48,7 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
/**
* Our logger.
*/
- private static final Logging logger = Logging.getLogger(TrivialAllOutlier.class);
+ private static final Logging LOG = Logging.getLogger(TrivialAllOutlier.class);
/**
* Constructor.
@@ -80,6 +80,6 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
}
\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
index d1c2e076..e4c3861f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
@@ -65,12 +65,12 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
/**
* Class logger
*/
- private static final Logging logger = Logging.getLogger(TrivialGeneratedOutlier.class);
+ private static final Logging LOG = Logging.getLogger(TrivialGeneratedOutlier.class);
/**
* Expected share of outliers
*/
- public static final OptionID EXPECT_ID = OptionID.getOrCreateOptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive.");
+ public static final OptionID EXPECT_ID = new OptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive.");
/**
* Expected share of outliers.
@@ -101,7 +101,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
@Override
public OutlierResult run(Database database) {
- Relation<NumberVector<?, ?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
+ Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class));
// Prefer a true class label
try {
@@ -122,7 +122,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
* @param labels Label relation
* @return Outlier result
*/
- public OutlierResult run(Relation<Model> models, Relation<NumberVector<?, ?>> vecs, Relation<?> labels) {
+ public OutlierResult run(Relation<Model> models, Relation<NumberVector<?>> vecs, Relation<?> labels) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
// Adjustment constant
@@ -136,7 +136,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
}
}
if(generators.size() == 0) {
- logger.warning("No generator models found for dataset - all points will be considered outliers.");
+ LOG.warning("No generator models found for dataset - all points will be considered outliers.");
}
for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
@@ -179,7 +179,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
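
A side effect of dropping NumberVector's self-referential type parameter is that vector relations are now typed Relation<NumberVector<?>>. A minimal sketch of obtaining such a relation, mirroring the call shown above; the class and method names are illustrative.

import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.relation.Relation;

public class VectorRelationSketch {
  public static Relation<NumberVector<?>> vectorRelation(Database database) {
    // NUMBER_VECTOR_FIELD matches relations of numeric vectors with fixed dimensionality.
    Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
    return vecs;
  }
}
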
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
index 6d8e9f46..695ff112 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
@@ -48,7 +48,7 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
/**
* Our logger.
*/
- private static final Logging logger = Logging.getLogger(TrivialNoOutlier.class);
+ private static final Logging LOG = Logging.getLogger(TrivialNoOutlier.class);
/**
* Constructor.
@@ -80,6 +80,6 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
}
\ No newline at end of file