summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java142
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java724
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java5
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java31
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java21
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java988
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java36
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java37
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java33
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java55
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java43
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java47
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java39
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java633
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java13
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java20
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java11
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java7
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java4
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/OUTRES.java)108
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java199
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java (renamed from src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java)58
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java14
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java8
51 files changed, 3188 insertions, 463 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
index f0b31d32..88a62e38 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java
@@ -38,6 +38,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -186,20 +188,21 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
assert (k == this.k);
KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
- for(DBID objKey : relation.iterDBIDs()) {
- MeanVariance s = new MeanVariance();
+ MeanVariance s = new MeanVariance();
+ for(DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) {
+ s.reset();
// System.out.println("Processing: " +objKey);
KNNResult<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k);
Iterator<DistanceResultPair<DoubleDistance>> iter = neighbors.iterator();
while(iter.hasNext()) {
- DBID key1 = iter.next().getDBID();
+ DistanceResultPair<DoubleDistance> key1 = iter.next();
// Iterator iter2 = data.keyIterator();
Iterator<DistanceResultPair<DoubleDistance>> iter2 = neighbors.iterator();
// PriorityQueue best = new PriorityQueue(false, k);
while(iter2.hasNext()) {
- DBID key2 = iter2.next().getDBID();
- if(key2.equals(key1) || key1.equals(objKey) || key2.equals(objKey)) {
+ DistanceResultPair<DoubleDistance> key2 = iter2.next();
+ if(key2.sameDBID(key1) || key1.sameDBID(objKey) || key2.sameDBID(objKey)) {
continue;
}
double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
@@ -214,7 +217,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// Sample variance probably would be correct, however the numerical
// instabilities can actually break ABOD here.
- pq.add(new DoubleObjPair<DBID>(s.getNaiveVariance(), objKey));
+ pq.add(new DoubleObjPair<DBID>(s.getNaiveVariance(), objKey.getDBID()));
}
DoubleMinMax minmaxabod = new DoubleMinMax();
@@ -238,16 +241,18 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* @return result
*/
public OutlierResult getFastRanking(Relation<V> relation, int k, int sampleSize) {
+ final DBIDs ids = relation.getDBIDs();
// Fix a static set of IDs
- staticids = DBIDUtil.newArray(relation.getDBIDs());
+ // TODO: add a DBIDUtil.ensureSorted?
+ staticids = DBIDUtil.newArray(ids);
staticids.sort();
KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids);
Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder());
// get Candidate Ranking
- for(DBID aKey : relation.iterDBIDs()) {
- HashMap<DBID, Double> dists = new HashMap<DBID, Double>(relation.size());
+ for(DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) {
+ WritableDoubleDataStore dists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// determine kNearestNeighbors and pairwise distances
Heap<DoubleObjPair<DBID>> nn;
if(!useRNDSample) {
@@ -259,7 +264,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// get normalization
- double[] counter = calcFastNormalization(aKey, dists);
+ double[] counter = calcFastNormalization(aKey, dists, staticids);
// System.out.println(counter[0] + " " + counter2[0] + " " + counter[1] +
// " " + counter2[1]);
// umsetzen von Pq zu list
@@ -269,13 +274,14 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// getFilter
double var = getAbofFilter(kernelMatrix, aKey, dists, counter[1], counter[0], neighbors);
- pq.add(new DoubleObjPair<DBID>(var, aKey));
+ pq.add(new DoubleObjPair<DBID>(var, aKey.getDBID()));
// System.out.println("prog "+(prog++));
}
// refine Candidates
Heap<DoubleObjPair<DBID>> resqueue = new Heap<DoubleObjPair<DBID>>(k);
// System.out.println(pq.size() + " objects ordered into candidate list.");
// int v = 0;
+ MeanVariance s = new MeanVariance();
while(!pq.isEmpty()) {
if(resqueue.size() == k && pq.peek().first > resqueue.peek().first) {
break;
@@ -290,13 +296,13 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
// + " worst result: " + Double.MAX_VALUE);
// }
// v++;
- MeanVariance s = new MeanVariance();
- for(DBID bKey : relation.iterDBIDs()) {
- if(bKey.equals(aKey)) {
+ s.reset();
+ for(DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) {
+ if(bKey.sameDBID(aKey)) {
continue;
}
- for(DBID cKey : relation.iterDBIDs()) {
- if(cKey.equals(aKey)) {
+ for(DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) {
+ if(cKey.sameDBID(aKey)) {
continue;
}
// double nenner = dists[y]*dists[z];
@@ -325,64 +331,60 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
}
// System.out.println(v + " Punkte von " + data.size() + " verfeinert !!");
DoubleMinMax minmaxabod = new DoubleMinMax();
- WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
for(DoubleObjPair<DBID> pair : pq) {
abodvalues.putDouble(pair.getSecond(), pair.first);
minmaxabod.put(pair.first);
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids);
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
- private double[] calcFastNormalization(DBID x, HashMap<DBID, Double> dists) {
+ private double[] calcFastNormalization(DBIDRef x, WritableDoubleDataStore dists, DBIDs ids) {
double[] result = new double[2];
double sum = 0;
double sumF = 0;
- for(DBID yKey : dists.keySet()) {
- if(dists.get(yKey) != 0) {
- double tmp = 1 / Math.sqrt(dists.get(yKey));
+ for (DBIDIter yKey = ids.iter(); yKey.valid(); yKey.advance()) {
+ if(dists.doubleValue(yKey) != 0) {
+ double tmp = 1 / Math.sqrt(dists.doubleValue(yKey));
sum += tmp;
- sumF += (1 / dists.get(yKey)) * tmp;
+ sumF += (1 / dists.doubleValue(yKey)) * tmp;
}
}
double sofar = 0;
double sofarF = 0;
- for(DBID zKey : dists.keySet()) {
- if(dists.get(zKey) != 0) {
- double tmp = 1 / Math.sqrt(dists.get(zKey));
+ for (DBIDIter zKey = ids.iter(); zKey.valid(); zKey.advance()) {
+ if(dists.doubleValue(zKey) != 0) {
+ double tmp = 1 / Math.sqrt(dists.doubleValue(zKey));
sofar += tmp;
double rest = sum - sofar;
result[0] += tmp * rest;
- sofarF += (1 / dists.get(zKey)) * tmp;
+ sofarF += (1 / dists.doubleValue(zKey)) * tmp;
double restF = sumF - sofarF;
- result[1] += (1 / dists.get(zKey)) * tmp * restF;
+ result[1] += (1 / dists.doubleValue(zKey)) * tmp * restF;
}
}
return result;
}
- private double getAbofFilter(KernelMatrix kernelMatrix, DBID aKey, HashMap<DBID, Double> dists, double fulCounter, double counter, DBIDs neighbors) {
+ private double getAbofFilter(KernelMatrix kernelMatrix, DBIDRef aKey, WritableDoubleDataStore dists, double fulCounter, double counter, DBIDs neighbors) {
double sum = 0.0;
double sqrSum = 0.0;
double partCounter = 0;
- Iterator<DBID> iter = neighbors.iterator();
- while(iter.hasNext()) {
- DBID bKey = iter.next();
- if(bKey.equals(aKey)) {
+ for(DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) {
+ if(bKey.sameDBID(aKey)) {
continue;
}
- Iterator<DBID> iter2 = neighbors.iterator();
- while(iter2.hasNext()) {
- DBID cKey = iter2.next();
- if(cKey.equals(aKey)) {
+ for(DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) {
+ if(cKey.sameDBID(aKey)) {
continue;
}
- if(bKey.compareTo(cKey) > 0) {
- double nenner = dists.get(bKey).doubleValue() * dists.get(cKey).doubleValue();
+ if(bKey.compareDBID(cKey) > 0) {
+ double nenner = dists.doubleValue(bKey) * dists.doubleValue(cKey);
if(nenner != 0) {
double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner;
double sqrtNenner = Math.sqrt(nenner);
@@ -406,13 +408,13 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
* @param bKey
* @return cosinus value
*/
- private double calcCos(KernelMatrix kernelMatrix, DBID aKey, DBID bKey) {
+ private double calcCos(KernelMatrix kernelMatrix, DBIDRef aKey, DBIDRef bKey) {
final int ai = mapDBID(aKey);
final int bi = mapDBID(bKey);
return kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, bi) - 2 * kernelMatrix.getDistance(ai, bi);
}
- private int mapDBID(DBID aKey) {
+ private int mapDBID(DBIDRef aKey) {
// TODO: this is not the most efficient...
int off = staticids.binarySearch(aKey);
if(off < 0) {
@@ -421,44 +423,44 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
return off + 1;
}
- private double calcDenominator(KernelMatrix kernelMatrix, DBID aKey, DBID bKey, DBID cKey) {
+ private double calcDenominator(KernelMatrix kernelMatrix, DBIDRef aKey, DBIDRef bKey, DBIDRef cKey) {
return calcCos(kernelMatrix, aKey, bKey) * calcCos(kernelMatrix, aKey, cKey);
}
- private double calcNumerator(KernelMatrix kernelMatrix, DBID aKey, DBID bKey, DBID cKey) {
+ private double calcNumerator(KernelMatrix kernelMatrix, DBIDRef aKey, DBIDRef bKey, DBIDRef cKey) {
final int ai = mapDBID(aKey);
final int bi = mapDBID(bKey);
final int ci = mapDBID(cKey);
return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi));
}
- private Heap<DoubleObjPair<DBID>> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBID aKey, HashMap<DBID, Double> dists) {
+ private Heap<DoubleObjPair<DBID>> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
- for(DBID bKey : data.iterDBIDs()) {
+ for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
double val = calcCos(kernelMatrix, aKey, bKey);
- dists.put(bKey, val);
+ dists.putDouble(bKey, val);
if(nn.size() < sampleSize) {
- nn.add(new DoubleObjPair<DBID>(val, bKey));
+ nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
}
else {
if(val < nn.peek().first) {
nn.remove();
- nn.add(new DoubleObjPair<DBID>(val, bKey));
+ nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
}
}
}
return nn;
}
- private Heap<DoubleObjPair<DBID>> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBID aKey, HashMap<DBID, Double> dists) {
+ private Heap<DoubleObjPair<DBID>> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) {
Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize);
int step = (int) ((double) data.size() / (double) sampleSize);
int counter = 0;
- for(DBID bKey : data.iterDBIDs()) {
+ for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) {
double val = calcCos(kernelMatrix, aKey, bKey);
- dists.put(bKey, val);
+ dists.putDouble(bKey, val);
if(counter % step == 0) {
- nn.add(new DoubleObjPair<DBID>(val, bKey));
+ nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID()));
}
counter++;
}
@@ -477,24 +479,21 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(data.size(), Collections.reverseOrder());
HashMap<DBID, DBIDs> explaintab = new HashMap<DBID, DBIDs>();
// test all objects
- for(DBID objKey : data.iterDBIDs()) {
- MeanVariance s = new MeanVariance();
+ MeanVariance s = new MeanVariance(), s2 = new MeanVariance();
+ for(DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) {
+ s.reset();
// Queue for the best explanation
Heap<DoubleObjPair<DBID>> explain = new Heap<DoubleObjPair<DBID>>();
// determine Object
// for each pair of other objects
- Iterator<DBID> iter = data.iterDBIDs();
+ for (DBIDIter key1 = data.iterDBIDs(); key1.valid(); key1.advance()) {
// Collect Explanation Vectors
- while(iter.hasNext()) {
- MeanVariance s2 = new MeanVariance();
- DBID key1 = iter.next();
- Iterator<DBID> iter2 = data.iterDBIDs();
- if(objKey.equals(key1)) {
+ s2.reset();
+ if(objKey.sameDBID(key1)) {
continue;
}
- while(iter2.hasNext()) {
- DBID key2 = iter2.next();
- if(key2.equals(key1) || objKey.equals(key2)) {
+ for (DBIDIter key2 = data.iterDBIDs(); key2.valid(); key2.advance()) {
+ if(key2.sameDBID(key1) || objKey.sameDBID(key2)) {
continue;
}
double nenner = calcDenominator(kernelMatrix, objKey, key1, key2);
@@ -504,22 +503,22 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
s2.put(tmp, 1 / sqr);
}
}
- explain.add(new DoubleObjPair<DBID>(s2.getSampleVariance(), key1));
+ explain.add(new DoubleObjPair<DBID>(s2.getSampleVariance(), key1.getDBID()));
s.put(s2);
}
// build variance of the observed vectors
- pq.add(new DoubleObjPair<DBID>(s.getSampleVariance(), objKey));
+ pq.add(new DoubleObjPair<DBID>(s.getSampleVariance(), objKey.getDBID()));
//
ModifiableDBIDs expList = DBIDUtil.newArray();
expList.add(explain.remove().getSecond());
while(!explain.isEmpty()) {
DBID nextKey = explain.remove().getSecond();
- if(nextKey.equals(objKey)) {
+ if(nextKey.sameDBID(objKey)) {
continue;
}
double max = Double.MIN_VALUE;
- for(DBID exp : expList) {
- if(exp.equals(objKey) || nextKey.equals(exp)) {
+ for(DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) {
+ if(exp.sameDBID(objKey) || nextKey.sameDBID(exp)) {
continue;
}
double nenner = Math.sqrt(calcCos(kernelMatrix, objKey, nextKey)) * Math.sqrt(calcCos(kernelMatrix, objKey, exp));
@@ -530,7 +529,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
expList.add(nextKey);
}
}
- explaintab.put(objKey, expList);
+ explaintab.put(objKey.getDBID(), expList);
}
System.out.println("--------------------------------------------");
System.out.println("Result: ABOD");
@@ -552,10 +551,9 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg
private void generateExplanation(Relation<V> data, DBID key, DBIDs expList) {
Vector vect1 = data.get(key).getColumnVector();
- Iterator<DBID> iter = expList.iterator();
- while(iter.hasNext()) {
+ for(DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) {
System.out.println("Outlier: " + vect1);
- Vector exp = data.get(iter.next()).getColumnVector();
+ Vector exp = data.get(iter).getColumnVector();
System.out.println("Most common neighbor: " + exp);
// determine difference Vector
Vector vals = exp.minus(vect1);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
new file mode 100644
index 00000000..39c3db60
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java
@@ -0,0 +1,724 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Fast Outlier Detection Using the "approximate Local Correlation Integral".
+ *
+ * Outlier detection using multiple epsilon neighborhoods.
+ *
+ * Reference:
+ * <p>
+ * S. Papadimitriou, H. Kitagawa, P. B. Gibbons and C. Faloutsos:<br />
+ * LOCI: Fast Outlier Detection Using the Local Correlation Integral.<br />
+ * In: Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore,
+ * India, 2003.
+ * </p>
+ *
+ * @author Jonathan von Brünken
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ * @param <D> Distance type
+ */
+@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
+@Description("Algorithm to compute outliers based on the Local Correlation Integral")
+@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
+public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging logger = Logging.getLogger(ALOCI.class);
+
+ /**
+ * Minimum size for a leaf.
+ */
+ private int nmin;
+
+ /**
+ * Alpha (level difference of sampling and counting neighborhoods)
+ */
+ private int alpha;
+
+ /**
+ * Number of trees to generate (forest size)
+ */
+ private int g;
+
+ /**
+ * Random generator
+ */
+ private Random random;
+
+ /**
+ * Distance function
+ */
+ private NumberVectorDistanceFunction<D> distFunc;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param nmin Minimum neighborhood size
+ * @param alpha Alpha value
+ * @param g Number of grids to use
+ * @param seed Random generator seed.
+ */
+ public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, Long seed) {
+ super();
+ this.distFunc = distanceFunction;
+ this.nmin = nmin;
+ this.alpha = alpha;
+ this.g = g;
+ this.random = (seed != null) ? new Random(seed) : new Random(0);
+ }
+
+ public OutlierResult run(Database database, Relation<O> relation) {
+ final int dim = DatabaseUtil.dimensionality(relation);
+ FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, logger) : null;
+
+ // Compute extend of dataset.
+ double[] min, max;
+ {
+ Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
+ double maxd = 0;
+ min = new double[dim];
+ max = new double[dim];
+ for(int i = 0; i < dim; i++) {
+ min[i] = hbbs.first.doubleValue(i + 1);
+ max[i] = hbbs.second.doubleValue(i + 1);
+ maxd = Math.max(maxd, max[i] - min[i]);
+ }
+ // Enlarge bounding box to have equal lengths.
+ for(int i = 0; i < dim; i++) {
+ double diff = (maxd - (max[i] - min[i])) / 2;
+ min[i] -= diff;
+ max[i] += diff;
+ }
+ }
+
+ List<ALOCIQuadTree> qts = new ArrayList<ALOCIQuadTree>(g);
+
+ double[] nshift = new double[dim];
+ ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
+ qts.add(qt);
+ if(progressPreproc != null) {
+ progressPreproc.incrementProcessed(logger);
+ }
+ /*
+ * create the remaining g-1 shifted QuadTrees. This not clearly described in
+ * the paper and therefore implemented in a way that achieves good results
+ * with the test data.
+ */
+ for(int shift = 1; shift < g; shift++) {
+ double[] svec = new double[dim];
+ for(int i = 0; i < dim; i++) {
+ svec[i] = random.nextDouble() * (max[i] - min[i]);
+ }
+ qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
+ qts.add(qt);
+ if(progressPreproc != null) {
+ progressPreproc.incrementProcessed(logger);
+ }
+ }
+ if(progressPreproc != null) {
+ progressPreproc.ensureCompleted(logger);
+ }
+
+ // aLOCI main loop: evaluate
+ FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), logger) : null;
+ WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmax = new DoubleMinMax();
+
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final O obj = relation.get(iditer);
+
+ double maxmdefnorm = 0;
+ // For each level
+ for(int l = 0;; l++) {
+ // Find the closest C_i
+ Node ci = null;
+ for(int i = 0; i < g; i++) {
+ Node ci2 = qts.get(i).findClosestNode(obj, l);
+ if(ci2.getLevel() != l) {
+ continue;
+ }
+ // TODO: always use manhattan?
+ if(ci == null || distFunc.distance(ci.getCenter(), obj).compareTo(distFunc.distance(ci2.getCenter(), obj)) > 0) {
+ ci = ci2;
+ }
+ }
+ // logger.debug("level:" + (ci != null ? ci.getLevel() : -1) +" l:"+l);
+ if(ci == null) {
+ break; // no matching tree for this level.
+ }
+
+ // Find the closest C_j
+ Node cj = null;
+ for(int i = 0; i < g; i++) {
+ Node cj2 = qts.get(i).findClosestNode(ci.getCenter(), l - alpha);
+ // TODO: allow higher levels or not?
+ if(cj != null && cj2.getLevel() < cj.getLevel()) {
+ continue;
+ }
+ // TODO: always use manhattan?
+ if(cj == null || distFunc.distance(cj.getCenter(), ci.getCenter()).compareTo(distFunc.distance(cj2.getCenter(), ci.getCenter())) > 0) {
+ cj = cj2;
+ }
+ }
+ // logger.debug("level:" + (cj != null ? cj.getLevel() : -1) +" l:"+l);
+ if(cj == null) {
+ continue; // no matching tree for this level.
+ }
+ double mdefnorm = calculate_MDEF_norm(cj, ci);
+ // logger.warning("level:" + ci.getLevel() + "/" + cj.getLevel() +
+ // " mdef: " + mdefnorm);
+ maxmdefnorm = Math.max(maxmdefnorm, mdefnorm);
+ }
+ // Store results
+ mdef_norm.putDouble(iditer, maxmdefnorm);
+ minmax.put(maxmdefnorm);
+ if(progressLOCI != null) {
+ progressLOCI.incrementProcessed(logger);
+ }
+ }
+ if(progressLOCI != null) {
+ progressLOCI.ensureCompleted(logger);
+ }
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ return result;
+ }
+
+ /**
+ * Method for the MDEF calculation
+ *
+ * @param sn Sampling Neighborhood
+ * @param cg Counting Neighborhood
+ *
+ * @return MDEF norm
+ */
+ private static double calculate_MDEF_norm(Node sn, Node cg) {
+ // get the square sum of the counting neighborhoods box counts
+ long sq = sn.getSquareSum(cg.getLevel() - sn.getLevel());
+ /*
+ * if the square sum is equal to box count of the sampling Neighborhood then
+ * n_hat is equal one, and as cg needs to have at least one Element mdef
+ * would get zero or lower than zero. This is the case when all of the
+ * counting Neighborhoods contain one or zero Objects. Additionally, the
+ * cubic sum, square sum and sampling Neighborhood box count are all equal,
+ * which leads to sig_n_hat being zero and thus mdef_norm is either negative
+ * infinite or undefined. As the distribution of the Objects seem quite
+ * uniform, a mdef_norm value of zero ( = no outlier) is appropriate and
+ * circumvents the problem of undefined values.
+ */
+ if(sq == sn.getCount()) {
+ return 0.0;
+ }
+ // calculation of mdef according to the paper and standardization as done in
+ // LOCI
+ long cb = sn.getCubicSum(cg.getLevel() - sn.getLevel());
+ double n_hat = (double) sq / sn.getCount();
+ double sig_n_hat = java.lang.Math.sqrt(cb * sn.getCount() - (sq * sq)) / sn.getCount();
+ // Avoid NaN - correct result 0.0?
+ if(sig_n_hat < Double.MIN_NORMAL) {
+ return 0.0;
+ }
+ double mdef = n_hat - cg.getCount();
+ return mdef / sig_n_hat;
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(distFunc.getInputTypeRestriction());
+ }
+
+ /**
+ * Simple quadtree for ALOCI. Not storing the actual objects, just the counts.
+ *
+ * Furthermore, the quadtree can be shifted by a specified vector, wrapping
+ * around min/max
+ *
+ * @author Jonathan von Brünken
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf Node
+ */
+ static class ALOCIQuadTree {
+ /**
+ * Tree parameters
+ */
+ private double[] shift, min, width;
+
+ /**
+ * Maximum fill for a page before splitting
+ */
+ private int nmin;
+
+ /**
+ * Tree root
+ */
+ Node root;
+
+ /**
+ * Relation indexed.
+ */
+ private Relation<? extends NumberVector<?, ?>> relation;
+
+ /**
+ * Constructor.
+ *
+ * @param min Minimum coordinates
+ * @param max Maximum coordinates
+ * @param shift Tree shift offset
+ * @param nmin Maximum size for a page to split
+ * @param relation Relation to index
+ */
+ public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?, ?>> relation) {
+ super();
+ assert (min.length <= 32) : "Quadtrees are only supported for up to 32 dimensions";
+ this.shift = shift;
+ this.nmin = nmin;
+ this.min = min;
+ this.width = new double[min.length];
+ for(int d = 0; d < min.length; d++) {
+ width[d] = max[d] - min[d];
+ if(width[d] <= 0) {
+ width[d] = 1;
+ }
+ }
+ double[] center = new double[min.length];
+ for(int d = 0; d < min.length; d++) {
+ if(shift[d] < width[d] * .5) {
+ center[d] = min[d] + shift[d] + width[d] * .5;
+ }
+ else {
+ center[d] = min[d] + shift[d] - width[d] * .5;
+ }
+ }
+ this.relation = relation;
+ ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
+ List<Node> children = new ArrayList<Node>();
+ bulkLoad(min.clone(), max.clone(), children, ids, 0, ids.size(), 0, 0, 0);
+ this.root = new Node(0, new Vector(center), ids.size(), -1, children);
+ }
+
+ /**
+ * Bulk load the tree
+ *
+ * @param lmin Subtree minimum (unshifted, will be modified)
+ * @param lmax Subtree maximum (unshifted, will be modified)
+ * @param children List of children for current parent
+ * @param ids IDs to process
+ * @param start Start of ids subinterval
+ * @param end End of ids subinterval
+ * @param dim Current dimension
+ * @param level Current tree level
+ * @param code Bit code of node position
+ */
+ private void bulkLoad(double[] lmin, double[] lmax, List<Node> children, ArrayModifiableDBIDs ids, int start, int end, int dim, int level, int code) {
+ // logger.warning(FormatUtil.format(lmin)+" "+FormatUtil.format(lmax)+" "+start+"->"+end+" "+(end-start));
+ // Hack: Check degenerate cases that won't split
+ if(dim == 0) {
+ NumberVector<?, ?> first = relation.get(ids.get(start));
+ boolean degenerate = true;
+ loop: for(int pos = start + 1; pos < end; pos++) {
+ NumberVector<?, ?> other = relation.get(ids.get(pos));
+ for(int d = 1; d <= lmin.length; d++) {
+ if(Math.abs(first.doubleValue(d) - other.doubleValue(d)) > 1E-15) {
+ degenerate = false;
+ break loop;
+ }
+ }
+ }
+ if(degenerate) {
+ double[] center = new double[lmin.length];
+ for(int d = 0; d < lmin.length; d++) {
+ center[d] = lmin[d] * .5 + lmax[d] * .5 + shift[d];
+ if(center[d] > min[d] + width[d]) {
+ center[d] -= width[d];
+ }
+ }
+ children.add(new Node(code, new Vector(center), end - start, level, null));
+ return;
+ }
+ }
+ // Complete level
+ if(dim == lmin.length) {
+ double[] center = new double[lmin.length];
+ for(int d = 0; d < lmin.length; d++) {
+ center[d] = lmin[d] * .5 + lmax[d] * .5 + shift[d];
+ if(center[d] > min[d] + width[d]) {
+ center[d] -= width[d];
+ }
+ }
+ if(end - start < nmin) {
+ children.add(new Node(code, new Vector(center), end - start, level, null));
+ return;
+ }
+ else {
+ List<Node> newchildren = new ArrayList<Node>();
+ bulkLoad(lmin, lmax, newchildren, ids, start, end, 0, level + 1, 0);
+ children.add(new Node(code, new Vector(center), end - start, level, newchildren));
+ return;
+ }
+ }
+ else {
+ // Partially sort data, by dimension dim < mid
+ int spos = start, epos = end;
+ while(spos < epos) {
+ if(getShiftedDim(relation.get(ids.get(spos)), dim, level) <= .5) {
+ spos++;
+ continue;
+ }
+ if(getShiftedDim(relation.get(ids.get(epos - 1)), dim, level) > 0.5) {
+ epos--;
+ continue;
+ }
+ ids.swap(spos, epos - 1);
+ spos++;
+ epos--;
+ }
+ if(start < spos) {
+ final double tmp = lmax[dim];
+ lmax[dim] = lmax[dim] * .5 + lmin[dim] * .5;
+ bulkLoad(lmin, lmax, children, ids, start, spos, dim + 1, level, code);
+ lmax[dim] = tmp; // Restore
+ }
+ if(spos < end) {
+ final double tmp = lmin[dim];
+ lmin[dim] = lmax[dim] * .5 + lmin[dim] * .5;
+ bulkLoad(lmin, lmax, children, ids, spos, end, dim + 1, level, code | (1 << dim));
+ lmin[dim] = tmp; // Restore
+ }
+ }
+ }
+
+ /**
+ * Shift and wrap a single dimension.
+ *
+ * @param obj Object
+ * @param dim Dimension
+ * @param level Level (controls scaling/wraping!)
+ * @return Shifted position
+ */
+ private double getShiftedDim(NumberVector<?, ?> obj, int dim, int level) {
+ double pos = obj.doubleValue(dim + 1) + shift[dim];
+ pos = (pos - min[dim]) / width[dim] * (1 + level);
+ return pos - Math.floor(pos);
+ }
+
+ /**
+ * Find the closest node (of depth tlevel or above, if there is no node at
+ * this depth) for the given vector.
+ *
+ * @param vec Query vector
+ * @param tlevel Target level
+ * @return Node
+ */
+ public Node findClosestNode(NumberVector<?, ?> vec, int tlevel) {
+ Node cur = root;
+ for(int level = 0; level <= tlevel; level++) {
+ if(cur.children == null) {
+ break;
+ }
+ int code = 0;
+ for(int d = 0; d < min.length; d++) {
+ if(getShiftedDim(vec, d, level) > .5) {
+ code |= 1 << d;
+ }
+ }
+ boolean found = false;
+ for(Node child : cur.children) {
+ if(child.code == code) {
+ cur = child;
+ found = true;
+ break;
+ }
+ }
+ if(!found) {
+ break; // Do not descend
+ }
+ }
+ return cur;
+ }
+ }
+
+ /**
+ * Node of the ALOCI Quadtree
+ *
+ * @author Erich Schubert
+ */
+ static class Node {
+ /**
+ * Position code
+ */
+ final int code;
+
+ /**
+ * Number of elements
+ */
+ final int count;
+
+ /**
+ * Level of node
+ */
+ final int level;
+
+ /**
+ * Child nodes, may be null
+ */
+ List<Node> children;
+
+ /**
+ * Parent node
+ */
+ Node parent = null;
+
+ /**
+ * Center vector
+ */
+ Vector center;
+
+ /**
+ * Constructor.
+ *
+ * @param code Node code
+ * @param center Center vector
+ * @param count Element count
+ * @param level Node level
+ * @param children Children list
+ */
+ protected Node(int code, Vector center, int count, int level, List<Node> children) {
+ this.code = code;
+ this.center = center;
+ this.count = count;
+ this.level = level;
+ this.children = children;
+ if(children != null) {
+ for(Node child : children) {
+ child.parent = this;
+ }
+ }
+ }
+
+ /**
+ * Get level of node.
+ *
+ * @return Level of node
+ */
+ public int getLevel() {
+ return level;
+ }
+
+ /**
+ * Get count of subtree
+ *
+ * @return subtree count
+ */
+ public int getCount() {
+ return count;
+ }
+
+ /**
+ * Return center vector
+ *
+ * @return center vector
+ */
+ public Vector getCenter() {
+ return center;
+ }
+
+ /**
+ * Get sum of squares, recursively
+ *
+ * @param levels Depth to collect
+ * @return Sum of squares
+ */
+ public long getSquareSum(int levels) {
+ if(levels <= 0 || children == null) {
+ return ((long) count) * ((long) count);
+ }
+ long agg = 0;
+ for(Node child : children) {
+ agg += child.getSquareSum(levels - 1);
+ }
+ return agg;
+ }
+
+ /**
+ * Get cubic sum.
+ *
+ * @param levels Level to collect
+ * @return sum of cubes
+ */
+ public long getCubicSum(int levels) {
+ if(levels <= 0 || children == null) {
+ return ((long) count) * ((long) count) * ((long) count);
+ }
+ long agg = 0;
+ for(Node child : children) {
+ agg += child.getCubicSum(levels - 1);
+ }
+ return agg;
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ /**
+ * Parameter to specify the minimum neighborhood size
+ */
+ public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+
+ /**
+ * Parameter to specify the averaging neighborhood scaling.
+ */
+ public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+
+ /**
+ * Parameter to specify the number of Grids to use.
+ */
+ public static final OptionID GRIDS_ID = OptionID.getOrCreateOptionID("loci.g", "The number of Grids to use.");
+
+ /**
+ * Parameter to specify the seed to initialize Random.
+ */
+ public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("loci.seed", "The seed to use for initializing Random.");
+
+ /**
+ * Neighborhood minimum size
+ */
+ protected int nmin = 0;
+
+ /**
+ * Alpha: number of levels difference to use in comparison
+ */
+ protected int alpha = 4;
+
+ /**
+ * G: number of shifted trees to create.
+ */
+ protected int g = 1;
+
+ /**
+ * Random generator seed
+ */
+ protected Long seed = null;
+
+ /**
+ * The distance function
+ */
+ private NumberVectorDistanceFunction<D> distanceFunction;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ ObjectParameter<NumberVectorDistanceFunction<D>> distanceFunctionP = makeParameterDistanceFunction(EuclideanDistanceFunction.class, NumberVectorDistanceFunction.class);
+ if(config.grab(distanceFunctionP)) {
+ distanceFunction = distanceFunctionP.instantiateClass(config);
+ }
+
+ final IntParameter nminP = new IntParameter(NMIN_ID, 20);
+ if(config.grab(nminP)) {
+ nmin = nminP.getValue();
+ }
+
+ final IntParameter g = new IntParameter(GRIDS_ID, 1);
+ if(config.grab(g)) {
+ this.g = g.getValue();
+ }
+
+ final LongParameter seedP = new LongParameter(SEED_ID, true);
+ if(config.grab(seedP)) {
+ this.seed = seedP.getValue();
+ }
+
+ final IntParameter alphaP = new IntParameter(ALPHA_ID, 4);
+ if(config.grab(alphaP)) {
+ this.alpha = alphaP.getValue();
+ if(this.alpha < 1) {
+ this.alpha = 1;
+ }
+ }
+ }
+
+ @Override
+ protected ALOCI<O, D> makeInstance() {
+ return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, seed);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
index 994ce8e2..9c1a216a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java
@@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
@@ -45,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
/**
@@ -121,22 +122,23 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex
final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<ArrayList<DBIDs>>();
// Temporary projection storage of the database
- final ArrayList<ArrayList<FCPair<Double, DBID>>> dbAxis = new ArrayList<ArrayList<FCPair<Double, DBID>>>(dim);
+ final ArrayList<ArrayList<DoubleObjPair<DBID>>> dbAxis = new ArrayList<ArrayList<DoubleObjPair<DBID>>>(dim);
for(int i = 0; i < dim; i++) {
- ArrayList<FCPair<Double, DBID>> axis = new ArrayList<FCPair<Double, DBID>>(size);
+ ArrayList<DoubleObjPair<DBID>> axis = new ArrayList<DoubleObjPair<DBID>>(size);
dbAxis.add(i, axis);
}
// Project
- for(DBID id : allids) {
+ for(DBIDIter iter = allids.iter(); iter.valid(); iter.advance()) {
+ DBID id = iter.getDBID();
final V obj = database.get(id);
for(int d = 1; d <= dim; d++) {
- dbAxis.get(d - 1).add(new FCPair<Double, DBID>(obj.doubleValue(d), id));
+ dbAxis.get(d - 1).add(new DoubleObjPair<DBID>(obj.doubleValue(d), id));
}
}
// Split into cells
final double part = size * 1.0 / phi;
for(int d = 1; d <= dim; d++) {
- ArrayList<FCPair<Double, DBID>> axis = dbAxis.get(d - 1);
+ ArrayList<DoubleObjPair<DBID>> axis = dbAxis.get(d - 1);
Collections.sort(axis);
ArrayList<DBIDs> dimranges = new ArrayList<DBIDs>(phi + 1);
dimranges.add(allids);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
index 1d77af3a..a5ccce3a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java
@@ -77,8 +77,11 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra
/**
* Runs the algorithm in the timed evaluation part.
*
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
*/
- public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
+ public OutlierResult run(Database database, Relation<O> relation) {
// Run the actual score process
DataStore<Double> dbodscore = computeOutlierScores(database, relation, d);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
index 5d357744..1d02e865 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java
@@ -38,6 +38,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -139,9 +140,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
* @param database Database
* @param relation Relation
* @return Result
- * @throws IllegalStateException
*/
- public OutlierResult run(Database database, Relation<V> relation) throws IllegalStateException {
+ public OutlierResult run(Database database, Relation<V> relation) {
final int dbsize = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
@@ -151,7 +151,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
for(Individuum ind : individuums) {
DBIDs ids = computeSubspaceForGene(ind.getGene(), ranges);
double sparsityC = sparsity(ids.size(), dbsize, k);
- for(DBID id : ids) {
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ DBID id = iter.getDBID();
double prev = outlierScore.doubleValue(id);
if(Double.isNaN(prev) || sparsityC < prev) {
outlierScore.putDouble(id, sparsityC);
@@ -160,7 +161,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra
}
DoubleMinMax minmax = new DoubleMinMax();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
double val = outlierScore.doubleValue(id);
if(Double.isNaN(val)) {
outlierScore.putDouble(id, 0.0);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
index 190211c3..0bb73aba 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -147,19 +147,19 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar
final double sparsityC = sparsity(ids.size(), size, k);
if(sparsityC < 0) {
- for(DBID id : ids) {
- double prev = sparsity.doubleValue(id);
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double prev = sparsity.doubleValue(iter);
if(Double.isNaN(prev) || sparsityC < prev) {
- sparsity.putDouble(id, sparsityC);
+ sparsity.putDouble(iter, sparsityC);
}
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
- for(DBID id : relation.iterDBIDs()) {
- double val = sparsity.doubleValue(id);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double val = sparsity.doubleValue(iditer);
if(Double.isNaN(val)) {
- sparsity.putDouble(id, 0.0);
+ sparsity.putDouble(iditer, 0.0);
val = 0.0;
}
minmax.put(val);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
index f4b0ba35..dbaf8a5a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java
@@ -23,14 +23,12 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Iterator;
-
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -117,19 +115,19 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
// if index exists, kNN query. if the distance to the mth nearest neighbor
// is more than d -> object is outlier
if(knnQuery != null) {
- for(DBID id : distFunc.getRelation().iterDBIDs()) {
+ for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
- final KNNResult<D> knns = knnQuery.getKNNForDBID(id, m);
+ final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, m);
if(logger.isDebugging()) {
logger.debugFine("distance to mth nearest neighbour" + knns.toString());
}
if(knns.get(Math.min(m, knns.size()) - 1).getDistance().compareTo(neighborhoodSize) <= 0) {
// flag as outlier
- scores.putDouble(id, 1.0);
+ scores.putDouble(iditer, 1.0);
}
else {
// flag as no outlier
- scores.putDouble(id, 0.0);
+ scores.putDouble(iditer, 0.0);
}
}
if(progressOFlags != null) {
@@ -138,27 +136,16 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl
}
else {
// range query for each object. stop if m objects are found
- for(DBID id : distFunc.getRelation().iterDBIDs()) {
+ for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
counter++;
- Iterator<DBID> iterator = distFunc.getRelation().iterDBIDs();
int count = 0;
- while(iterator.hasNext() && count < m) {
- DBID currentID = iterator.next();
- D currentDistance = distFunc.distance(id, currentID);
-
+ for (DBIDIter iterator = distFunc.getRelation().iterDBIDs(); iterator.valid() && count < m; iterator.advance()) {
+ D currentDistance = distFunc.distance(iditer, iterator);
if(currentDistance.compareTo(neighborhoodSize) <= 0) {
count++;
}
}
-
- if(count < m) {
- // flag as outlier
- scores.putDouble(id, 1.0);
- }
- else {
- // flag as no outlier
- scores.putDouble(id, 0.0);
- }
+ scores.putDouble(iditer, (count < m) ? 1.0 : 0);
}
if(progressOFlags != null) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
index ec83a2a2..419b9a0e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java
@@ -28,7 +28,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -80,10 +80,10 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier<
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
// TODO: use bulk when implemented.
- for(DBID id : distFunc.getRelation().iterDBIDs()) {
+ for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute percentage of neighbors in the given neighborhood with size d
- double n = (rangeQuery.getRangeForDBID(id, d).size()) / size;
- scores.putDouble(id, 1.0 - n);
+ double n = (rangeQuery.getRangeForDBID(iditer, d).size()) / size;
+ scores.putDouble(iditer, 1.0 - n);
}
return scores;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
index 92d92036..db4b7782 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.EM;
@@ -33,7 +34,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -84,19 +85,23 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O
/**
* Runs the algorithm in the timed evaluation part.
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
*/
- public OutlierResult run(Database database, Relation<V> relation) throws IllegalStateException {
+ public OutlierResult run(Database database, Relation<V> relation) {
Clustering<EMModel<V>> emresult = emClustering.run(database, relation);
double globmax = 0.0;
WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double maxProb = Double.POSITIVE_INFINITY;
- double[] probs = emClustering.getProbClusterIGivenX(id);
+ double[] probs = emClustering.getProbClusterIGivenX(iditer);
for(double prob : probs) {
maxProb = Math.min(1 - prob, maxProb);
}
- emo_score.putDouble(id, maxProb);
+ emo_score.putDouble(iditer, maxProb);
globmax = Math.max(maxProb, globmax);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
index ae47c100..51833c8b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java
@@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -92,7 +93,13 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
this.invert = invert;
}
- public OutlierResult run(Relation<V> relation) throws IllegalStateException {
+ /**
+ * Run the algorithm
+ *
+ * @param relation Data relation
+ * @return Outlier result
+ */
+ public OutlierResult run(Relation<V> relation) {
DoubleMinMax mm = new DoubleMinMax();
// resulting scores
WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
@@ -109,20 +116,21 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit
final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(relation)) * covarianceMatrix.det())));
// for each object compute Mahalanobis distance
- for(DBID id : relation.iterDBIDs()) {
- Vector x = relation.get(id).getColumnVector().minusEquals(mean);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ Vector x = relation.get(iditer).getColumnVector().minusEquals(mean);
// Gaussian PDF
final double mDist = x.transposeTimesTimes(covarianceTransposed, x);
final double prob = fakt * Math.exp(-mDist / 2.0);
mm.put(prob);
- oscores.putDouble(id, prob);
+ oscores.putDouble(iditer, prob);
}
final OutlierScoreMeta meta;
if(invert) {
double max = mm.getMax() != 0 ? mm.getMax() : 1.;
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
oscores.putDouble(id, (max - oscores.doubleValue(id)) / max);
}
meta = new BasicOutlierScoreMeta(0.0, 1.0);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
index aa352582..1cd31442 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java
@@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs;
@@ -41,6 +42,7 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
@@ -127,7 +129,13 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
this.c = c;
}
- public OutlierResult run(Relation<V> relation) throws IllegalStateException {
+ /**
+ * Run the algorithm
+ *
+ * @param relation Data relation
+ * @return Outlier result
+ */
+ public OutlierResult run(Relation<V> relation) {
// Use an array list of object IDs for fast random access by an offset
ArrayDBIDs objids = DBIDUtil.ensureArray(relation.getDBIDs());
// A bit set to flag objects as anomalous, none at the beginning
@@ -205,9 +213,9 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
if(objids.isEmpty()) {
return 0;
}
- double prob = 0;
- Vector mean = DatabaseUtil.centroid(database, objids).getColumnVector();
- Matrix covarianceMatrix = DatabaseUtil.covarianceMatrix(database, objids);
+ CovarianceMatrix builder = CovarianceMatrix.make(database, objids);
+ Vector mean = builder.getMeanVector();
+ Matrix covarianceMatrix = builder.destroyToSampleMatrix();
// test singulaere matrix
Matrix covInv = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
@@ -215,8 +223,9 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra
double covarianceDet = covarianceMatrix.det();
double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(database)) * covarianceDet);
// for each object compute probability and sum
- for(DBID id : objids) {
- Vector x = database.get(id).getColumnVector().minusEquals(mean);
+ double prob = 0;
+ for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
+ Vector x = database.get(iter).getColumnVector().minusEquals(mean);
double mDist = x.transposeTimesTimes(covInv, x);
prob += Math.log(fakt * Math.exp(-mDist / 2.0));
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
new file mode 100644
index 00000000..4ed56e1a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java
@@ -0,0 +1,988 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Set;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Fast Outlier Detection in High Dimensional Spaces
+ *
+ * Outlier Detection using Hilbert space filling curves
+ *
+ * Reference:
+ * <p>
+ * F. Angiulli, C. Pizzuti:<br />
+ * Fast Outlier Detection in High Dimensional Spaces.<br />
+ * In: Proc. European Conference on Principles of Knowledge Discovery and Data
+ * Mining (PKDD'02), Helsinki, Finland, 2002.
+ * </p>
+ *
+ * @author Jonathan von Brünken
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf HilbertFeatures
+ * @apiviz.uses HilFeature
+ *
+ * @param <O> Object type
+ */
+@Title("Fast Outlier Detection in High Dimensional Spaces")
+@Description("Algorithm to compute outliers using Hilbert space filling curves")
+@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", url = "http://dx.doi.org/10.1145/375663.375668")
+public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging logger = Logging.getLogger(HilOut.class);
+
+ /**
+ * Number of nearest neighbors
+ */
+ private int k;
+
+ /**
+ * Number of outliers to compute exactly
+ */
+ private int n;
+
+ /**
+ * Hilbert precision
+ */
+ private int h;
+
+ /**
+ * LPNorm p parameter
+ */
+ private double t;
+
+ /**
+ * Reporting mode: exact (top n) only, or all
+ */
+ private Enum<ScoreType> tn;
+
+ /**
+ * Distance query
+ */
+ private DistanceQuery<O, DoubleDistance> distq;
+
+ /**
+ * Set sizes, total and current iteration
+ */
+ private int capital_n, n_star, capital_n_star, d;
+
+ /**
+ * Outlier threshold
+ */
+ private double omega_star;
+
+ /**
+ * Type of output: all scores (upper bounds) or top n only
+ *
+ * @author Jonathan von Brünken
+ *
+ * @apiviz.exclude
+ */
+ public static enum ScoreType {
+ All, TopN
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param k Number of Next Neighbors
+ * @param n Number of Outlier
+ * @param h Number of Bits for precision to use - max 32
+ * @param tn TopN or All Outlier Rank to return
+ */
+ protected HilOut(LPNormDistanceFunction distfunc, int k, int n, int h, Enum<ScoreType> tn) {
+ super(distfunc);
+ this.n = n;
+ // HilOut does not count the object itself. We do in KNNWeightOutlier.
+ this.k = k - 1;
+ this.h = h;
+ this.tn = tn;
+ this.t = distfunc.getP();
+ this.n_star = 0;
+ this.omega_star = 0.0;
+ }
+
+ public OutlierResult run(Database database, Relation<O> relation) {
+ distq = database.getDistanceQuery(relation, getDistanceFunction());
+ d = DatabaseUtil.dimensionality(relation);
+ WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+
+ // Compute extend of dataset.
+ double[] min;
+ double diameter = 0; // Actually "length of edge"
+ {
+ Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
+ min = new double[d];
+ double[] max = new double[d];
+ for(int i = 0; i < d; i++) {
+ min[i] = hbbs.first.doubleValue(i + 1);
+ max[i] = hbbs.second.doubleValue(i + 1);
+ diameter = Math.max(diameter, max[i] - min[i]);
+ }
+ // Enlarge bounding box to have equal lengths.
+ for(int i = 0; i < d; i++) {
+ double diff = (diameter - (max[i] - min[i])) / 2;
+ min[i] -= diff;
+ max[i] += diff;
+ }
+ if(logger.isVerbose()) {
+ logger.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
+ }
+ }
+
+ // Initialization part
+ capital_n_star = capital_n = relation.size();
+ HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
+
+ FiniteProgress progressHilOut = logger.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, logger) : null;
+ FiniteProgress progressTrueOut = logger.isVerbose() ? new FiniteProgress("True outliers found", n, logger) : null;
+ // Main part: 1. Phase max. d+1 loops
+ for(int j = 0; j <= d && n_star < n; j++) {
+ // initialize (clear) out and wlb - not 100% clear in the paper
+ h.out.clear();
+ h.wlb.clear();
+ // Initialize Hilbert values in pf according to current shift
+ h.initialize(.5 * j / (d + 1));
+ // scan the Data according to the current shift; build out and wlb
+ scan(h, (int) (k * capital_n / (double) capital_n_star));
+ // determine the true outliers (n_star)
+ trueOutliers(h);
+ if(progressTrueOut != null) {
+ progressTrueOut.setProcessed(n_star, logger);
+ }
+ // Build the top Set as out + wlb
+ h.top.clear();
+ HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
+ for(HilFeature entry : h.out) {
+ top_keys.add(entry.id);
+ h.top.add(entry);
+ }
+ for(HilFeature entry : h.wlb) {
+ if(!top_keys.contains(entry.id)) {
+ // No need to update top_keys - discarded
+ h.top.add(entry);
+ }
+ }
+ if(progressHilOut != null) {
+ progressHilOut.incrementProcessed(logger);
+ }
+ }
+ // 2. Phase: Additional Scan if less than n true outliers determined
+ if(n_star < n) {
+ h.out.clear();
+ h.wlb.clear();
+ // TODO: reinitialize shift to 0?
+ scan(h, capital_n);
+ }
+ if(progressHilOut != null) {
+ progressHilOut.setProcessed(d, logger);
+ progressHilOut.ensureCompleted(logger);
+ }
+ if(progressTrueOut != null) {
+ progressTrueOut.setProcessed(n, logger);
+ progressTrueOut.ensureCompleted(logger);
+ }
+ DoubleMinMax minmax = new DoubleMinMax();
+ // Return weights in out
+ if(tn == ScoreType.TopN) {
+ minmax.put(0.0);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ hilout_weight.putDouble(iditer, 0.0);
+ }
+ for(HilFeature ent : h.out) {
+ minmax.put(ent.ubound);
+ hilout_weight.putDouble(ent.id, ent.ubound);
+ }
+ }
+ // Return all weights in pf
+ else {
+ for(HilFeature ent : h.pf) {
+ minmax.put(ent.ubound);
+ hilout_weight.putDouble(ent.id, ent.ubound);
+ }
+ }
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs());
+ OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
+ OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
+ return result;
+ }
+
+ /**
+ * Scan function performs a squential scan over the data.
+ *
+ * @param hf the hilbert features
+ * @param k0
+ */
+ private void scan(HilbertFeatures hf, int k0) {
+ final int mink0 = Math.min(2 * k0, capital_n - 1);
+ if(logger.isDebuggingFine()) {
+ logger.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
+ }
+ for(int i = 0; i < hf.pf.length; i++) {
+ if(hf.pf[i].ubound < omega_star) {
+ continue;
+ }
+ if(hf.pf[i].lbound < hf.pf[i].ubound) {
+ double omega = hf.fastUpperBound(i);
+ if(omega < omega_star) {
+ hf.pf[i].ubound = omega;
+ }
+ else {
+ int maxcount;
+ // capital_n-1 instead of capital_n: all, except self
+ if(hf.top.contains(hf.pf[i])) {
+ maxcount = capital_n - 1;
+ }
+ else {
+ maxcount = mink0;
+ }
+ innerScan(hf, i, maxcount);
+ }
+ }
+ if(hf.pf[i].ubound > 0) {
+ hf.updateOUT(i);
+ }
+ if(hf.pf[i].lbound > 0) {
+ hf.updateWLB(i);
+ }
+ if(hf.wlb.size() >= n) {
+ omega_star = Math.max(omega_star, hf.wlb.peek().lbound);
+ }
+ }
+ }
+
+ /**
+ * innerScan function calculates new upper and lower bounds and inserts the
+ * points of the neighborhood the bounds are based on in the NN Set
+ *
+ * @param i position in pf of the feature for which the bounds should be
+ * calculated
+ * @param maxcount maximal size of the neighborhood
+ */
+ private void innerScan(HilbertFeatures hf, final int i, final int maxcount) {
+ final O p = hf.relation.get(hf.pf[i].id); // Get only once for performance
+ int a = i, b = i;
+ int level = h, levela = h, levelb = h;
+ // Explore up to "maxcount" neighbors in this pass
+ for(int count = 0; count < maxcount; count++) {
+ final int c; // Neighbor to explore
+ if(a == 0) { // At left end, explore right
+ // assert (b < capital_n - 1);
+ levelb = Math.min(levelb, hf.pf[b].level);
+ b++;
+ c = b;
+ }
+ else if(b >= capital_n - 1) { // At right end, explore left
+ // assert (a > 0);
+ a--;
+ levela = Math.min(levela, hf.pf[a].level);
+ c = a;
+ }
+ else if(hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
+ a--;
+ levela = Math.min(levela, hf.pf[a].level);
+ c = a;
+ }
+ else {
+ // assert (b < capital_n - 1);
+ levelb = Math.min(levelb, hf.pf[b].level);
+ b++;
+ c = b;
+ }
+ if(!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
+ // hf.distcomp ++;
+ hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id).doubleValue(), k);
+ if(hf.pf[i].nn.size() == k) {
+ if(hf.pf[i].sum_nn < omega_star) {
+ break; // stop = true
+ }
+ final int mlevel = Math.max(levela, levelb);
+ if(mlevel < level) {
+ level = mlevel;
+ final double delta = hf.minDistLevel(hf.pf[i].id, level);
+ if(delta >= hf.pf[i].nn.peek().getDoubleDistance()) {
+ break; // stop = true
+ }
+ }
+ }
+ }
+ }
+ double br = hf.boxRadius(i, a - 1, b + 1);
+ double newlb = 0.0;
+ double newub = 0.0;
+ for(DoubleDistanceResultPair entry : hf.pf[i].nn) {
+ newub += entry.getDoubleDistance();
+ if(entry.getDoubleDistance() <= br) {
+ newlb += entry.getDoubleDistance();
+ }
+ }
+ if(newlb > hf.pf[i].lbound) {
+ hf.pf[i].lbound = newlb;
+ }
+ if(newub < hf.pf[i].ubound) {
+ hf.pf[i].ubound = newub;
+ }
+ }
+
+ /**
+ * trueOutliers function updates n_star
+ *
+ * @param h the HilberFeatures
+ *
+ */
+
+ private void trueOutliers(HilbertFeatures h) {
+ n_star = 0;
+ for(HilFeature entry : h.out) {
+ if(entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
+ n_star++;
+ }
+ }
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(new LPNormDistanceFunction(t).getInputTypeRestriction());
+ }
+
+ /**
+ * Class organizing the data points along a hilbert curve.
+ *
+ * @author Jonathan von Brünken
+ *
+ * @apiviz.composedOf HilFeature
+ */
+ class HilbertFeatures {
+ // public int distcomp = 1;
+
+ /**
+ * Relation indexed
+ */
+ Relation<O> relation;
+
+ /**
+ * Hilbert representation ("point features")
+ */
+ HilFeature[] pf;
+
+ /**
+ * Data space minimums
+ */
+ double[] min;
+
+ /**
+ * Data space diameter
+ */
+ double diameter;
+
+ /**
+ * Current curve shift
+ */
+ double shift;
+
+ /**
+ * Top candidates
+ */
+ private Set<HilFeature> top;
+
+ /**
+ * "OUT"
+ */
+ private Heap<HilFeature> out;
+
+ /**
+ * "WLB"
+ */
+ private Heap<HilFeature> wlb;
+
+ /**
+ * Constructor.
+ *
+ * @param relation Relation to index
+ * @param min Minimums for data space
+ * @param diameter Diameter of data space
+ */
+ public HilbertFeatures(Relation<O> relation, double[] min, double diameter) {
+ super();
+ this.relation = relation;
+ this.min = min;
+ this.diameter = diameter;
+ this.pf = new HilFeature[relation.size()];
+
+ int pos = 0;
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ pf[pos++] = new HilFeature(iditer.getDBID(), new Heap<DoubleDistanceResultPair>(k, Collections.reverseOrder()));
+ }
+ this.out = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
+ @Override
+ public int compare(HilFeature o1, HilFeature o2) {
+ return Double.compare(o1.ubound, o2.ubound);
+ }
+ });
+ this.wlb = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
+ @Override
+ public int compare(HilFeature o1, HilFeature o2) {
+ return Double.compare(o1.lbound, o2.lbound);
+ }
+ });
+ this.top = new HashSet<HilFeature>(2 * n);
+ }
+
+ /**
+ * Hilbert function to fill pf with shifted Hilbert values. Also calculates
+ * the number current Outlier candidates capital_n_star
+ *
+ * @param shift the new shift factor
+ */
+ private void initialize(double shift) {
+ this.shift = shift;
+ // FIXME: 64 bit mode untested - sign bit is tricky to handle correctly
+ // with the rescaling. 63 bit should be fine. The sign bit probably needs
+ // to be handled differently, or at least needs careful testing of the API
+ if(h >= 32) { // 32 to 63 bit
+ final long scale = Long.MAX_VALUE; // = 63 bits
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector<?, ?> obj = relation.get(pf[i].id);
+ long[] coord = new long[d];
+ for(int dim = 0; dim < d; dim++) {
+ coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
+ }
+ pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
+ }
+ }
+ else if(h >= 16) { // 16-31 bit
+ final int scale = ~1 >>> 1;
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector<?, ?> obj = relation.get(pf[i].id);
+ int[] coord = new int[d];
+ for(int dim = 0; dim < d; dim++) {
+ coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
+ }
+ pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
+ }
+ }
+ else if(h >= 8) { // 8-15 bit
+ final int scale = ~1 >>> 16;
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector<?, ?> obj = relation.get(pf[i].id);
+ short[] coord = new short[d];
+ for(int dim = 0; dim < d; dim++) {
+ coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
+ }
+ pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 16);
+ }
+ }
+ else { // 1-7 bit
+ final int scale = ~1 >>> 8;
+ for(int i = 0; i < pf.length; i++) {
+ NumberVector<?, ?> obj = relation.get(pf[i].id);
+ byte[] coord = new byte[d];
+ for(int dim = 0; dim < d; dim++) {
+ coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
+ }
+ pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 24);
+ }
+ }
+ java.util.Arrays.sort(pf);
+ // Update levels
+ for(int i = 0; i < pf.length - 1; i++) {
+ pf[i].level = minRegLevel(i, i + 1);
+ }
+ // Count candidates
+ capital_n_star = 0;
+ for(int i = 0; i < pf.length; i++) {
+ if(pf[i].ubound >= omega_star) {
+ capital_n_star++;
+ }
+ }
+ }
+
+ /**
+ * updateOUT function inserts pf[i] in out.
+ *
+ * @param i position in pf of the feature to be inserted
+ */
+ private void updateOUT(int i) {
+ if(out.size() < n) {
+ out.offer(pf[i]);
+ }
+ else {
+ HilFeature head = out.peek();
+ if(pf[i].ubound > head.ubound) {
+ // replace smallest
+ out.poll();
+ // assert (out.peek().ubound >= head.ubound);
+ out.offer(pf[i]);
+ }
+ }
+ }
+
+ /**
+ * updateWLB function inserts pf[i] in wlb.
+ *
+ * @param i position in pf of the feature to be inserted
+ */
+ private void updateWLB(int i) {
+ if(wlb.size() < n) {
+ wlb.offer(pf[i]);
+ }
+ else {
+ HilFeature head = wlb.peek();
+ if(pf[i].lbound > head.lbound) {
+ // replace smallest
+ wlb.poll();
+ // assert (wlb.peek().lbound >= head.lbound);
+ wlb.offer(pf[i]);
+ }
+ }
+ }
+
+ /**
+ * fastUpperBound function calculates an upper Bound as k*maxDist(pf[i],
+ * smallest neighborhood)
+ *
+ * @param i position in pf of the feature for which the bound should be
+ * calculated
+ */
+ private double fastUpperBound(int i) {
+ int pre = i;
+ int post = i;
+ while(post - pre < k) {
+ int pre_level = (pre - 1 >= 0) ? pf[pre - 1].level : -2;
+ int post_level = (post < capital_n - 1) ? pf[post].level : -2;
+ if(post_level >= pre_level) {
+ post++;
+ }
+ else {
+ pre--;
+ }
+ }
+ return k * maxDistLevel(pf[i].id, minRegLevel(pre, post));
+ }
+
+ /**
+ * minDist function calculate the minimal Distance from Vector p to the
+ * border of the corresponding r-region at the given level
+ *
+ * @param id Object ID
+ * @param level Level of the corresponding r-region
+ */
+ private double minDistLevel(DBID id, int level) {
+ final NumberVector<?, ?> obj = relation.get(id);
+ // level 1 is supposed to have r=1 as in the original publication
+ // 2 ^ - (level - 1)
+ final double r = 1.0 / (1 << (level - 1));
+ double dist = Double.POSITIVE_INFINITY;
+ for(int dim = 0; dim < d; dim++) {
+ final double p_m_r = getDimForObject(obj, dim) % r;
+ dist = Math.min(dist, Math.min(p_m_r, r - p_m_r));
+ }
+ return dist * diameter;
+ }
+
+ /**
+ * maxDist function calculate the maximal Distance from Vector p to the
+ * border of the corresponding r-region at the given level
+ *
+ * @param id Object ID
+ * @param level Level of the corresponding r-region
+ */
+ private double maxDistLevel(DBID id, int level) {
+ final NumberVector<?, ?> obj = relation.get(id);
+ // level 1 is supposed to have r=1 as in the original publication
+ final double r = 1.0 / (1 << (level - 1));
+ double dist;
+ if(t == 1.0) {
+ dist = 0.0;
+ for(int dim = 0; dim < d; dim++) {
+ final double p_m_r = getDimForObject(obj, dim) % r;
+ // assert (p_m_r >= 0);
+ dist += Math.max(p_m_r, r - p_m_r);
+ }
+ }
+ else if(t == 2.0) {
+ dist = 0.0;
+ for(int dim = 0; dim < d; dim++) {
+ final double p_m_r = getDimForObject(obj, dim) % r;
+ // assert (p_m_r >= 0);
+ double a = Math.max(p_m_r, r - p_m_r);
+ dist += a * a;
+ }
+ dist = Math.sqrt(dist);
+ }
+ else if(!Double.isInfinite(t)) {
+ dist = 0.0;
+ for(int dim = 0; dim < d; dim++) {
+ final double p_m_r = getDimForObject(obj, dim) % r;
+ dist += Math.pow(Math.max(p_m_r, r - p_m_r), t);
+ }
+ dist = Math.pow(dist, 1.0 / t);
+ }
+ else {
+ dist = Double.NEGATIVE_INFINITY;
+ for(int dim = 0; dim < d; dim++) {
+ final double p_m_r = getDimForObject(obj, dim) % r;
+ dist = Math.max(dist, Math.max(p_m_r, r - p_m_r));
+ }
+ }
+ return dist * diameter;
+ }
+
+ /**
+ * Number of levels shared
+ *
+ * @param a First bitset
+ * @param b Second bitset
+ * @return Number of level shared
+ */
+ private int numberSharedLevels(long[] a, long[] b) {
+ for(int i = 0, j = a.length - 1; i < a.length; i++, j--) {
+ final long diff = a[j] ^ b[j];
+ if(diff != 0) {
+ // expected unused = available - used
+ final int expected = (a.length * Long.SIZE) - (d * h);
+ return ((BitsUtil.numberOfLeadingZeros(diff) + i * Long.SIZE) - expected) / d;
+ }
+ }
+ return h - 1;
+ }
+
+ /**
+ * minReg function calculate the minimal r-region level containing two
+ * points
+ *
+ * @param a index of first point in pf
+ * @param b index of second point in pf
+ *
+ * @return Level of the r-region
+ */
+ private int minRegLevel(int a, int b) {
+ // Sanity test: first level different -> region of level 0, r=2
+ // all same: level h - 1
+ return numberSharedLevels(pf[a].hilbert, pf[b].hilbert);
+ }
+
+ /**
+ * Level of the maximum region containing ref but not q
+ *
+ * @param ref Reference point
+ * @param q Query point
+ * @return Number of bits shared across all dimensions
+ */
+ private int maxRegLevel(int ref, int q) {
+ // Sanity test: first level different -> region of level 1, r=1
+ // all same: level h
+ return numberSharedLevels(pf[ref].hilbert, pf[q].hilbert) + 1;
+ }
+
+ /**
+ * boxRadius function calculate the Boxradius
+ *
+ * @param i index of first point
+ * @param a index of second point
+ * @param b index of third point
+ *
+ * @return box radius
+ */
+ private double boxRadius(int i, int a, int b) {
+ // level are inversely ordered to box sizes. min -> max
+ final int level;
+ if(a < 0) {
+ if(b >= pf.length) {
+ return Double.POSITIVE_INFINITY;
+ }
+ level = maxRegLevel(i, b);
+ }
+ else if(b >= pf.length) {
+ level = maxRegLevel(i, a);
+ }
+ else {
+ level = Math.max(maxRegLevel(i, a), maxRegLevel(i, b));
+ }
+ return minDistLevel(pf[i].id, level);
+ }
+
+ /**
+ * Get the (projected) position of the object in dimension dim.
+ *
+ * @param obj Object
+ * @param dim Dimension
+ * @return Projected and shifted position
+ */
+ private double getDimForObject(NumberVector<?, ?> obj, int dim) {
+ return (obj.doubleValue(dim + 1) - min[dim]) / diameter + shift;
+ }
+ }
+
+ /**
+ * Hilbert representation of a single object.
+ *
+ * Details of this representation are discussed in the main HilOut
+ * publication, see "point features".
+ *
+ * @author Jonathan von Brünken
+ */
+ final static class HilFeature implements Comparable<HilFeature> {
+ /**
+ * Object ID
+ */
+ public DBID id;
+
+ /**
+ * Hilbert representation
+ *
+ * TODO: use byte[] to save some memory, but slower?
+ */
+ public long[] hilbert = null;
+
+ /**
+ * Object level
+ */
+ public int level = 0;
+
+ /**
+ * Upper bound for object
+ */
+ public double ubound = Double.POSITIVE_INFINITY;
+
+ /**
+ * Lower bound of object
+ */
+ public double lbound = 0.0;
+
+ /**
+ * Heap with the nearest known neighbors
+ */
+ public Heap<DoubleDistanceResultPair> nn;
+
+ /**
+ * Set representation of the nearest neighbors for faster lookups
+ */
+ public HashSetModifiableDBIDs nn_keys = DBIDUtil.newHashSet();
+
+ /**
+ * Current weight (sum of nn distances)
+ */
+ public double sum_nn = 0.0;
+
+ /**
+ * Constructor.
+ *
+ * @param id Object ID
+ * @param nn Heap for neighbors
+ */
+ public HilFeature(DBID id, Heap<DoubleDistanceResultPair> nn) {
+ super();
+ this.id = id;
+ this.nn = nn;
+ }
+
+ @Override
+ public int compareTo(HilFeature o) {
+ return BitsUtil.compare(this.hilbert, o.hilbert);
+ }
+
+ /**
+ * insert function inserts a nearest neighbor into a features nn list and
+ * its distance
+ *
+ * @param id DBID of the nearest neighbor
+ * @param dt distance or the neighbor to the features position
+ * @param k K
+ */
+ protected void insert(DBID id, double dt, int k) {
+ // assert (!nn_keys.contains(id));
+ if(nn.size() < k) {
+ DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
+ nn.offer(entry);
+ nn_keys.add(id);
+ sum_nn += dt;
+ }
+ else {
+ DoubleDistanceResultPair head = nn.peek();
+ if(dt < head.getDoubleDistance()) {
+ head = nn.poll(); // Remove worst
+ sum_nn -= head.getDoubleDistance();
+ nn_keys.remove(head.getDBID());
+
+ // assert (nn.peek().getDoubleDistance() <= head.getDoubleDistance());
+
+ DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
+ nn.offer(entry);
+ nn_keys.add(id);
+ sum_nn += dt;
+ }
+ }
+
+ }
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Jonathan von Brünken
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Vector type
+ */
+ public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ /**
+ * Parameter to specify how many next neighbors should be used in the
+ * computation
+ */
+ public static final OptionID K_ID = OptionID.getOrCreateOptionID("HilOut.k", "Compute up to k next neighbors");
+
+ /**
+ * Parameter to specify how many outliers should be computed
+ */
+ public static final OptionID N_ID = OptionID.getOrCreateOptionID("HilOut.n", "Compute n outliers");
+
+ /**
+ * Parameter to specify the maximum Hilbert-Level
+ */
+ public static final OptionID H_ID = OptionID.getOrCreateOptionID("HilOut.h", "Max. Hilbert-Level");
+
+ /**
+ * Parameter to specify p of LP-NormDistance
+ */
+ public static final OptionID T_ID = OptionID.getOrCreateOptionID("HilOut.t", "t of Lt Metric");
+
+ /**
+ * Parameter to specify if only the Top n, or also approximations for the
+ * other elements, should be returned
+ */
+ public static final OptionID TN_ID = OptionID.getOrCreateOptionID("HilOut.tn", "output of Top n or all elements");
+
+ /**
+ * Neighborhood size
+ */
+ protected int k = 5;
+
+ /**
+ * Top-n candidates to compute exactly
+ */
+ protected int n = 10;
+
+ /**
+ * Hilbert curve precision
+ */
+ protected int h = 32;
+
+ /**
+ * LPNorm distance function
+ */
+ protected LPNormDistanceFunction distfunc;
+
+ /**
+ * Scores to report: all or top-n only
+ */
+ protected Enum<ScoreType> tn;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ final IntParameter kP = new IntParameter(K_ID, 5);
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+
+ final IntParameter nP = new IntParameter(N_ID, 10);
+ if(config.grab(nP)) {
+ n = nP.getValue();
+ }
+
+ final IntParameter hP = new IntParameter(H_ID, 32);
+ if(config.grab(hP)) {
+ h = hP.getValue();
+ }
+
+ ObjectParameter<LPNormDistanceFunction> distP = AbstractDistanceBasedAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, LPNormDistanceFunction.class);
+ if (config.grab(distP)) {
+ distfunc = distP.instantiateClass(config);
+ }
+
+ final EnumParameter<ScoreType> tnP = new EnumParameter<ScoreType>(TN_ID, ScoreType.class, ScoreType.TopN);
+ if(config.grab(tnP)) {
+ tn = tnP.getValue();
+ }
+ }
+
+ @Override
+ protected HilOut<O> makeInstance() {
+ return new HilOut<O>(distfunc, k, n, h, tn);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
index 083a72a6..1fe5fe71 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
@@ -43,6 +43,7 @@ import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -120,9 +121,14 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
this.k = k;
}
- @Override
- public OutlierResult run(Database database) throws IllegalStateException {
- Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
+ /**
+ * Run the algorithm
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
ModifiableDBIDs processedIDs = DBIDUtil.newHashSet(relation.size());
@@ -134,15 +140,15 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// density
WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// init knns and rnns
- for(DBID id : relation.iterDBIDs()) {
- knns.put(id, DBIDUtil.newArray());
- rnns.put(id, DBIDUtil.newArray());
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ knns.put(iditer, DBIDUtil.newArray());
+ rnns.put(iditer, DBIDUtil.newArray());
}
// TODO: use kNN preprocessor?
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
// if not visited count=0
int count = rnns.get(id).size();
ModifiableDBIDs s;
@@ -158,7 +164,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
else {
s = knns.get(id);
}
- for(DBID q : s) {
+ for (DBIDIter q = s.iter(); q.valid(); q.advance()) {
if(!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
@@ -182,20 +188,18 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa
// IF Object is pruned INFLO=1.0
DoubleMinMax inflominmax = new DoubleMinMax();
WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
if(!pruned.contains(id)) {
ModifiableDBIDs knn = knns.get(id);
ModifiableDBIDs rnn = rnns.get(id);
double denP = density.doubleValue(id);
knn.addDBIDs(rnn);
- double den = 0;
- for(DBID q : knn) {
- double denQ = density.doubleValue(q);
- den = den + denQ;
+ Mean mean = new Mean();
+ for (DBIDIter iter = knn.iter(); iter.valid(); iter.advance()) {
+ mean.put(density.doubleValue(iter));
}
- den = den / rnn.size();
- den = den / denP;
+ double den = mean.getMean() / denP;
inflos.putDouble(id, den);
// update minimum and maximum
inflominmax.put(den);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
index ee748f99..08be944a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java
@@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
@@ -115,11 +115,11 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
- final KNNResult<D> knns = knnQuery.getKNNForDBID(id, k);
+ final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
double dkn = knns.getKNNDistance().doubleValue();
- knno_score.putDouble(id, dkn);
+ knno_score.putDouble(iditer, dkn);
minmax.put(dkn);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
index e9657e12..cb3ca2f1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -119,15 +119,15 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac
// compute distance to the k nearest neighbor. n objects with the highest
// distance are flagged as outliers
WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute sum of the distances to the k nearest neighbors
- final KNNResult<D> knn = knnQuery.getKNNForDBID(id, k);
+ final KNNResult<D> knn = knnQuery.getKNNForDBID(iditer, k);
double skn = 0;
for(DistanceResultPair<D> r : knn) {
skn += r.getDistance().doubleValue();
}
- knnw_score.putDouble(id, skn);
+ knnw_score.putDouble(iditer, skn);
minmax.put(skn);
if(progressKNNWeight != null) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
index d9256428..84f5dcc6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -110,7 +111,14 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
this.k = k;
}
- public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
+ /**
+ * Run the algorithm
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k);
@@ -125,29 +133,26 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
FiniteProgress progressLDOFs = logger.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), logger) : null;
- for(DBID id : relation.iterDBIDs()) {
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
- int nsize = neighbors.size() - 1;
+ Mean dxp = new Mean(), Dxp = new Mean();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
// skip the point itself
- double dxp = 0;
- double Dxp = 0;
+ dxp.reset(); Dxp.reset();
for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(!neighbor1.getDBID().equals(id)) {
- dxp += neighbor1.getDistance().doubleValue();
+ if(!neighbor1.sameDBID(iditer)) {
+ dxp.put(neighbor1.getDistance().doubleValue());
for(DistanceResultPair<D> neighbor2 : neighbors) {
- if(!neighbor1.getDBID().equals(neighbor2.getDBID()) && !neighbor2.getDBID().equals(id)) {
- Dxp += distFunc.distance(neighbor1.getDBID(), neighbor2.getDBID()).doubleValue();
+ if(!neighbor1.sameDBID(neighbor2) && !neighbor2.sameDBID(iditer)) {
+ Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
}
}
}
}
- dxp /= nsize;
- Dxp /= (nsize * (nsize - 1));
- Double ldof = dxp / Dxp;
- if(ldof.isNaN() || ldof.isInfinite()) {
+ double ldof = dxp.getMean() / Dxp.getMean();
+ if(Double.isNaN(ldof) || Double.isInfinite(ldof)) {
ldof = 1.0;
}
- ldofs.putDouble(id, ldof);
+ ldofs.putDouble(iditer, ldof);
// update maximum
ldofminmax.put(ldof);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
index cfd8623c..a04aa041 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java
@@ -35,7 +35,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
@@ -136,19 +137,21 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
/**
- * Runs the algorithm in the timed evaluation part.
+ * Run the algorithm
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
*/
- @Override
- public OutlierResult run(Database database) throws IllegalStateException {
- Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
+ public OutlierResult run(Database database, Relation<O> relation) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), logger) : null;
// LOCI preprocessing step
WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class);
- for(DBID id : relation.iterDBIDs()) {
- List<DistanceResultPair<D>> neighbors = rangeQuery.getRangeForDBID(id, rmax);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
// build list of critical distances
ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() * 2);
{
@@ -177,7 +180,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
}
}
- interestingDistances.put(id, cdist);
+ interestingDistances.put(iditer, cdist);
if(progressPreproc != null) {
progressPreproc.incrementProcessed(logger);
}
@@ -191,8 +194,8 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBID id : relation.iterDBIDs()) {
- final List<DoubleIntPair> cdist = interestingDistances.get(id);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final List<DoubleIntPair> cdist = interestingDistances.get(iditer);
final double maxdist = cdist.get(cdist.size() - 1).first;
final int maxneig = cdist.get(cdist.size() - 1).second;
@@ -201,7 +204,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(maxneig >= nmin) {
D range = distFunc.getDistanceFactory().fromDouble(maxdist);
// Compute the largest neighborhood we will need.
- List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(id, range);
+ List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(iditer, range);
// Ensure the set is sorted. Should be a no-op with most indexes.
Collections.sort(maxneighbors);
// For any critical distance, compute the normalized MDEF score.
@@ -221,7 +224,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
if(ne.getDistance().doubleValue() > r) {
break;
}
- int rn_alphar = elementsAtRadius(interestingDistances.get(ne.getDBID()), alpha_r);
+ int rn_alphar = elementsAtRadius(interestingDistances.get(ne), alpha_r);
mv_n_r_alpha.put(rn_alphar);
}
// We only use the average and standard deviation
@@ -244,8 +247,8 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
maxmdefnorm = 1.0;
maxnormr = maxdist;
}
- mdef_norm.putDouble(id, maxmdefnorm);
- mdef_radius.putDouble(id, maxnormr);
+ mdef_norm.putDouble(iditer, maxmdefnorm);
+ mdef_radius.putDouble(iditer, maxnormr);
minmax.put(maxmdefnorm);
if(progressLOCI != null) {
progressLOCI.incrementProcessed(logger);
@@ -255,7 +258,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas
progressLOCI.ensureCompleted(logger);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
- OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.POSITIVE_INFINITY, 0.0);
+ OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
result.addChildResult(new MaterializedRelation<Double>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs()));
return result;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
index 85e1aef2..5aba41ec 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
@@ -51,6 +51,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -174,13 +175,15 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* @param k the value of k
* @param distanceFunction the distance function
*
- * Uses the same distance function for neighborhood computation and reachability distance (standard as in the original publication),
- * same as {@link #LOF(int, DistanceFunction, DistanceFunction) LOF(int, distanceFunction, distanceFunction)}.
+ * Uses the same distance function for neighborhood computation and
+ * reachability distance (standard as in the original publication),
+ * same as {@link #LOF(int, DistanceFunction, DistanceFunction)
+ * LOF(int, distanceFunction, distanceFunction)}.
*/
public LOF(int k, DistanceFunction<? super O, D> distanceFunction) {
this(k, distanceFunction, distanceFunction);
}
-
+
/**
* Performs the Generalized LOF_SCORE algorithm on the given database by
* calling {@link #doRunInTime}.
@@ -239,11 +242,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
* returns a {@link LOF.LOFResult} encapsulating information that may be
* needed by an OnlineLOF algorithm.
*
+ * @param ids Object ids
* @param kNNRefer the kNN query w.r.t. reference neighborhood distance
* function
* @param kNNReach the kNN query w.r.t. reachability distance function
+ * @param stepprog Progress logger
+ * @return LOF result
*/
- protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) throws IllegalStateException {
+ protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) {
// Assert we got something
if(kNNRefer == null) {
throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
@@ -290,19 +296,19 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) {
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress lrdsProgress = logger.isVerbose() ? new FiniteProgress("LRD", ids.size(), logger) : null;
- for(DBID id : ids) {
- double sum = 0;
- KNNResult<D> neighbors = knnReach.getKNNForDBID(id, k);
- int nsize = neighbors.size() - (objectIsInKNN ? 0 : 1);
+ Mean mean = new Mean();
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ mean.reset();
+ KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k);
for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
- KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor.getDBID(), k);
- sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue());
+ if(objectIsInKNN || !neighbor.sameDBID(iter)) {
+ KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k);
+ mean.put(Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()));
}
}
// Avoid division by 0
- double lrd = (sum > 0) ? nsize / sum : 0.0;
- lrds.putDouble(id, lrd);
+ final double lrd = (mean.getCount() > 0) ? 1 / mean.getMean() : 0.0;
+ lrds.putDouble(iter, lrd);
if(lrdsProgress != null) {
lrdsProgress.incrementProcessed(logger);
}
@@ -328,26 +334,25 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou
DoubleMinMax lofminmax = new DoubleMinMax();
FiniteProgress progressLOFs = logger.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), logger) : null;
- for(DBID id : ids) {
- double lrdp = lrds.get(id);
+ Mean mean = new Mean();
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double lrdp = lrds.get(iter);
final double lof;
if(lrdp > 0) {
- final KNNResult<D> neighbors = knnRefer.getKNNForDBID(id, k);
- int nsize = neighbors.size() - (objectIsInKNN ? 0 : 1);
- // skip the point itself
- // neighbors.remove(0);
- double sum = 0;
+ final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k);
+ mean.reset();
for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
- sum += lrds.get(neighbor.getDBID());
+ // skip the point itself
+ if(objectIsInKNN || !neighbor.sameDBID(iter)) {
+ mean.put(lrds.get(neighbor));
}
}
- lof = (sum / nsize) / lrdp;
+ lof = mean.getMean() / lrdp;
}
else {
lof = 1.0;
}
- lofs.putDouble(id, lof);
+ lofs.putDouble(iter, lof);
// update minimum and maximum
lofminmax.put(lof);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
index f1c273f6..dc0d26a4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -47,6 +47,7 @@ import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
+import de.lmu.ifi.dbs.elki.math.Mean;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
@@ -206,8 +207,12 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
/**
* Performs the LoOP algorithm on the given database.
+ *
+ * @param database Database to process
+ * @param relation Relation to process
+ * @return Outlier result
*/
- public OutlierResult run(Database database, Relation<O> relation) throws IllegalStateException {
+ public OutlierResult run(Database database, Relation<O> relation) {
final double sqrt2 = Math.sqrt(2.0);
StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null;
@@ -226,28 +231,29 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
// Probabilistic distances
WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
+ Mean mean = new Mean();
{// computing PRDs
if(stepprog != null) {
stepprog.beginStep(3, "Computing pdists", logger);
}
FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null;
- for(DBID id : relation.iterDBIDs()) {
- final KNNResult<D> neighbors = knnReach.getKNNForDBID(id, kreach);
- double sqsum = 0.0;
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNResult<D> neighbors = knnReach.getKNNForDBID(iditer, kreach);
+ mean.reset();
// use first kref neighbors as reference set
int ks = 0;
for(DistanceResultPair<D> neighbor : neighbors) {
- if(objectIsInKNN || !neighbor.getDBID().equals(id)) {
+ if(objectIsInKNN || !neighbor.sameDBID(iditer)) {
double d = neighbor.getDistance().doubleValue();
- sqsum += d * d;
+ mean.put(d * d);
ks++;
if(ks >= kreach) {
break;
}
}
}
- double pdist = lambda * Math.sqrt(sqsum / ks);
- pdists.putDouble(id, pdist);
+ double pdist = lambda * Math.sqrt(mean.getMean());
+ pdists.putDouble(iditer, pdist);
if(prdsProgress != null) {
prdsProgress.incrementProcessed(logger);
}
@@ -262,25 +268,26 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
FiniteProgress progressPLOFs = logger.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), logger) : null;
- for(DBID id : relation.iterDBIDs()) {
- final KNNResult<D> neighbors = knnComp.getKNNForDBID(id, kcomp);
- MeanVariance mv = new MeanVariance();
+ MeanVariance mv = new MeanVariance();
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final KNNResult<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp);
+ mv.reset();
// use first kref neighbors as comparison set.
int ks = 0;
for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(objectIsInKNN || !neighbor1.getDBID().equals(id)) {
- mv.put(pdists.doubleValue(neighbor1.getDBID()));
+ if(objectIsInKNN || !neighbor1.sameDBID(iditer)) {
+ mv.put(pdists.doubleValue(neighbor1));
ks++;
if(ks >= kcomp) {
break;
}
}
}
- double plof = Math.max(pdists.doubleValue(id) / mv.getMean(), 1.0);
+ double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0);
if(Double.isNaN(plof) || Double.isInfinite(plof)) {
plof = 1.0;
}
- plofs.putDouble(id, plof);
+ plofs.putDouble(iditer, plof);
mvplof.put((plof - 1.0) * (plof - 1.0));
if(progressPLOFs != null) {
@@ -302,8 +309,8 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O
}
FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null;
- for(DBID id : relation.iterDBIDs()) {
- loops.putDouble(id, NormalDistribution.erf((plofs.doubleValue(id) - 1) / (nplof * sqrt2)));
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2)));
if(progressLOOPs != null) {
progressLOOPs.incrementProcessed(logger);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
index 2f120c44..b3d24463 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java
@@ -34,19 +34,20 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
@@ -116,51 +117,49 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc
// FIXME: implicit preprocessor.
WritableDataStore<KNNResult<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNResult.class);
WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- WritableDataStore<Integer> minPtsNeighborhoodSize = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Integer.class);
+ WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
// Pass 1
// N_minpts(id) and core-distance(id)
- for(DBID id : relation.iterDBIDs()) {
- KNNResult<D> minptsNeighbours = knnQuery.getKNNForDBID(id, minpts);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ KNNResult<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
D d = minptsNeighbours.getKNNDistance();
- nMinPts.put(id, minptsNeighbours);
- coreDistance.putDouble(id, d.doubleValue());
- minPtsNeighborhoodSize.put(id, rangeQuery.getRangeForDBID(id, d).size());
+ nMinPts.put(iditer, minptsNeighbours);
+ coreDistance.putDouble(iditer, d.doubleValue());
+ minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
}
// Pass 2
WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
List<Double> core = new ArrayList<Double>();
double lrd = 0;
- for(DistanceResultPair<D> neighPair : nMinPts.get(id)) {
- DBID idN = neighPair.getDBID();
- double coreDist = coreDistance.doubleValue(idN);
- double dist = distQuery.distance(id, idN).doubleValue();
- Double rd = Math.max(coreDist, dist);
+ for(DistanceResultPair<D> neighPair : nMinPts.get(iditer)) {
+ double coreDist = coreDistance.doubleValue(neighPair);
+ double dist = distQuery.distance(iditer, neighPair).doubleValue();
+ double rd = Math.max(coreDist, dist);
lrd = rd + lrd;
core.add(rd);
}
- lrd = (minPtsNeighborhoodSize.get(id) / lrd);
- reachDistance.put(id, core);
- lrds.putDouble(id, lrd);
+ lrd = minPtsNeighborhoodSize.intValue(iditer) / lrd;
+ reachDistance.put(iditer, core);
+ lrds.putDouble(iditer, lrd);
}
// Pass 3
DoubleMinMax ofminmax = new DoubleMinMax();
WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double of = 0;
- for(DistanceResultPair<D> pair : nMinPts.get(id)) {
- DBID idN = pair.getDBID();
- double lrd = lrds.doubleValue(id);
- double lrdN = lrds.doubleValue(idN);
+ for(DistanceResultPair<D> pair : nMinPts.get(iditer)) {
+ double lrd = lrds.doubleValue(iditer);
+ double lrdN = lrds.doubleValue(pair);
of = of + lrdN / lrd;
}
- of = of / minPtsNeighborhoodSize.get(id);
- ofs.putDouble(id, of);
+ of = of / minPtsNeighborhoodSize.intValue(iditer);
+ ofs.putDouble(iditer, of);
// update minimum and maximum
ofminmax.put(of);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
index ad17398c..9b974ad9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java
@@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -67,7 +67,6 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
* @author Elke Achtert
*
* @apiviz.has LOF.LOFResult oneway - - updates
- * @apiviz.composedOf OnlineLOF.LOFKNNListener
*/
// TODO: related to publication?
public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
@@ -170,6 +169,10 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
/**
* Encapsulates a listener for changes of kNNs used in the online LOF
* algorithm.
+ *
+ * @author Elke Achtert
+ *
+ * @apiviz.exclude
*/
private class LOFKNNListener implements KNNListener {
/**
@@ -269,12 +272,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
- for(DBID id : affected_lrd_id_candidates) {
- double new_lrd = new_lrds.doubleValue(id);
- double old_lrd = lofResult.getLrds().doubleValue(id);
+ for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ double new_lrd = new_lrds.doubleValue(iter);
+ double old_lrd = lofResult.getLrds().doubleValue(iter);
if(Double.isNaN(old_lrd) || old_lrd != new_lrd) {
- lofResult.getLrds().putDouble(id, new_lrd);
- affected_lrd_ids.add(id);
+ lofResult.getLrds().putDouble(iter, new_lrd);
+ affected_lrd_ids.add(iter);
}
}
@@ -314,9 +317,9 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
if(stepprog != null) {
stepprog.beginStep(1, "Delete old LRDs and LOFs.", logger);
}
- for(DBID id : deletions) {
- lofResult.getLrds().delete(id);
- lofResult.getLofs().delete(id);
+ for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
+ lofResult.getLrds().delete(iter);
+ lofResult.getLofs().delete(iter);
}
// recompute lrds
@@ -328,12 +331,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
- for(DBID id : affected_lrd_id_candidates) {
- double new_lrd = new_lrds.doubleValue(id);
- double old_lrd = lofResult.getLrds().doubleValue(id);
+ for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
+ double new_lrd = new_lrds.doubleValue(iter);
+ double old_lrd = lofResult.getLrds().doubleValue(iter);
if(old_lrd != new_lrd) {
- lofResult.getLrds().putDouble(id, new_lrd);
- affected_lrd_ids.add(id);
+ lofResult.getLrds().putDouble(iter, new_lrd);
+ affected_lrd_ids.add(iter);
}
}
@@ -371,7 +374,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
}
for(List<DistanceResultPair<D>> queryResult : queryResults) {
for(DistanceResultPair<D> qr : queryResult) {
- result.add(qr.getDBID());
+ result.add(qr);
}
}
return DBIDUtil.newArray(result);
@@ -386,8 +389,8 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
private void recomputeLOFs(DBIDs ids, LOFResult<O, D> lofResult) {
Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lofResult.getLrds(), lofResult.getKNNRefer());
WritableDoubleDataStore new_lofs = lofsAndMax.getFirst();
- for(DBID id : ids) {
- lofResult.getLofs().putDouble(id, new_lofs.doubleValue(id));
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ lofResult.getLofs().putDouble(iter, new_lofs.doubleValue(iter));
}
// track the maximum value for normalization.
DoubleMinMax new_lofminmax = lofsAndMax.getSecond();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
index 2b122183..d8322d8b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java
@@ -38,5 +38,5 @@ public interface OutlierAlgorithm extends Algorithm {
// Note: usually you won't override this method directly, but instead
// Use the magic in AbstractAlgorithm and just implement a run method for your input data
@Override
- OutlierResult run(Database database) throws IllegalStateException;
+ OutlierResult run(Database database);
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
index befd03ed..dd1d37a3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
@@ -87,7 +87,7 @@ import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic;
*/
@Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
-@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDM.2006.17")
+@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06), Hong Kong, China, 2006", url = "http://dx.doi.org/10.1109/ICDM.2006.17")
public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
@@ -164,7 +164,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
for(int l = 0; l < firstReferenceDists.size(); l++) {
double density = computeDensity(firstReferenceDists, l);
// Initial value
- rbod_score.putDouble(firstReferenceDists.get(l).getDBID(), density);
+ rbod_score.putDouble(firstReferenceDists.get(l), density);
}
// compute density values for all remaining reference points
while(iter.hasNext()) {
@@ -174,24 +174,24 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
for(int l = 0; l < referenceDists.size(); l++) {
double density = computeDensity(referenceDists, l);
// Update minimum
- if(density < rbod_score.doubleValue(referenceDists.get(l).getDBID())) {
- rbod_score.putDouble(referenceDists.get(l).getDBID(), density);
+ if(density < rbod_score.doubleValue(referenceDists.get(l))) {
+ rbod_score.putDouble(referenceDists.get(l), density);
}
}
}
}
// compute maximum density
double maxDensity = 0.0;
- for(DBID id : relation.iterDBIDs()) {
- double dens = rbod_score.doubleValue(id);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double dens = rbod_score.doubleValue(iditer);
if(dens > maxDensity) {
maxDensity = dens;
}
}
// compute ROS
- for(DBID id : relation.iterDBIDs()) {
- double score = 1 - (rbod_score.doubleValue(id) / maxDensity);
- rbod_score.putDouble(id, score);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double score = 1 - (rbod_score.doubleValue(iditer) / maxDensity);
+ rbod_score.putDouble(iditer, score);
}
// adds reference points to the result. header information for the
@@ -218,9 +218,9 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte
protected List<DistanceResultPair<D>> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
// TODO: optimize for double distances?
List<DistanceResultPair<D>> referenceDists = new ArrayList<DistanceResultPair<D>>(database.size());
- for(DBID id : database.iterDBIDs()) {
- final D distance = distFunc.distance(id, refPoint);
- referenceDists.add(new GenericDistanceResultPair<D>(distance, id));
+ for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ final D distance = distFunc.distance(iditer, refPoint);
+ referenceDists.add(new GenericDistanceResultPair<D>(distance, iditer.getDBID()));
}
Collections.sort(referenceDists);
return referenceDists;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
index 22447454..1542b8e3 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java
@@ -40,7 +40,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -142,7 +142,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
public OutlierResult run(Database database, Relation<?> relation) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- Pattern colSep = Pattern.compile(AbstractParser.WHITESPACE_PATTERN);
+ Pattern colSep = Pattern.compile(AbstractParser.DEFAULT_SEPARATOR);
DoubleMinMax minmax = new DoubleMinMax();
InputStream in;
try {
@@ -210,10 +210,10 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult>
((OutlierScalingFunction) scaling).prepare(or);
}
DoubleMinMax mm = new DoubleMinMax();
- for(DBID id : relation.iterDBIDs()) {
- double val = scoresult.get(id); // scores.get(id);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double val = scoresult.get(iditer);
val = scaling.getScaled(val);
- scores.putDouble(id, val);
+ scores.putDouble(iditer, val);
mm.put(val);
}
meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index c8da9501..407b7400 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -36,7 +36,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
@@ -50,7 +50,6 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.iterator.IterableIterator;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
@@ -163,14 +162,14 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
DoubleMinMax minmax = new DoubleMinMax();
if(breadth) {
FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
- Pair<IterableIterator<DBID>, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
+ Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
for(OutlierResult r : results) {
- IDVectorOntoScoreVector[i] = new Pair<IterableIterator<DBID>, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()), r.getScores());
+ IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
@@ -178,17 +177,17 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
// Iterating over the *lines* of the AS_t(i)-matrix.
for(int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for(Pair<IterableIterator<DBID>, Relation<Double>> pair : IDVectorOntoScoreVector) {
- IterableIterator<DBID> iter = pair.first;
+ for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ DBIDIter iter = pair.first;
// Always true if every algorithm returns a complete result (one score
// for every DBID).
- if(iter.hasNext()) {
- DBID tmpID = iter.next();
- double score = pair.second.get(tmpID);
- if(Double.isNaN(scores.doubleValue(tmpID))) {
- scores.putDouble(tmpID, score);
+ if(iter.valid()) {
+ double score = pair.second.get(iter);
+ if(Double.isNaN(scores.doubleValue(iter))) {
+ scores.putDouble(iter, score);
minmax.put(score);
}
+ iter.advance();
}
else {
logger.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
@@ -205,15 +204,15 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
else {
FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
for(OutlierResult r : results) {
- final Double s = r.getScores().get(id);
+ final Double s = r.getScores().get(iter);
if (s != null && !Double.isNaN(s)) {
sum += s;
}
}
- scores.putDouble(id, sum);
+ scores.putDouble(iter, sum);
minmax.put(sum);
if(cprog != null) {
cprog.incrementProcessed(logger);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
new file mode 100644
index 00000000..73d4156a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java
@@ -0,0 +1,633 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.LOF;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.VectorUtil;
+import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
+import de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection;
+import de.lmu.ifi.dbs.elki.data.projection.Projection;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.ProjectedView;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.statistics.tests.GoodnessOfFitTest;
+import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Algorithm to compute High Contrast Subspaces for Density-Based Outlier
+ * Ranking.
+ *
+ * Reference:
+ * <p>
+ * Fabian Keller, Emmanuel Müller, Klemens Böhm:<br />
+ * HiCS: High Contrast Subspaces for Density-Based Outlier Ranking<br />
+ * in: Proc. IEEE 28th Int. Conf. on Data Engineering (ICDE 2012), Washington,
+ * DC, USA
+ * </p>
+ *
+ * @author Jan Brusis
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf GoodnessOfFitTest
+ * @apiviz.composedOf OutlierAlgorithm
+ *
+ * @param <V> vector type
+ */
+@Title("HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
+@Description("Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
+@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)")
+public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The Logger for this class
+ */
+ private static final Logging logger = Logging.getLogger(HiCS.class);
+
+ /**
+ * Maximum number of retries.
+ */
+ private static final int MAX_RETRIES = 100;
+
+ /**
+ * Monte-Carlo iterations
+ */
+ private int m;
+
+ /**
+ * Alpha threshold
+ */
+ private double alpha;
+
+ /**
+ * Outlier detection algorithm
+ */
+ private OutlierAlgorithm outlierAlgorithm;
+
+ /**
+ * Statistical test to use
+ */
+ private GoodnessOfFitTest statTest;
+
+ /**
+ * Candidates limit
+ */
+ private int cutoff;
+
+ /**
+ * Random generator
+ */
+ private Random random;
+
+ /**
+ * Constructor
+ *
+ * @param m value of m
+ * @param alpha value of alpha
+ * @param outlierAlgorithm Inner outlier detection algorithm
+ * @param statTest Test to use
+ * @param cutoff Candidate limit
+ * @param seed Random seed
+ */
+ public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, Long seed) {
+ super();
+ this.m = m;
+ this.alpha = alpha;
+ this.outlierAlgorithm = outlierAlgorithm;
+ this.statTest = statTest;
+ this.cutoff = cutoff;
+ this.random = (seed != null) ? new Random(seed) : new Random();
+ }
+
+ /**
+ * Perform HiCS on a given database
+ *
+ * @param relation the database
+ * @return The aggregated resulting scores that were assigned by the given
+ * outlier detection algorithm
+ */
+ public OutlierResult run(Relation<V> relation) {
+ final DBIDs ids = relation.getDBIDs();
+ final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+
+ ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
+ Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex);
+
+ if(logger.isVerbose()) {
+ logger.verbose("Number of high-contrast subspaces: " + subspaces.size());
+ }
+ List<Relation<Double>> results = new ArrayList<Relation<Double>>();
+ FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), logger) : null;
+
+ // run outlier detection and collect the result
+ // TODO extend so that any outlierAlgorithm can be used (use materialized
+ // relation instead of SubspaceEuclideanDistanceFunction?)
+ for(HiCSSubspace dimset : subspaces) {
+ if(logger.isVerbose()) {
+ logger.verbose("Performing outlier detection in subspace " + dimset);
+ }
+
+ ProxyDatabase pdb = new ProxyDatabase(ids);
+ Projection<V, V> proj = new NumericalFeatureSelection<V>(dimset, factory);
+ pdb.addRelation(new ProjectedView<V, V>(relation, proj));
+
+ // run LOF and collect the result
+ OutlierResult result = outlierAlgorithm.run(pdb);
+ results.add(result.getScores());
+ if(prog != null) {
+ prog.incrementProcessed(logger);
+ }
+ }
+ if(prog != null) {
+ prog.ensureCompleted(logger);
+ }
+
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
+ DoubleMinMax minmax = new DoubleMinMax();
+
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double sum = 0.0;
+ for(Relation<Double> r : results) {
+ final Double s = r.get(iditer);
+ if(s != null && !Double.isNaN(s)) {
+ sum += s;
+ }
+ }
+ scores.putDouble(iditer, sum);
+ minmax.put(sum);
+ }
+ OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
+ Relation<Double> scoreres = new MaterializedRelation<Double>("HiCS", "HiCS-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+
+ return new OutlierResult(meta, scoreres);
+ }
+
+ /**
+ * Calculates "index structures" for every attribute, i.e. sorts a
+ * ModifiableArray of every DBID in the database for every dimension and
+ * stores them in a list
+ *
+ * @param relation Relation to index
+ * @return List of sorted objects
+ */
+ private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?, ?>> relation) {
+ final int dim = DatabaseUtil.dimensionality(relation);
+ ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
+
+ SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
+ for(int i = 1; i <= dim; i++) {
+ ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(relation.getDBIDs());
+ comp.setDimension(i);
+ amDBIDs.sort(comp);
+ subspaceIndex.add(amDBIDs);
+ }
+
+ return subspaceIndex;
+ }
+
+ /**
+ * Identifies high contrast subspaces in a given full-dimensional database
+ *
+ * @param relation the relation the HiCS should be evaluated for
+ * @param subspaceIndex Subspace indexes
+ * @return a set of high contrast subspaces
+ */
+ private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?, ?>> relation, ArrayList<ArrayDBIDs> subspaceIndex) {
+ final int dbdim = DatabaseUtil.dimensionality(relation);
+
+ FiniteProgress dprog = logger.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, logger) : null;
+ if(dprog != null) {
+ dprog.setProcessed(2, logger);
+ }
+
+ TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
+ TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<HiCSSubspace>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
+ FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Generating two-element subsets", dbdim * (dbdim - 1) / 2, logger) : null;
+ // compute two-element sets of subspaces
+ for(int i = 0; i < dbdim; i++) {
+ for(int j = i + 1; j < dbdim; j++) {
+ HiCSSubspace ts = new HiCSSubspace();
+ ts.set(i);
+ ts.set(j);
+ calculateContrast(relation, ts, subspaceIndex);
+ dDimensionalList.add(ts);
+ if(prog != null) {
+ prog.incrementProcessed(logger);
+ }
+ }
+ }
+ if(prog != null) {
+ prog.ensureCompleted(logger);
+ }
+
+ IndefiniteProgress qprog = logger.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", logger) : null;
+ for(int d = 3; !dDimensionalList.isEmpty(); d++) {
+ if(dprog != null) {
+ dprog.setProcessed(d, logger);
+ }
+ subspaceList.addAll(dDimensionalList);
+ // result now contains all d-dimensional sets of subspaces
+
+ ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList);
+ dDimensionalList.clear();
+ // candidateList now contains the *m* best d-dimensional sets
+ Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
+
+ // TODO: optimize APRIORI style, by not even computing the bit set or?
+ for(int i = 0; i < candidateList.size() - 1; i++) {
+ for(int j = i + 1; j < candidateList.size(); j++) {
+ HiCSSubspace set1 = candidateList.get(i);
+ HiCSSubspace set2 = candidateList.get(j);
+
+ HiCSSubspace joinedSet = new HiCSSubspace();
+ joinedSet.or(set1);
+ joinedSet.or(set2);
+ if(joinedSet.cardinality() != d) {
+ continue;
+ }
+
+ calculateContrast(relation, joinedSet, subspaceIndex);
+ dDimensionalList.add(joinedSet);
+ if(qprog != null) {
+ qprog.incrementProcessed(logger);
+ }
+ }
+ }
+ // Prune
+ for(HiCSSubspace cand : candidateList) {
+ for(HiCSSubspace nextSet : dDimensionalList) {
+ if(nextSet.contrast > cand.contrast) {
+ subspaceList.remove(cand);
+ break;
+ }
+ }
+ }
+ }
+ if(qprog != null) {
+ qprog.setCompleted(logger);
+ }
+ if(dprog != null) {
+ dprog.setProcessed(dbdim, logger);
+ dprog.ensureCompleted(logger);
+ }
+ return subspaceList;
+ }
+
+ /**
+ * Calculates the actual contrast of a given subspace
+ *
+ * @param relation
+ * @param subspace
+ * @param subspaceIndex Subspace indexes
+ */
+ private void calculateContrast(Relation<? extends NumberVector<?, ?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex) {
+ final int card = subspace.cardinality();
+ final double alpha1 = Math.pow(alpha, (1.0 / card));
+ final int windowsize = (int) (relation.size() * alpha1);
+ final FiniteProgress prog = logger.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, logger) : null;
+
+ int retries = 0;
+ double deviationSum = 0.0;
+ for(int i = 0; i < m; i++) {
+ // Choose a random set bit.
+ int chosen = -1;
+ for(int tmp = random.nextInt(card); tmp >= 0; tmp--) {
+ chosen = subspace.nextSetBit(chosen + 1);
+ }
+ // initialize sample
+ DBIDs conditionalSample = relation.getDBIDs();
+
+ for(int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
+ if(j == chosen) {
+ continue;
+ }
+ ArrayDBIDs sortedIndices = subspaceIndex.get(j);
+ ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray();
+ // initialize index block
+ int start = random.nextInt(relation.size() - windowsize);
+ for(int k = start; k < start + windowsize; k++) {
+ indexBlock.add(sortedIndices.get(k)); // select index block
+ }
+
+ conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
+ }
+ if(conditionalSample.size() < 10) {
+ retries++;
+ if(logger.isDebugging()) {
+ logger.debug("Sample size very small. Retry no. " + retries);
+ }
+ if(retries >= MAX_RETRIES) {
+ logger.warning("Too many retries, for small samples: " + retries);
+ }
+ else {
+ i--;
+ continue;
+ }
+ }
+ // Project conditional set
+ double[] sampleValues = new double[conditionalSample.size()];
+ {
+ int l = 0;
+ for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
+ sampleValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ l++;
+ }
+ }
+ // Project full set
+ double[] fullValues = new double[relation.size()];
+ {
+ int l = 0;
+ for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
+ fullValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ l++;
+ }
+ }
+ double contrast = statTest.deviation(fullValues, sampleValues);
+ if(Double.isNaN(contrast)) {
+ i--;
+ logger.warning("Contrast was NaN");
+ continue;
+ }
+ deviationSum += contrast;
+ if(prog != null) {
+ prog.incrementProcessed(logger);
+ }
+ }
+ if(prog != null) {
+ prog.ensureCompleted(logger);
+ }
+ subspace.contrast = deviationSum / m;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * BitSet that holds a contrast value as field. Used for the representation of
+ * a subspace in HiCS
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class HiCSSubspace extends BitSet {
+ /**
+ * Serial version
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * The HiCS contrast value
+ */
+ protected double contrast;
+
+ /**
+ * Constructor.
+ */
+ public HiCSSubspace() {
+ super();
+ }
+
+ @Override
+ public String toString() {
+ StringBuffer buf = new StringBuffer();
+ buf.append("[contrast=").append(contrast);
+ for(int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
+ buf.append(" ").append(i + 1);
+ }
+ buf.append("]");
+ return buf.toString();
+ }
+
+ /**
+ * Sort subspaces by their actual subspace.
+ */
+ public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
+ @Override
+ public int compare(HiCSSubspace o1, HiCSSubspace o2) {
+ if(o1.contrast == o2.contrast) {
+ return 0;
+ }
+ return o1.contrast > o2.contrast ? 1 : -1;
+ }
+ };
+
+ /**
+ * Sort subspaces by their actual subspace.
+ */
+ public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
+ @Override
+ public int compare(HiCSSubspace o1, HiCSSubspace o2) {
+ if(o1.contrast == o2.contrast) {
+ return 0;
+ }
+ return o1.contrast < o2.contrast ? 1 : -1;
+ }
+ };
+
+ /**
+ * Sort subspaces by their actual subspace.
+ */
+ public static Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>() {
+ @Override
+ public int compare(HiCSSubspace o1, HiCSSubspace o2) {
+ int dim1 = o1.nextSetBit(0);
+ int dim2 = o2.nextSetBit(0);
+ while(dim1 >= 0 && dim2 >= 0) {
+ if(dim1 < dim2) {
+ return -1;
+ }
+ else if(dim1 > dim2) {
+ return 1;
+ }
+ dim1 = o1.nextSetBit(dim1 + 1);
+ dim2 = o2.nextSetBit(dim2 + 1);
+ }
+ return 0;
+ }
+ };
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Jan Brusis
+ *
+ * @apiviz.exclude
+ *
+ * @param <V> vector type
+ */
+ public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ /**
+ * Parameter that specifies the number of iterations in the Monte-Carlo
+ * process of identifying high contrast subspaces
+ */
+ public static final OptionID M_ID = OptionID.getOrCreateOptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
+
+ /**
+ * Parameter that determines the size of the test statistic during the
+ * Monte-Carlo iteration
+ */
+ public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
+
+ /**
+ * Parameter that specifies which outlier detection algorithm to use on the
+ * resulting set of high contrast subspaces
+ */
+ public static final OptionID ALGO_ID = OptionID.getOrCreateOptionID("hics.algo", "The Algorithm that performs the actual outlier detection on the resulting set of subspace");
+
+ /**
+ * Parameter that specifies which statistical test to use in order to
+ * calculate the deviation of two given data samples
+ */
+ public static final OptionID TEST_ID = OptionID.getOrCreateOptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
+
+ /**
+ * Parameter that specifies the candidate_cutoff
+ */
+ public static final OptionID LIMIT_ID = OptionID.getOrCreateOptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
+
+ /**
+ * Parameter that specifies the random seed
+ */
+ public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("hics.seed", "The random seed.");
+
+ /**
+ * Holds the value of {@link #M_ID}.
+ */
+ private int m = 50;
+
+ /**
+ * Holds the value of {@link #ALPHA_ID}.
+ */
+ private double alpha = 0.1;
+
+ /**
+ * Holds the value of {@link #ALGO_ID}.
+ */
+ private OutlierAlgorithm outlierAlgorithm;
+
+ /**
+ * Holds the value of {@link #TEST_ID}.
+ */
+ private GoodnessOfFitTest statTest;
+
+ /**
+ * Holds the value of {@link #LIMIT_ID}
+ */
+ private int cutoff = 400;
+
+ /**
+ * Random seed (optional)
+ */
+ private Long seed = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ final IntParameter mP = new IntParameter(M_ID, new GreaterConstraint(1), 50);
+ if(config.grab(mP)) {
+ m = mP.getValue();
+ }
+
+ final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.1);
+ if(config.grab(alphaP)) {
+ alpha = alphaP.getValue();
+ }
+
+ final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<OutlierAlgorithm>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
+ if(config.grab(algoP)) {
+ outlierAlgorithm = algoP.instantiateClass(config);
+ }
+
+ final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
+ if(config.grab(testP)) {
+ statTest = testP.instantiateClass(config);
+ }
+
+ final IntParameter cutoffP = new IntParameter(LIMIT_ID, new GreaterConstraint(1), 100);
+ if(config.grab(cutoffP)) {
+ cutoff = cutoffP.getValue();
+ }
+
+ final LongParameter seedP = new LongParameter(SEED_ID, true);
+ if(config.grab(seedP)) {
+ seed = seedP.getValue();
+ }
+}
+
+ @Override
+ protected HiCS<V> makeInstance() {
+ return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, seed);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
index 9634cd59..a4db7e3d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java
@@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -55,6 +55,8 @@ import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction;
* Scale another outlier score using the given scaling function.
*
* @author Erich Schubert
+ *
+ * @apiviz.composedOf OutlierAlgorithm
*/
public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
@@ -93,7 +95,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
}
@Override
- public OutlierResult run(Database database) throws IllegalStateException {
+ public OutlierResult run(Database database) {
Result innerresult = algorithm.run(database);
OutlierResult or = getOutlierResult(innerresult);
@@ -105,10 +107,9 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult
WritableDoubleDataStore scaledscores = DataStoreUtil.makeDoubleStorage(scores.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBID id : scores.iterDBIDs()) {
- double val = scores.get(id);
- val = scaling.getScaled(val);
- scaledscores.putDouble(id, val);
+ for(DBIDIter iditer = scores.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double val = scaling.getScaled(scores.get(iditer));
+ scaledscores.putDouble(iditer, val);
minmax.put(val);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
index b4070e0c..7f3bac29 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
@@ -129,7 +130,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs());
ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx);
- double phialpha = NormalDistribution.standardNormalProbit(1.0 - alpha / 2);
+ double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha / 2);
// Detect outliers while significant.
while(true) {
Pair<DBID, Double> candidate = singleIteration(proxy, relationy);
@@ -144,8 +145,8 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
}
// Remaining objects are inliers
- for(DBID id : idview) {
- scores.putDouble(id, 0.0);
+ for (DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) {
+ scores.putDouble(iter.getDBID(), 0.0);
}
}
@@ -204,7 +205,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1);
ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
for(DistanceResultPair<D> dpair : neighbors) {
- if(id.equals(dpair.getDBID())) {
+ if(id.sameDBID(dpair.getDBID())) {
continue;
}
neighborhood.add(dpair.getDBID());
@@ -213,8 +214,8 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte
F.set(i, i, 1.0);
final int nweight = -1 / neighborhood.size();
// We need to find the index positions of the neighbors, unfortunately.
- for(DBID nid : neighborhood) {
- int pos = ids.binarySearch(nid);
+ for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ int pos = ids.binarySearch(iter.getDBID());
assert (pos >= 0);
F.set(pos, i, nweight);
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
index 68e58ffa..a0c09057 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java
@@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -99,7 +100,8 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
CovarianceMatrix covmaker = new CovarianceMatrix(DatabaseUtil.dimensionality(attributes));
WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
- for(DBID id : attributes.iterDBIDs()) {
+ for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
final O obj = attributes.get(id);
final DBIDs neighbors = npred.getNeighborDBIDs(id);
// TODO: remove object itself from neighbors?
@@ -117,7 +119,8 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBID id : attributes.iterDBIDs()) {
+ for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
Vector temp = deltas.get(id).minus(mean);
final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
index 9b4534fe..20ab9a00 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java
@@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -94,18 +95,19 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
DBIDs neighbors = npred.getNeighborDBIDs(id);
final double median;
{
double[] fi = new double[neighbors.size()];
// calculate and store Median of neighborhood
int c = 0;
- for(DBID n : neighbors) {
- if(id.equals(n)) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ if(id.sameDBID(iter)) {
continue;
}
- fi[c] = relation.get(n).doubleValue(1);
+ fi[c] = relation.get(iter).doubleValue(1);
c++;
}
@@ -125,7 +127,8 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
final double mean = mv.getMean();
final double stddev = mv.getNaiveStddev();
DoubleMinMax minmax = new DoubleMinMax();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
double score = Math.abs((scores.doubleValue(id) - mean) / stddev);
minmax.put(score);
scores.putDouble(id, score);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
index cbf61c38..c8bcba74 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java
@@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -108,7 +109,8 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
CovarianceMatrix covmaker = new CovarianceMatrix(dim);
WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
- for(DBID id : attributes.iterDBIDs()) {
+ for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
final O obj = attributes.get(id);
final DBIDs neighbors = npred.getNeighborDBIDs(id);
// Compute the median vector
@@ -117,9 +119,9 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
double[][] data = new double[dim][neighbors.size()];
int i = 0;
// Load data
- for(DBID n : neighbors) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
// TODO: skip object itself within neighbors?
- O nobj = attributes.get(n);
+ O nobj = attributes.get(iter);
for(int d = 0; d < dim; d++) {
data[d][i] = nobj.doubleValue(d + 1);
}
@@ -143,7 +145,8 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBID id : attributes.iterDBIDs()) {
+ for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
Vector temp = deltas.get(id).minus(mean);
final double score = temp.transposeTimesTimes(cmati, temp);
minmax.put(score);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
index 9f19757d..7b88ae66 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java
@@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -98,7 +99,8 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
// Compute the global mean and variance
MeanVariance globalmv = new MeanVariance();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
globalmv.put(relation.get(id).doubleValue(1));
}
@@ -107,12 +109,14 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<
// calculate normalized attribute values
// calculate neighborhood average of normalized attribute values.
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
// Compute global z score
final double globalZ = (relation.get(id).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev();
// Compute local average z score
Mean localm = new Mean();
- for(DBID n : npred.getNeighborDBIDs(id)) {
+ for(DBIDIter iter = npred.getNeighborDBIDs(id).iter(); iter.valid(); iter.advance()) {
+ DBID n = iter.getDBID();
if(id.equals(n)) {
continue;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
index a6425d43..852c4be4 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -208,7 +209,8 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac
DBID id = ids.get(i);
double gmean = 1.0;
int cnt = 0;
- for(DBID n : neighbors.get(id)) {
+ for(DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
+ DBID n = iter.getDBID();
if(id.equals(n)) {
continue;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
index 8e4ab32c..4f11cb38 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java
@@ -32,6 +32,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -102,12 +103,14 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
// Calculate average of neighborhood for each object and perform a linear
// regression using the covariance matrix
CovarianceMatrix covm = new CovarianceMatrix(2);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
final double local = relation.get(id).doubleValue(1);
// Compute mean of neighbors
Mean mean = new Mean();
DBIDs neighbors = npred.getNeighborDBIDs(id);
- for(DBID n : neighbors) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ DBID n = iter.getDBID();
if(id.equals(n)) {
continue;
}
@@ -139,7 +142,8 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
// calculate mean and variance for error
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
// Compute the error from the linear regression
double y_i = relation.get(id).doubleValue(1);
double e = means.doubleValue(id) - (slope * y_i + inter);
@@ -152,7 +156,8 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> {
{
final double mean = mv.getMean();
final double variance = mv.getNaiveStddev();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
double score = Math.abs((scores.doubleValue(id) - mean) / variance);
minmax.put(score);
scores.putDouble(id, score);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
index 573e1526..05729481 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java
@@ -33,6 +33,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -102,17 +103,17 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance zmv = new MeanVariance();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
DBIDs neighbors = npred.getNeighborDBIDs(id);
// Compute Mean of neighborhood
Mean localmean = new Mean();
- for(DBID n : neighbors) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ DBID n = iter.getDBID();
if(id.equals(n)) {
continue;
}
- else {
- localmean.put(relation.get(n).doubleValue(1));
- }
+ localmean.put(relation.get(n).doubleValue(1));
}
final double localdiff;
if(localmean.getCount() > 0) {
@@ -127,7 +128,8 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> {
// Normalize scores using mean and variance
DoubleMinMax minmax = new DoubleMinMax();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
double score = Math.abs(scores.doubleValue(id) - zmv.getMean()) / zmv.getSampleStddev();
minmax.put(score);
scores.putDouble(id, score);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
index e69d46d4..8ae23229 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java
@@ -31,6 +31,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -53,7 +54,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
* Reference:<br>
* Sanjay Chawla and Pei Sun<br>
* SLOM: a new measure for local spatial outliers<br>
- * in Knowledge and Information Systems 2005
+ * in Knowledge and Information Systems 9(4), 412-429, 2006
* </p>
*
* This implementation works around some corner cases in SLOM, in particular
@@ -68,7 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
*/
@Title("SLOM: a new measure for local spatial outliers")
@Description("Spatial local outlier measure (SLOM), which captures the local behaviour of datum in their spatial neighbourhood")
-@Reference(authors = "Sanjay Chawla and Pei Sun", title = "SLOM: a new measure for local spatial outliers", booktitle = "Knowledge and Information Systems 2005", url = "http://rp-www.cs.usyd.edu.au/~chawlarg/papers/KAIS_online.pdf")
+@Reference(authors = "Sanjay Chawla and Pei Sun", title = "SLOM: a new measure for local spatial outliers", booktitle = "Knowledge and Information Systems 9(4), 412-429, 2006", url = "http://dx.doi.org/10.1007/s10115-005-0200-2")
public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier<N, O, D> {
/**
* The logger for this class.
@@ -98,13 +99,15 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// calculate D-Tilde
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
double sum = 0;
double maxDist = 0;
int cnt = 0;
final DBIDs neighbors = npred.getNeighborDBIDs(id);
- for(DBID neighbor : neighbors) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ DBID neighbor = iter.getDBID();
if(id.equals(neighbor)) {
continue;
}
@@ -127,12 +130,14 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
DoubleMinMax slomminmax = new DoubleMinMax();
WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
double sum = 0;
int cnt = 0;
final DBIDs neighbors = npred.getNeighborDBIDs(id);
- for(DBID neighbor : neighbors) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ DBID neighbor = iter.getDBID();
if(neighbor.equals(id)) {
continue;
}
@@ -146,7 +151,8 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance
double avg = sum / cnt;
double beta = 0;
- for(DBID neighbor : neighbors) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ DBID neighbor = iter.getDBID();
final double dist = modifiedDistance.doubleValue(neighbor);
if(dist > avgPlus) {
beta += 1;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
index abc3c481..e9987bf0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java
@@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
@@ -108,11 +109,12 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
DoubleMinMax lofminmax = new DoubleMinMax();
// Compute densities
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
DBIDs neighbors = npred.getNeighborDBIDs(id);
double avg = 0;
- for(DBID n : neighbors) {
- avg += distFunc.distance(id, n).doubleValue();
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ avg += distFunc.distance(id, iter.getDBID()).doubleValue();
}
double lrd = 1 / (avg / neighbors.size());
if (Double.isNaN(lrd)) {
@@ -122,11 +124,12 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB
}
// Compute density quotients
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
DBIDs neighbors = npred.getNeighborDBIDs(id);
double avg = 0;
- for(DBID n : neighbors) {
- avg += lrds.doubleValue(n);
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ avg += lrds.doubleValue(iter.getDBID());
}
final double lrd = (avg / neighbors.size()) / lrds.doubleValue(id);
if (!Double.isNaN(lrd)) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
index 75700bca..41022414 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -116,13 +117,14 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), logger) : null;
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
DBIDs neighbors = npred.getNeighborDBIDs(id);
int num = 0;
double[] values = new double[neighbors.size()];
// calculate trimmedMean
- for(DBID n : neighbors) {
- values[num] = relation.get(n).doubleValue(1);
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ values[num] = relation.get(iter).doubleValue(1);
num++;
}
@@ -161,7 +163,8 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
double[] ei = new double[relation.size()];
{
int i = 0;
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
ei[i] = errors.doubleValue(id);
i++;
}
@@ -180,7 +183,8 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
}
// calculate score
DoubleMinMax minmax = new DoubleMinMax();
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
double score = Math.abs(errors.doubleValue(id)) * 0.6745 / median_dev_from_median;
scores.putDouble(id, score);
minmax.put(score);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
index 9ee92d35..7a2fda52 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java
@@ -29,6 +29,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
@@ -132,15 +133,17 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood {
// Expand multiple steps
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), logger) : null;
- for(final DBID id : database.iterDBIDs()) {
+ for(DBIDIter iter = database.iterDBIDs(); iter.valid(); iter.advance()) {
+ DBID id = iter.getDBID();
HashSetModifiableDBIDs res = DBIDUtil.newHashSet(id);
DBIDs todo = id;
for(int i = 0; i < steps; i++) {
ModifiableDBIDs ntodo = DBIDUtil.newHashSet();
- for(final DBID oid : todo) {
- DBIDs add = innerinst.getNeighborDBIDs(oid);
+ for(DBIDIter iter2 = todo.iter(); iter2.valid(); iter2.advance()) {
+ DBIDs add = innerinst.getNeighborDBIDs(iter2.getDBID());
if(add != null) {
- for(DBID nid : add) {
+ for(DBIDIter iter3 = add.iter(); iter.valid(); iter.advance()) {
+ DBID nid = iter3.getDBID();
if(res.contains(nid)) {
continue;
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
index f2586e2e..74e5bbcf 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java
@@ -42,6 +42,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -149,7 +150,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood {
{
Relation<LabelList> olq = database.getDatabase().getRelation(TypeUtil.LABELLIST);
Relation<ExternalID> eidq = database.getDatabase().getRelation(TypeUtil.EXTERNALID);
- for(DBID id : database.iterDBIDs()) {
+ for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
if(eidq != null) {
ExternalID eid = eidq.get(id);
if(eid != null) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
index f5ea7e15..9dd2dee1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java
@@ -30,6 +30,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
@@ -119,7 +120,8 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte
// TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
- for(DBID id : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
for(DistanceResultPair<D> dpair : neighbors) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
index 52fc2c46..d170571f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java
@@ -30,6 +30,7 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -99,8 +100,10 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr
final double weight = computeWeight(i);
// Collect newly discovered IDs
ModifiableDBIDs add = DBIDUtil.newHashSet();
- for(DBID id : cur) {
- for(DBID nid : inner.getNeighborDBIDs(id)) {
+ for(DBIDIter iter = cur.iter(); iter.valid(); iter.advance()) {
+ DBID id = iter.getDBID();
+ for(DBIDIter iter2 = inner.getNeighborDBIDs(id).iter(); iter2.valid(); iter2.advance()) {
+ DBID nid = iter2.getDBID();
// Seen before?
if(seen.contains(nid)) {
continue;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
index 4378aa2e..ce0666df 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java
@@ -29,6 +29,7 @@ import java.util.Collection;
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -63,7 +64,8 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica
public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) {
DBIDs neighbors = inner.getNeighborDBIDs(reference);
ArrayList<DoubleObjPair<DBID>> adapted = new ArrayList<DoubleObjPair<DBID>>(neighbors.size());
- for(DBID id : neighbors) {
+ for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ DBID id = iter.getDBID();
adapted.add(new DoubleObjPair<DBID>(1.0, id));
}
return adapted;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
index 912f878a..573233a7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OUTRES.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
@@ -1,4 +1,4 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
/*
This file is part of ELKI:
@@ -23,6 +23,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
@@ -37,11 +38,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -77,8 +81,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
* management
* </p>
*
- * @author Pleintinger Viktoria
+ * @author Viktoria Pleintinger
* @author Erich Schubert
+ *
+ * @apiviz.composedOf KernelDensityEstimator
+ *
+ * @param <V> vector type
*/
@Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management")
public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
@@ -122,10 +130,10 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("OutRank scores", relation.size(), logger) : null;
- for(DBID object : relation.iterDBIDs()) {
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
subspace.clear();
- double score = outresScore(0, subspace, object, kernel);
- ranks.putDouble(object, score);
+ double score = outresScore(0, subspace, iditer, kernel);
+ ranks.putDouble(iditer, score);
minmax.put(score);
if(progress != null) {
progress.incrementProcessed(logger);
@@ -149,7 +157,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param kernel Kernel
* @return Score
*/
- public double outresScore(final int s, BitSet subspace, DBID id, KernelDensityEstimator kernel) {
+ public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) {
double score = 1.0; // Initial score is 1.0
for(int i = s; i < kernel.dim; i++) {
@@ -158,10 +166,14 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
}
subspace.set(i);
final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
- final DoubleDistance range = new DoubleDistance(kernel.adjustedEps(kernel.dim));
+ final double adjustedEps = kernel.adjustedEps(kernel.dim);
+ // Query with a larger window, to also get neighbors of neighbors
+ // Subspace euclidean is metric!
+ final DoubleDistance range = new DoubleDistance(adjustedEps * 2);
RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
- List<DistanceResultPair<DoubleDistance>> neigh = rq.getRangeForDBID(id, range);
+ List<DistanceResultPair<DoubleDistance>> neighc = rq.getRangeForDBID(id, range);
+ List<DoubleDistanceResultPair> neigh = refineRange(neighc, adjustedEps);
if(neigh.size() > 2) {
// Relevance test
if(relevantSubspace(subspace, neigh, kernel)) {
@@ -169,8 +181,8 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
final double deviation;
// Compute mean and standard deviation for densities of neighbors.
MeanVariance meanv = new MeanVariance();
- for(DistanceResultPair<DoubleDistance> pair : neigh) {
- List<DistanceResultPair<DoubleDistance>> n2 = rq.getRangeForDBID(pair.getDBID(), range);
+ for(DoubleDistanceResultPair pair : neigh) {
+ List<DoubleDistanceResultPair> n2 = subsetNeighborhoodQuery(neighc, pair.getDBID(), df, adjustedEps, kernel);
meanv.put(kernel.subspaceDensity(subspace, n2));
}
deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
@@ -188,11 +200,62 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
}
/**
+ * Refine a range query.
+ *
+ * @param neighc Original result
+ * @param adjustedEps New epsilon
+ * @return refined list
+ */
+ private List<DoubleDistanceResultPair> refineRange(List<DistanceResultPair<DoubleDistance>> neighc, double adjustedEps) {
+ List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size());
+ // We don't have a guarantee for this list to be sorted
+ for(DistanceResultPair<DoubleDistance> p : neighc) {
+ if(p instanceof DoubleDistanceResultPair) {
+ if(((DoubleDistanceResultPair) p).getDoubleDistance() <= adjustedEps) {
+ n.add((DoubleDistanceResultPair) p);
+ }
+ }
+ else {
+ double dist = p.getDistance().doubleValue();
+ if(dist <= adjustedEps) {
+ n.add(new DoubleDistanceResultPair(dist, p.getDBID()));
+ }
+ }
+ }
+ return n;
+ }
+
+ /**
+ * Refine neighbors within a subset.
*
- * @param test: subspace that will be tested about scattering
- * @return if the subspace is scattered return will be 0, else 1
+ * @param neighc Neighbor candidates
+ * @param dbid Query object
+ * @param df distance function
+ * @param adjustedEps Epsilon range
+ * @param kernel Kernel
+ * @return Neighbors of neighbor object
*/
- protected boolean relevantSubspace(BitSet subspace, List<DistanceResultPair<DoubleDistance>> neigh, KernelDensityEstimator kernel) {
+ private List<DoubleDistanceResultPair> subsetNeighborhoodQuery(List<DistanceResultPair<DoubleDistance>> neighc, DBID dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
+ List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size());
+ V query = kernel.relation.get(dbid);
+ for(DistanceResultPair<DoubleDistance> p : neighc) {
+ double dist = df.doubleDistance(query, kernel.relation.get(p));
+ if(dist <= adjustedEps) {
+ n.add(new DoubleDistanceResultPair(dist, p.getDBID()));
+ }
+ }
+ return n;
+ }
+
+ /**
+ * Subspace relevance test.
+ *
+ * @param subspace Subspace to test
+ * @param neigh Neighbor list
+ * @param kernel Kernel density estimator
+ * @return relevance test result
+ */
+ protected boolean relevantSubspace(BitSet subspace, List<DoubleDistanceResultPair> neigh, KernelDensityEstimator kernel) {
Relation<V> relation = kernel.relation;
final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size());
@@ -201,7 +264,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
double[] data = new double[neigh.size()];
{
int count = 0;
- for(DistanceResultPair<DoubleDistance> object : neigh) {
+ for(DoubleDistanceResultPair object : neigh) {
V vector = relation.get(object.getDBID());
data[count] = vector.doubleValue(dim + 1);
count++;
@@ -257,7 +320,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
/**
* Constructor.
- *
+ *
* @param relation Relation to apply to
*/
public KernelDensityEstimator(Relation<V> relation) {
@@ -277,17 +340,14 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
* @param neighbours Neighbor distance list
* @return Density
*/
- protected double subspaceDensity(BitSet subspace, List<DistanceResultPair<DoubleDistance>> neighbours) {
+ protected double subspaceDensity(BitSet subspace, List<DoubleDistanceResultPair> neighbours) {
final double bandwidth = optimalBandwidth(subspace.cardinality());
- // TODO: optimize by moving instanceof outside?
double density = 0;
- for(DistanceResultPair<DoubleDistance> pair : neighbours) {
- if(pair instanceof DoubleDistanceResultPair) {
- density += kernel.density(((DoubleDistanceResultPair) pair).getDoubleDistance() / bandwidth);
- }
- else {
- density += kernel.density(pair.getDistance().doubleValue() / bandwidth);
+ for(DoubleDistanceResultPair pair : neighbours) {
+ double v = pair.getDoubleDistance() / bandwidth;
+ if(v < 1) {
+ density += 1 - (v * v);
}
}
@@ -302,7 +362,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl
*/
protected double optimalBandwidth(int dim) {
// Pi in the publication is redundant and cancels out!
- double hopt = 8 * Math.exp(GammaDistribution.logGamma(dim / 2.0 + 1)) * (dim + 4) * Math.pow(2, dim);
+ double hopt = 8 * GammaDistribution.gamma(dim / 2.0 + 1) * (dim + 4) * Math.pow(2, dim);
return hopt * Math.pow(relation.size(), (-1 / (dim + 4)));
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
new file mode 100644
index 00000000..e370d2bf
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java
@@ -0,0 +1,199 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.SubspaceClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * OutRank: ranking outliers in high dimensional data.
+ *
+ * Algorithm to score outliers based on a subspace clustering result. This class
+ * implements score 1 of the OutRank publication, which is a score based on
+ * cluster sizes and cluster dimensionality.
+ *
+ * Reference:
+ * <p>
+ * Emmanuel Müller, Ira Assent, Uwe Steinhausen, Thomas Seidl<br />
+ * OutRank: ranking outliers in high dimensional data<br />
+ * In Proceedings 24th International Conference on Data Engineering (ICDE)
+ * Workshop on Ranking in Databases (DBRank), Cancun, Mexico
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Title("OutRank: ranking outliers in high dimensional data")
+@Description("Ranking outliers in high dimensional data - score 1")
+@Reference(authors = "Emmanuel Müller, Ira Assent, Uwe Steinhausen, Thomas Seidl", title = "OutRank: ranking outliers in high dimensional data", booktitle = "Proc. 24th Int. Conf. on Data Engineering (ICDE) Workshop on Ranking in Databases (DBRank), Cancun, Mexico", url = "http://dx.doi.org/10.1109/ICDEW.2008.4498387")
+public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+ /**
+ * The logger for this class.
+ */
+ private static final Logging logger = Logging.getLogger(OutRankS1.class);
+
+ /**
+ * Clustering algorithm to run.
+ */
+ protected SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> clusteralg;
+
+ /**
+ * Weighting parameter of size vs. dimensionality score.
+ */
+ double alpha;
+
+ /**
+ * Constructor.
+ *
+ * @param clusteralg Clustering algorithm to use (must implement
+ * {@link SubspaceClusteringAlgorithm}!)
+ * @param alpha Alpha parameter to balance size and dimensionality.
+ */
+ public OutRankS1(SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> clusteralg, double alpha) {
+ super();
+ this.clusteralg = clusteralg;
+ this.alpha = alpha;
+ }
+
+ @Override
+ public OutlierResult run(Database database) {
+ DBIDs ids = database.getRelation(TypeUtil.DBID).getDBIDs();
+ // Run the primary algorithm
+ Clustering<? extends SubspaceModel<?>> clustering = clusteralg.run(database);
+
+ WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ score.putDouble(iter, 0);
+ }
+
+ int maxdim = 0, maxsize = 0;
+ // Find maximum dimensionality and cluster size
+ for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ maxsize = Math.max(maxsize, cluster.size());
+ maxdim = Math.max(maxdim, cluster.getModel().getDimensions().cardinality());
+ }
+ // Iterate over all clusters:
+ DoubleMinMax minmax = new DoubleMinMax();
+ for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) {
+ double relsize = cluster.size() / (double) maxsize;
+ double reldim = cluster.getModel().getDimensions().cardinality() / (double) maxdim;
+ // Process objects in the cluster
+ for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
+ double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
+ score.putDouble(iter, newscore);
+ minmax.put(newscore);
+ }
+ }
+
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("OutRank-S1", "OUTRANK_S1", TypeUtil.DOUBLE, score, ids);
+ OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
+ OutlierResult res = new OutlierResult(meta, scoreResult);
+ res.addChildResult(clustering);
+ return res;
+ }
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return clusteralg.getInputTypeRestriction();
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return logger;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Clustering algorithm to use.
+ */
+ public static final OptionID ALGORITHM_ID = OptionID.getOrCreateOptionID("outrank.algorithm", "Subspace clustering algorithm to use.");
+
+ /**
+ * Alpha parameter for S1
+ */
+ public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("outrank.s1.alpha", "Alpha parameter for S1 score.");
+
+ /**
+ * Clustering algorithm to run.
+ */
+ protected SubspaceClusteringAlgorithm<? extends SubspaceModel<?>> algorithm = null;
+
+ /**
+ * Alpha parameter to balance parameters
+ */
+ protected double alpha = 0.25;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class);
+ if(config.grab(algP)) {
+ algorithm = algP.instantiateClass(config);
+ }
+ DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.25);
+ if(config.grab(alphaP)) {
+ alpha = alphaP.getValue();
+ }
+ }
+
+ @Override
+ protected OutRankS1 makeInstance() {
+ return new OutRankS1(algorithm, alpha);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
index a09bbcfd..7fef95e0 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SOD.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java
@@ -1,4 +1,4 @@
-package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
/*
This file is part of ELKI:
@@ -24,9 +24,9 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
*/
import java.util.BitSet;
-import java.util.Iterator;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -37,6 +37,8 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery;
@@ -61,8 +63,6 @@ import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.iterator.IterableIterator;
-import de.lmu.ifi.dbs.elki.utilities.iterator.IterableUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
@@ -73,6 +73,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
/**
+ * Subspace Outlier Degree. Outlier detection method for axis-parallel subspaces.
+ *
+ * Reference:
+ * <p>
+ * * H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek:<br />
+ * Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data<br />
+ * In: Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery
+ * and Data Mining (PAKDD), Bangkok, Thailand, 2009
+ * </p>
+ *
* @author Arthur Zimek
*
* @apiviz.has SODModel oneway - - computes
@@ -141,20 +151,20 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
* Performs the SOD algorithm on the given database.
*
* @param relation Data relation to process
+ * @return Outlier result
*/
- public OutlierResult run(Relation<V> relation) throws IllegalStateException {
+ public OutlierResult run(Relation<V> relation) {
SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation);
FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), logger) : null;
WritableDataStore<SODModel<?>> sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class);
DoubleMinMax minmax = new DoubleMinMax();
- for(Iterator<DBID> iter = relation.iterDBIDs(); iter.hasNext();) {
- DBID queryObject = iter.next();
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
if(progress != null) {
progress.incrementProcessed(logger);
}
- DBIDs knnList = getNearestNeighbors(relation, snnInstance, queryObject);
- SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(queryObject));
- sod_models.put(queryObject, model);
+ DBIDs knnList = getNearestNeighbors(relation, snnInstance, iter);
+ SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(iter));
+ sod_models.put(iter, model);
minmax.put(model.getSod());
}
if(progress != null) {
@@ -181,14 +191,14 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
* @return the k nearest neighbors in terms of the shared nearest neighbor
* distance without the query object
*/
- private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBID queryObject) {
+ private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) {
// similarityFunction.getPreprocessor().getParameters();
Heap<DoubleObjPair<DBID>> nearestNeighbors = new TiedTopBoundedHeap<DoubleObjPair<DBID>>(knn);
- for(DBID id : relation.iterDBIDs()) {
- if(!id.equals(queryObject)) {
- double sim = simQ.similarity(queryObject, id).doubleValue();
+ for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
+ if(!iter.sameDBID(queryObject)) {
+ double sim = simQ.similarity(queryObject, iter).doubleValue();
if(sim > 0) {
- nearestNeighbors.add(new DoubleObjPair<DBID>(sim, id));
+ nearestNeighbors.add(new DoubleObjPair<DBID>(sim, iter.getDBID()));
}
}
}
@@ -244,8 +254,8 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
// TODO: store database link?
centerValues = new double[DatabaseUtil.dimensionality(relation)];
variances = new double[centerValues.length];
- for(DBID id : neighborhood) {
- V databaseObject = relation.get(id);
+ for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ V databaseObject = relation.get(iter);
for(int d = 0; d < centerValues.length; d++) {
centerValues[d] += databaseObject.doubleValue(d + 1);
}
@@ -253,8 +263,8 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
for(int d = 0; d < centerValues.length; d++) {
centerValues[d] /= neighborhood.size();
}
- for(DBID id : neighborhood) {
- V databaseObject = relation.get(id);
+ for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
+ V databaseObject = relation.get(iter);
for(int d = 0; d < centerValues.length; d++) {
// distance
double distance = centerValues[d] - databaseObject.doubleValue(d + 1);
@@ -359,7 +369,7 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
}
@Override
- public Double get(DBID objID) {
+ public Double get(DBIDRef objID) {
return models.get(objID).getSod();
}
@@ -379,8 +389,8 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
}
@Override
- public IterableIterator<DBID> iterDBIDs() {
- return IterableUtil.fromIterator(dbids.iterator());
+ public DBIDIter iterDBIDs() {
+ return dbids.iter();
}
@Override
@@ -389,12 +399,12 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e
}
@Override
- public void set(DBID id, Double val) {
+ public void set(DBIDRef id, Double val) {
throw new UnsupportedOperationException();
}
@Override
- public void delete(DBID id) {
+ public void delete(DBIDRef id) {
throw new UnsupportedOperationException();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
new file mode 100644
index 00000000..8b1c80df
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java
@@ -0,0 +1,28 @@
+/**
+ * <p>Subspace outlier detection methods.</p>
+ *
+ * Methods that detect outliers in subspaces (projections) of the data set.
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
index 86730404..66a89cf5 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java
@@ -35,7 +35,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -112,15 +112,10 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements
*/
public OutlierResult run(Relation<?> relation) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
- for(DBID id : relation.iterDBIDs()) {
- String label = relation.get(id).toString();
- final double score;
- if (pattern.matcher(label).matches()) {
- score = 1.0;
- } else {
- score = 0.0;
- }
- scores.putDouble(id, score);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ String label = relation.get(iditer).toString();
+ final double score = (pattern.matcher(label).matches()) ? 1 : 0;
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
index 509e35e9..b50226f1 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -70,8 +70,8 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen
*/
public OutlierResult run(Relation<?> relation) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
- for(DBID id : relation.iterDBIDs()) {
- scores.putDouble(id, 1.0);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ scores.putDouble(iditer, 1.0);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
index db40ff30..d1c2e076 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java
@@ -37,7 +37,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -100,7 +100,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
}
@Override
- public OutlierResult run(Database database) throws IllegalStateException {
+ public OutlierResult run(Database database) {
Relation<NumberVector<?, ?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class));
// Prefer a true class label
@@ -129,8 +129,8 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
final double minscore = expect / (expect + 1);
HashSet<GeneratorSingleCluster> generators = new HashSet<GeneratorSingleCluster>();
- for(DBID id : models.iterDBIDs()) {
- Model model = models.get(id);
+ for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ Model model = models.get(iditer);
if(model instanceof GeneratorSingleCluster) {
generators.add((GeneratorSingleCluster) model);
}
@@ -139,10 +139,10 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
logger.warning("No generator models found for dataset - all points will be considered outliers.");
}
- for(DBID id : models.iterDBIDs()) {
+ for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = 0.0;
// Convert to a math vector
- Vector v = vecs.get(id).getColumnVector();
+ Vector v = vecs.get(iditer).getColumnVector();
for(GeneratorSingleCluster gen : generators) {
Vector tv = v;
// Transform backwards
@@ -170,7 +170,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im
score = expect / (expect + score);
// adjust to 0 to 1 range:
score = (score - minscore) / (1 - minscore);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
index cff2ad2c..6d8e9f46 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -68,10 +68,10 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement
* @param relation Relation
* @return Result
*/
- public OutlierResult run(Relation<?> relation) throws IllegalStateException {
+ public OutlierResult run(Relation<?> relation) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
- for(DBID id : relation.iterDBIDs()) {
- scores.putDouble(id, 0.0);
+ for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ scores.putDouble(iditer, 0.0);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore();