summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java106
1 files changed, 46 insertions, 60 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
index c21542da..b3a03ba6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace;
*/
import java.util.Arrays;
-import java.util.BitSet;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -37,21 +36,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -63,6 +58,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -93,7 +89,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
* @param <V> vector type
*/
@Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management")
-public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class OUTRES<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
@@ -130,25 +126,21 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
DoubleMinMax minmax = new DoubleMinMax();
KernelDensityEstimator kernel = new KernelDensityEstimator(relation);
- BitSet subspace = new BitSet(kernel.dim);
+ long[] subspace = BitsUtil.zero(kernel.dim);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
- subspace.clear();
+ BitsUtil.zeroI(subspace);
double score = outresScore(0, subspace, iditer, kernel);
ranks.putDouble(iditer, score);
minmax.put(score);
- if(progress != null) {
- progress.incrementProcessed(LOG);
- }
- }
- if(progress != null) {
- progress.ensureCompleted(LOG);
+ LOG.incrementProcessed(progress);
}
+ LOG.ensureCompleted(progress);
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.);
- OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs()));
+ OutlierResult outresResult = new OutlierResult(meta, new MaterializedDoubleRelation("OUTRES", "outres-score", ranks, relation.getDBIDs()));
return outresResult;
}
@@ -161,33 +153,34 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param kernel Kernel
* @return Score
*/
- public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) {
+ public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
double score = 1.0; // Initial score is 1.0
final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
MeanVariance meanv = new MeanVariance();
for(int i = s; i < kernel.dim; i++) {
- if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0?
+ if(BitsUtil.get(subspace, i)) { // TODO: needed? Or should we always start
+ // with i=0?
continue;
}
- subspace.set(i);
+ BitsUtil.setI(subspace, i);
df.setSelectedDimensions(subspace);
final double adjustedEps = kernel.adjustedEps(kernel.dim);
// Query with a larger window, to also get neighbors of neighbors
// Subspace euclidean is metric!
- final DoubleDistance range = new DoubleDistance(adjustedEps * 2.);
- RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
+ final double range = adjustedEps * 2.;
+ RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);
- DistanceDBIDList<DoubleDistance> neighc = rq.getRangeForDBID(id, range);
- DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps);
+ DoubleDBIDList neighc = rq.getRangeForDBID(id, range);
+ DoubleDBIDList neigh = refineRange(neighc, adjustedEps);
if(neigh.size() > 2) {
// Relevance test
if(relevantSubspace(subspace, neigh, kernel)) {
final double density = kernel.subspaceDensity(subspace, neigh);
// Compute mean and standard deviation for densities of neighbors.
meanv.reset();
- for (DoubleDistanceDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
- DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
+ for(DoubleDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
meanv.put(kernel.subspaceDensity(subspace, n2));
}
final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
@@ -199,7 +192,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
score *= outresScore(i + 1, subspace, id, kernel);
}
}
- subspace.clear(i);
+ BitsUtil.clearI(subspace, i);
}
return score;
}
@@ -211,21 +204,14 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param adjustedEps New epsilon
* @return refined list
*/
- private DoubleDistanceDBIDList refineRange(DistanceDBIDList<DoubleDistance> neighc, double adjustedEps) {
- ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size());
+ private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) {
+ ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size());
// We don't have a guarantee for this list to be sorted
- for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
- DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
- if(p instanceof DoubleDistanceDBIDPair) {
- if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) {
- n.add((DoubleDistanceDBIDPair) p);
- }
- }
- else {
- double dist = p.getDistance().doubleValue();
- if(dist <= adjustedEps) {
- n.add(dist, p);
- }
+ for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDBIDPair p = neighbor.getPair();
+ double dist = p.doubleValue();
+ if(dist <= adjustedEps) {
+ n.add(dist, p);
}
}
return n;
@@ -241,12 +227,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param kernel Kernel
* @return Neighbors of neighbor object
*/
- private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDList<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
- ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size());
+ private DoubleDBIDList subsetNeighborhoodQuery(DoubleDBIDList neighc, DBIDRef dbid, PrimitiveDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) {
+ ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size());
V query = kernel.relation.get(dbid);
- for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
- DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair();
- double dist = df.doubleDistance(query, kernel.relation.get(p));
+ for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
+ DoubleDBIDPair p = neighbor.getPair();
+ double dist = df.distance(query, kernel.relation.get(p));
if(dist <= adjustedEps) {
n.add(dist, p);
}
@@ -262,16 +248,16 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param kernel Kernel density estimator
* @return relevance test result
*/
- protected boolean relevantSubspace(BitSet subspace, DoubleDistanceDBIDList neigh, KernelDensityEstimator kernel) {
+ protected boolean relevantSubspace(long[] subspace, DoubleDBIDList neigh, KernelDensityEstimator kernel) {
Relation<V> relation = kernel.relation;
final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size());
- for(int dim = subspace.nextSetBit(0); dim > 0; dim = subspace.nextSetBit(dim + 1)) {
+ for(int dim = BitsUtil.nextSetBit(subspace, 0); dim > 0; dim = BitsUtil.nextSetBit(subspace, dim + 1)) {
// TODO: can we save this copy somehow?
double[] data = new double[neigh.size()];
{
int count = 0;
- for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
V vector = relation.get(neighbor);
data[count] = vector.doubleValue(dim);
count++;
@@ -347,12 +333,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
* @param neighbors Neighbor distance list
* @return Density
*/
- protected double subspaceDensity(BitSet subspace, DoubleDistanceDBIDList neighbors) {
- final double bandwidth = optimalBandwidth(subspace.cardinality());
+ protected double subspaceDensity(long[] subspace, DoubleDBIDList neighbors) {
+ final double bandwidth = optimalBandwidth(BitsUtil.cardinality(subspace));
double density = 0;
- for (DoubleDistanceDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
- double v = neighbor.doubleDistance() / bandwidth;
+ for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ double v = neighbor.doubleValue() / bandwidth;
if(v < 1) {
density += 1 - (v * v);
}
@@ -407,7 +393,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier
*
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer {
/**
* Option ID for Epsilon parameter
*/