diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java | 106 |
1 files changed, 46 insertions, 60 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java index c21542da..b3a03ba6 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; */ import java.util.Arrays; -import java.util.BitSet; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; @@ -37,21 +36,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList; -import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; -import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; -import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -63,6 +58,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -93,7 +89,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * @param <V> vector type */ @Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management") -public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +public class OUTRES<V extends NumberVector> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ @@ -130,25 +126,21 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier DoubleMinMax minmax = new DoubleMinMax(); KernelDensityEstimator kernel = new KernelDensityEstimator(relation); - BitSet subspace = new BitSet(kernel.dim); + long[] subspace = BitsUtil.zero(kernel.dim); FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("OUTRES scores", relation.size(), LOG) : null; for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - subspace.clear(); + BitsUtil.zeroI(subspace); double score = outresScore(0, subspace, iditer, kernel); ranks.putDouble(iditer, score); minmax.put(score); - if(progress != null) { - progress.incrementProcessed(LOG); - } - } - if(progress != null) { - progress.ensureCompleted(LOG); + LOG.incrementProcessed(progress); } + LOG.ensureCompleted(progress); OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.); - OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs())); + OutlierResult outresResult = new OutlierResult(meta, new MaterializedDoubleRelation("OUTRES", "outres-score", ranks, relation.getDBIDs())); return outresResult; } @@ -161,33 +153,34 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param kernel Kernel * @return Score */ - public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) { + public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) { double score = 1.0; // Initial score is 1.0 final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace); MeanVariance meanv = new MeanVariance(); for(int i = s; i < kernel.dim; i++) { - if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0? + if(BitsUtil.get(subspace, i)) { // TODO: needed? Or should we always start + // with i=0? continue; } - subspace.set(i); + BitsUtil.setI(subspace, i); df.setSelectedDimensions(subspace); final double adjustedEps = kernel.adjustedEps(kernel.dim); // Query with a larger window, to also get neighbors of neighbors // Subspace euclidean is metric! - final DoubleDistance range = new DoubleDistance(adjustedEps * 2.); - RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range); + final double range = adjustedEps * 2.; + RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range); - DistanceDBIDList<DoubleDistance> neighc = rq.getRangeForDBID(id, range); - DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps); + DoubleDBIDList neighc = rq.getRangeForDBID(id, range); + DoubleDBIDList neigh = refineRange(neighc, adjustedEps); if(neigh.size() > 2) { // Relevance test if(relevantSubspace(subspace, neigh, kernel)) { final double density = kernel.subspaceDensity(subspace, neigh); // Compute mean and standard deviation for densities of neighbors. meanv.reset(); - for (DoubleDistanceDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { - DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel); + for(DoubleDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { + DoubleDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel); meanv.put(kernel.subspaceDensity(subspace, n2)); } final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev()); @@ -199,7 +192,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier score *= outresScore(i + 1, subspace, id, kernel); } } - subspace.clear(i); + BitsUtil.clearI(subspace, i); } return score; } @@ -211,21 +204,14 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param adjustedEps New epsilon * @return refined list */ - private DoubleDistanceDBIDList refineRange(DistanceDBIDList<DoubleDistance> neighc, double adjustedEps) { - ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size()); + private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) { + ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size()); // We don't have a guarantee for this list to be sorted - for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { - DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); - if(p instanceof DoubleDistanceDBIDPair) { - if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) { - n.add((DoubleDistanceDBIDPair) p); - } - } - else { - double dist = p.getDistance().doubleValue(); - if(dist <= adjustedEps) { - n.add(dist, p); - } + for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + DoubleDBIDPair p = neighbor.getPair(); + double dist = p.doubleValue(); + if(dist <= adjustedEps) { + n.add(dist, p); } } return n; @@ -241,12 +227,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param kernel Kernel * @return Neighbors of neighbor object */ - private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDList<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) { - ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size()); + private DoubleDBIDList subsetNeighborhoodQuery(DoubleDBIDList neighc, DBIDRef dbid, PrimitiveDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) { + ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size()); V query = kernel.relation.get(dbid); - for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { - DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); - double dist = df.doubleDistance(query, kernel.relation.get(p)); + for(DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + DoubleDBIDPair p = neighbor.getPair(); + double dist = df.distance(query, kernel.relation.get(p)); if(dist <= adjustedEps) { n.add(dist, p); } @@ -262,16 +248,16 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param kernel Kernel density estimator * @return relevance test result */ - protected boolean relevantSubspace(BitSet subspace, DoubleDistanceDBIDList neigh, KernelDensityEstimator kernel) { + protected boolean relevantSubspace(long[] subspace, DoubleDBIDList neigh, KernelDensityEstimator kernel) { Relation<V> relation = kernel.relation; final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size()); - for(int dim = subspace.nextSetBit(0); dim > 0; dim = subspace.nextSetBit(dim + 1)) { + for(int dim = BitsUtil.nextSetBit(subspace, 0); dim > 0; dim = BitsUtil.nextSetBit(subspace, dim + 1)) { // TODO: can we save this copy somehow? double[] data = new double[neigh.size()]; { int count = 0; - for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { + for(DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { V vector = relation.get(neighbor); data[count] = vector.doubleValue(dim); count++; @@ -347,12 +333,12 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param neighbors Neighbor distance list * @return Density */ - protected double subspaceDensity(BitSet subspace, DoubleDistanceDBIDList neighbors) { - final double bandwidth = optimalBandwidth(subspace.cardinality()); + protected double subspaceDensity(long[] subspace, DoubleDBIDList neighbors) { + final double bandwidth = optimalBandwidth(BitsUtil.cardinality(subspace)); double density = 0; - for (DoubleDistanceDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { - double v = neighbor.doubleDistance() / bandwidth; + for(DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + double v = neighbor.doubleValue() / bandwidth; if(v < 1) { density += 1 - (v * v); } @@ -407,7 +393,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer { + public static class Parameterizer<O extends NumberVector> extends AbstractParameterizer { /** * Option ID for Epsilon parameter */ |