summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java45
1 files changed, 27 insertions, 18 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index d3e738a1..c8da9501 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
import java.util.ArrayList;
import java.util.BitSet;
-import java.util.HashMap;
import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
@@ -36,11 +35,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionsSelectingEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -60,6 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
* A simple ensemble method called "Feature bagging" for outlier detection.
@@ -144,8 +144,8 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("LOF iterations", num, logger) : null;
for(int i = 0; i < num; i++) {
BitSet dimset = randomSubspace(dbdim, mindim, maxdim);
- DimensionsSelectingEuclideanDistanceFunction df = new DimensionsSelectingEuclideanDistanceFunction(dimset);
- LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df, df);
+ SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
+ LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(relation);
@@ -159,28 +159,34 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
}
- WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class);
+ WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
if(breadth) {
FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null;
- HashMap<IterableIterator<DBID>, Relation<Double>> IDVectorOntoScoreVector = new HashMap<IterableIterator<DBID>, Relation<Double>>();
+ Pair<IterableIterator<DBID>, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
- for(OutlierResult r : results) {
- IDVectorOntoScoreVector.put(r.getOrdering().iter(relation.getDBIDs()), r.getScores());
+ {
+ int i = 0;
+ for(OutlierResult r : results) {
+ IDVectorOntoScoreVector[i] = new Pair<IterableIterator<DBID>, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()), r.getScores());
+ i++;
+ }
}
// Iterating over the *lines* of the AS_t(i)-matrix.
for(int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for(IterableIterator<DBID> iter : IDVectorOntoScoreVector.keySet()) {
- if(iter.hasNext()) { // Always true if every algorithm returns a
- // complete result (one score for every DBID).
+ for(Pair<IterableIterator<DBID>, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ IterableIterator<DBID> iter = pair.first;
+ // Always true if every algorithm returns a complete result (one score
+ // for every DBID).
+ if(iter.hasNext()) {
DBID tmpID = iter.next();
- double score = IDVectorOntoScoreVector.get(iter).get(tmpID);
- if(scores.get(tmpID) == null) {
- scores.put(tmpID, score);
+ double score = pair.second.get(tmpID);
+ if(Double.isNaN(scores.doubleValue(tmpID))) {
+ scores.putDouble(tmpID, score);
minmax.put(score);
}
}
@@ -202,9 +208,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
for(DBID id : relation.iterDBIDs()) {
double sum = 0.0;
for(OutlierResult r : results) {
- sum += r.getScores().get(id);
+ final Double s = r.getScores().get(id);
+ if (s != null && !Double.isNaN(s)) {
+ sum += s;
+ }
}
- scores.put(id, sum);
+ scores.putDouble(id, sum);
minmax.put(sum);
if(cprog != null) {
cprog.incrementProcessed(logger);