diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java | 45 |
1 files changed, 27 insertions, 18 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java index d3e738a1..c8da9501 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; import java.util.ArrayList; import java.util.BitSet; -import java.util.HashMap; import java.util.Random; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; @@ -36,11 +35,11 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; -import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionsSelectingEuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -60,6 +59,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter; +import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; /** * A simple ensemble method called "Feature bagging" for outlier detection. @@ -144,8 +144,8 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("LOF iterations", num, logger) : null; for(int i = 0; i < num; i++) { BitSet dimset = randomSubspace(dbdim, mindim, maxdim); - DimensionsSelectingEuclideanDistanceFunction df = new DimensionsSelectingEuclideanDistanceFunction(dimset); - LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df, df); + SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset); + LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df); // run LOF and collect the result OutlierResult result = lof.run(relation); @@ -159,28 +159,34 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements } } - WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, Double.class); + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); if(breadth) { FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null; - HashMap<IterableIterator<DBID>, Relation<Double>> IDVectorOntoScoreVector = new HashMap<IterableIterator<DBID>, Relation<Double>>(); + Pair<IterableIterator<DBID>, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size()); // Mapping score-sorted DBID-Iterators onto their corresponding scores. // We need to initialize them now be able to iterate them "in parallel". - for(OutlierResult r : results) { - IDVectorOntoScoreVector.put(r.getOrdering().iter(relation.getDBIDs()), r.getScores()); + { + int i = 0; + for(OutlierResult r : results) { + IDVectorOntoScoreVector[i] = new Pair<IterableIterator<DBID>, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()), r.getScores()); + i++; + } } // Iterating over the *lines* of the AS_t(i)-matrix. for(int i = 0; i < relation.size(); i++) { // Iterating over the elements of a line (breadth-first). - for(IterableIterator<DBID> iter : IDVectorOntoScoreVector.keySet()) { - if(iter.hasNext()) { // Always true if every algorithm returns a - // complete result (one score for every DBID). + for(Pair<IterableIterator<DBID>, Relation<Double>> pair : IDVectorOntoScoreVector) { + IterableIterator<DBID> iter = pair.first; + // Always true if every algorithm returns a complete result (one score + // for every DBID). + if(iter.hasNext()) { DBID tmpID = iter.next(); - double score = IDVectorOntoScoreVector.get(iter).get(tmpID); - if(scores.get(tmpID) == null) { - scores.put(tmpID, score); + double score = pair.second.get(tmpID); + if(Double.isNaN(scores.doubleValue(tmpID))) { + scores.putDouble(tmpID, score); minmax.put(score); } } @@ -202,9 +208,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements for(DBID id : relation.iterDBIDs()) { double sum = 0.0; for(OutlierResult r : results) { - sum += r.getScores().get(id); + final Double s = r.getScores().get(id); + if (s != null && !Double.isNaN(s)) { + sum += s; + } } - scores.put(id, sum); + scores.putDouble(id, sum); minmax.put(sum); if(cprog != null) { cprog.incrementProcessed(logger); |