summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java60
1 files changed, 24 insertions, 36 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
index 5b681106..a5eb0c7a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
*/
import java.util.ArrayList;
-import java.util.BitSet;
import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
@@ -38,18 +37,19 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -131,7 +131,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param relation Relation to use
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<NumberVector<?>> relation) {
+ public OutlierResult run(Database database, Relation<NumberVector> relation) {
final int dbdim = RelationUtil.dimensionality(relation);
final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
@@ -141,34 +141,30 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
for(int i = 0; i < num; i++) {
- BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand);
+ long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
- LOF<NumberVector<?>, DoubleDistance> lof = new LOF<>(k, df);
+ LOF<NumberVector> lof = new LOF<>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(database, relation);
results.add(result);
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if(prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
if(breadth) {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
- Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
+ Pair<DBIDIter, DoubleRelation>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size());
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
for(OutlierResult r : results) {
- IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
+ IDVectorOntoScoreVector[i] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
@@ -176,12 +172,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
// Iterating over the *lines* of the AS_t(i)-matrix.
for(int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
- for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) {
+ for(Pair<DBIDIter, DoubleRelation> pair : IDVectorOntoScoreVector) {
DBIDIter iter = pair.first;
// Always true if every algorithm returns a complete result (one score
// for every DBID).
if(iter.valid()) {
- double score = pair.second.get(iter);
+ double score = pair.second.doubleValue(iter);
if(Double.isNaN(scores.doubleValue(iter))) {
scores.putDouble(iter, score);
minmax.put(score);
@@ -193,36 +189,28 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
}
}
// Progress does not take the initial mapping into account.
- if(cprog != null) {
- cprog.incrementProcessed(LOG);
- }
- }
- if(cprog != null) {
- cprog.ensureCompleted(LOG);
+ LOG.incrementProcessed(cprog);
}
+ LOG.ensureCompleted(cprog);
}
else {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
for(OutlierResult r : results) {
- final Double s = r.getScores().get(iter);
- if(s != null && !Double.isNaN(s)) {
+ final double s = r.getScores().doubleValue(iter);
+ if(!Double.isNaN(s)) {
sum += s;
}
}
scores.putDouble(iter, sum);
minmax.put(sum);
- if(cprog != null) {
- cprog.incrementProcessed(LOG);
- }
- }
- if(cprog != null) {
- cprog.ensureCompleted(LOG);
+ LOG.incrementProcessed(cprog);
}
+ LOG.ensureCompleted(cprog);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scoreres = new MaterializedRelation<>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
@@ -234,8 +222,8 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
* @param maxdim Maximum number to choose
* @return Subspace as bits.
*/
- private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) {
- BitSet dimset = new BitSet();
+ private long[] randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) {
+ long[] dimset = BitsUtil.zero(alldim);
// Fill with all dimensions
int[] dims = new int[alldim];
for(int d = 0; d < alldim; d++) {
@@ -246,7 +234,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements
// Shrink the subspace to the destination size
for(int d = 0; d < alldim - subdim; d++) {
int s = rand.nextInt(alldim - d);
- dimset.set(dims[s]);
+ BitsUtil.setI(dimset, dims[s]);
dims[s] = dims[alldim - d - 1];
}
return dimset;