diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java | 60 |
1 files changed, 24 insertions, 36 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java index 5b681106..a5eb0c7a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; */ import java.util.ArrayList; -import java.util.BitSet; import java.util.Random; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; @@ -38,18 +37,19 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.random.RandomFactory; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.RandomFactory; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -131,7 +131,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements * @param relation Relation to use * @return Outlier detection result */ - public OutlierResult run(Database database, Relation<NumberVector<?>> relation) { + public OutlierResult run(Database database, Relation<NumberVector> relation) { final int dbdim = RelationUtil.dimensionality(relation); final int mindim = dbdim >> 1; final int maxdim = dbdim - 1; @@ -141,34 +141,30 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements { FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null; for(int i = 0; i < num; i++) { - BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand); + long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand); SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset); - LOF<NumberVector<?>, DoubleDistance> lof = new LOF<>(k, df); + LOF<NumberVector> lof = new LOF<>(k, df); // run LOF and collect the result OutlierResult result = lof.run(database, relation); results.add(result); - if(prog != null) { - prog.incrementProcessed(LOG); - } - } - if(prog != null) { - prog.ensureCompleted(LOG); + LOG.incrementProcessed(prog); } + LOG.ensureCompleted(prog); } WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); if(breadth) { FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null; - Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size()); + Pair<DBIDIter, DoubleRelation>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size()); // Mapping score-sorted DBID-Iterators onto their corresponding scores. // We need to initialize them now be able to iterate them "in parallel". { int i = 0; for(OutlierResult r : results) { - IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores()); + IDVectorOntoScoreVector[i] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores()); i++; } } @@ -176,12 +172,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements // Iterating over the *lines* of the AS_t(i)-matrix. for(int i = 0; i < relation.size(); i++) { // Iterating over the elements of a line (breadth-first). - for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) { + for(Pair<DBIDIter, DoubleRelation> pair : IDVectorOntoScoreVector) { DBIDIter iter = pair.first; // Always true if every algorithm returns a complete result (one score // for every DBID). if(iter.valid()) { - double score = pair.second.get(iter); + double score = pair.second.doubleValue(iter); if(Double.isNaN(scores.doubleValue(iter))) { scores.putDouble(iter, score); minmax.put(score); @@ -193,36 +189,28 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements } } // Progress does not take the initial mapping into account. - if(cprog != null) { - cprog.incrementProcessed(LOG); - } - } - if(cprog != null) { - cprog.ensureCompleted(LOG); + LOG.incrementProcessed(cprog); } + LOG.ensureCompleted(cprog); } else { FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null; for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { double sum = 0.0; for(OutlierResult r : results) { - final Double s = r.getScores().get(iter); - if(s != null && !Double.isNaN(s)) { + final double s = r.getScores().doubleValue(iter); + if(!Double.isNaN(s)) { sum += s; } } scores.putDouble(iter, sum); minmax.put(sum); - if(cprog != null) { - cprog.incrementProcessed(LOG); - } - } - if(cprog != null) { - cprog.ensureCompleted(LOG); + LOG.incrementProcessed(cprog); } + LOG.ensureCompleted(cprog); } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); - Relation<Double> scoreres = new MaterializedRelation<>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs()); return new OutlierResult(meta, scoreres); } @@ -234,8 +222,8 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements * @param maxdim Maximum number to choose * @return Subspace as bits. */ - private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) { - BitSet dimset = new BitSet(); + private long[] randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) { + long[] dimset = BitsUtil.zero(alldim); // Fill with all dimensions int[] dims = new int[alldim]; for(int d = 0; d < alldim; d++) { @@ -246,7 +234,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements // Shrink the subspace to the destination size for(int d = 0; d < alldim - subdim; d++) { int s = rand.nextInt(alldim - d); - dimset.set(dims[s]); + BitsUtil.setI(dimset, dims[s]); dims[s] = dims[alldim - d - 1]; } return dimset; |