diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier')
72 files changed, 1598 insertions, 892 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java index d52a81fd..ad0b8175 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -23,7 +23,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; along with this program. If not, see <http://www.gnu.org/licenses/>. */ -import java.util.Collections; import java.util.HashMap; import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; @@ -34,19 +33,21 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRange; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import 
de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction; import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix; @@ -58,13 +59,13 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; +import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMaxHeap; +import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparableMinHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; @@ -139,7 +140,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori /** * Static DBID map. */ - private ArrayModifiableDBIDs staticids = null; + private ArrayDBIDs staticids = null; /** * Actual constructor, with parameters. Fast mode (sampling). 
@@ -178,11 +179,15 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori */ public OutlierResult getRanking(Relation<V> relation) { // Fix a static set of IDs - staticids = DBIDUtil.newArray(relation.getDBIDs()); - staticids.sort(); + if (relation.getDBIDs() instanceof DBIDRange) { + staticids = (DBIDRange) relation.getDBIDs(); + } else { + staticids = DBIDUtil.newArray(relation.getDBIDs()); + ((ArrayModifiableDBIDs) staticids).sort(); + } KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids); - Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder()); + ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(relation.size()); // preprocess kNN neighborhoods KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k); @@ -191,7 +196,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori for (DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) { s.reset(); - KNNResult<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k); + KNNList<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k); for (DBIDIter key1 = neighbors.iter(); key1.valid(); key1.advance()) { for (DBIDIter key2 = neighbors.iter(); key2.valid(); key2.advance()) { if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(key1, objKey) || DBIDUtil.equal(key2, objKey)) { @@ -214,12 +219,13 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori DoubleMinMax minmaxabod = new DoubleMinMax(); WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); - for (DoubleDBIDPair pair : pq) { + while (!pq.isEmpty()) { + DoubleDBIDPair pair = pq.poll(); abodvalues.putDouble(pair, pair.doubleValue()); minmaxabod.put(pair.doubleValue()); } // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY); return new OutlierResult(scoreMeta, scoreResult); } @@ -234,17 +240,21 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori final DBIDs ids = relation.getDBIDs(); // Fix a static set of IDs // TODO: add a DBIDUtil.ensureSorted? - staticids = DBIDUtil.newArray(ids); - staticids.sort(); + if (relation.getDBIDs() instanceof DBIDRange) { + staticids = (DBIDRange) relation.getDBIDs(); + } else { + staticids = DBIDUtil.newArray(relation.getDBIDs()); + ((ArrayModifiableDBIDs) staticids).sort(); + } KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids); - Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder()); + ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(relation.size()); // get Candidate Ranking for (DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) { WritableDoubleDataStore dists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT); // determine kNearestNeighbors and pairwise distances - Heap<DoubleDBIDPair> nn; + ComparableMinHeap<DoubleDBIDPair> nn; if (!USE_RND_SAMPLE) { nn = calcDistsandNN(relation, kernelMatrix, sampleSize, aKey, dists); } else { @@ -264,7 +274,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori pq.add(DBIDUtil.newPair(var, aKey)); } // refine Candidates - Heap<DoubleDBIDPair> resqueue = new Heap<DoubleDBIDPair>(k); + ComparableMinHeap<DoubleDBIDPair> resqueue = new 
ComparableMinHeap<>(k); MeanVariance s = new MeanVariance(); while (!pq.isEmpty()) { if (resqueue.size() == k && pq.peek().doubleValue() > resqueue.peek().doubleValue()) { @@ -302,12 +312,13 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori } DoubleMinMax minmaxabod = new DoubleMinMax(); WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); - for (DoubleDBIDPair pair : pq) { + while (!pq.isEmpty()) { + DoubleDBIDPair pair = pq.poll(); abodvalues.putDouble(pair, pair.doubleValue()); minmaxabod.put(pair.doubleValue()); } // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids); OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY); return new OutlierResult(scoreMeta, scoreResult); } @@ -404,8 +415,8 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi)); } - private Heap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { - Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize); + private ComparableMinHeap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { + ComparableMinHeap<DoubleDBIDPair> nn = new ComparableMinHeap<>(sampleSize); for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) { double val = calcCos(kernelMatrix, aKey, bKey); dists.putDouble(bKey, val); @@ -420,8 +431,8 
@@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori return nn; } - private Heap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { - Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize); + private ComparableMinHeap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { + ComparableMinHeap<DoubleDBIDPair> nn = new ComparableMinHeap<>(sampleSize); int step = (int) ((double) data.size() / (double) sampleSize); int counter = 0; for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) { @@ -445,14 +456,14 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori public String getExplanations(Relation<V> data) { KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, data, staticids); // PQ for Outlier Ranking - Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(data.size(), Collections.reverseOrder()); - HashMap<DBID, DBIDs> explaintab = new HashMap<DBID, DBIDs>(); + ComparableMaxHeap<DoubleDBIDPair> pq = new ComparableMaxHeap<>(data.size()); + HashMap<DBID, DBIDs> explaintab = new HashMap<>(); // test all objects MeanVariance s = new MeanVariance(), s2 = new MeanVariance(); for (DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) { s.reset(); // Queue for the best explanation - Heap<DoubleDBIDPair> explain = new Heap<DoubleDBIDPair>(); + ComparableMinHeap<DoubleDBIDPair> explain = new ComparableMinHeap<>(); // determine Object // for each pair of other objects for (DBIDIter key1 = data.iterDBIDs(); key1.valid(); key1.advance()) { @@ -591,7 +602,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori if (config.grab(sampleSizeP)) { sampleSize = sampleSizeP.getValue(); } - final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = 
new ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class); + final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class); if (config.grab(param)) { primitiveKernelFunction = param.instantiateClass(config); } @@ -599,7 +610,7 @@ public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgori @Override protected ABOD<V> makeInstance() { - return new ABOD<V>(k, sampleSize, primitiveKernelFunction, distanceFunction); + return new ABOD<>(k, sampleSize, primitiveKernelFunction, distanceFunction); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java index 2a4885dc..99356aef 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -109,12 +109,12 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten final int dim = RelationUtil.dimensionality(relation); final int size = relation.size(); final DBIDs allids = relation.getDBIDs(); - final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<ArrayList<DBIDs>>(); + final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<>(); // Temporary projection storage of the database - final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<ArrayList<DoubleDBIDPair>>(dim); + final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new 
ArrayList<>(dim); for(int i = 0; i < dim; i++) { - ArrayList<DoubleDBIDPair> axis = new ArrayList<DoubleDBIDPair>(size); + ArrayList<DoubleDBIDPair> axis = new ArrayList<>(size); dbAxis.add(i, axis); } // Project @@ -129,7 +129,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> exten for(int d = 0; d < dim; d++) { ArrayList<DoubleDBIDPair> axis = dbAxis.get(d); Collections.sort(axis); - ArrayList<DBIDs> dimranges = new ArrayList<DBIDs>(phi + 1); + ArrayList<DBIDs> dimranges = new ArrayList<>(phi + 1); dimranges.add(allids); int start = 0; for(int r = 0; r < phi; r++) { diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java index 0e6f502a..5cafe04d 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java @@ -86,7 +86,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra DoubleDataStore dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -132,7 +132,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra */
protected void configD(Parameterization config, DistanceFunction<?, D> distanceFunction) {
final D distanceFactory = (distanceFunction != null) ? distanceFunction.getDistanceFactory() : null;
- final DistanceParameter<D> param = new DistanceParameter<D>(D_ID, distanceFactory);
+ final DistanceParameter<D> param = new DistanceParameter<>(D_ID, distanceFactory);
if(config.grab(param)) {
d = param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java index c263cdfa..89be0e66 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java @@ -49,6 +49,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -131,30 +132,30 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA final int dbsize = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- Iterable<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
+ Heap<Individuum>.UnorderedIter individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- for(Individuum ind : individuums) {
- DBIDs ids = computeSubspaceForGene(ind.getGene(), ranges);
+ for (; individuums.valid(); individuums.advance()) {
+ DBIDs ids = computeSubspaceForGene(individuums.get().getGene(), ranges);
double sparsityC = sparsity(ids.size(), dbsize, k, phi);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double prev = outlierScore.doubleValue(iter);
- if(Double.isNaN(prev) || sparsityC < prev) {
+ if (Double.isNaN(prev) || sparsityC < prev) {
outlierScore.putDouble(iter, sparsityC);
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double val = outlierScore.doubleValue(iditer);
- if(Double.isNaN(val)) {
+ if (Double.isNaN(val)) {
outlierScore.putDouble(iditer, 0.0);
val = 0.0;
}
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("AggarwalYuEvolutionary", "aggarwal-yu-outlier", TypeUtil.DOUBLE, outlierScore, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuEvolutionary", "aggarwal-yu-outlier", TypeUtil.DOUBLE, outlierScore, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -214,16 +215,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA this.random = random;
}
- public Iterable<Individuum> run() {
+ public Heap<Individuum>.UnorderedIter run() {
ArrayList<Individuum> pop = initialPopulation(m);
// best Population
- TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<Individuum>(m, Collections.reverseOrder());
+ TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<>(m, Collections.reverseOrder());
for (Individuum ind : pop) {
bestSol.add(ind);
}
int iterations = 0;
- while(!checkConvergence(pop)) {
+ while (!checkConvergence(pop)) {
Collections.sort(pop);
pop = rouletteRankSelection(pop);
// Crossover
@@ -231,33 +232,33 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA // Mutation with probability 0.25 , 0.25
pop = mutation(pop, 0.5, 0.5);
// Avoid duplicates
- ind: for(Individuum ind : pop) {
- for (Individuum b : bestSol) {
- if (b.equals(ind)) {
+ ind: for (Individuum ind : pop) {
+ for (Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
+ if (it.get().equals(ind)) {
continue ind;
}
}
bestSol.add(ind);
}
- if(LOG.isDebuggingFinest()) {
+ if (LOG.isDebuggingFinest()) {
StringBuilder buf = new StringBuilder();
buf.append("Top solutions:\n");
- for(Individuum ind : bestSol) {
- buf.append(ind.toString()).append('\n');
+ for (Heap<Individuum>.UnorderedIter it = bestSol.unorderedIter(); it.valid(); it.advance()) {
+ buf.append(it.get().toString()).append('\n');
}
buf.append("Population:\n");
- for(Individuum ind : pop) {
+ for (Individuum ind : pop) {
buf.append(ind.toString()).append('\n');
}
LOG.debugFinest(buf.toString());
}
iterations++;
- if(iterations > MAX_ITERATIONS) {
+ if (iterations > MAX_ITERATIONS) {
LOG.warning("Maximum iterations reached.");
break;
}
}
- return bestSol;
+ return bestSol.unorderedIter();
}
/**
@@ -267,18 +268,18 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * @return Convergence
*/
private boolean checkConvergence(Collection<Individuum> pop) {
- if(pop.size() == 0) {
+ if (pop.size() == 0) {
return true;
}
// Gene occurrence counter
int[][] occur = new int[dim][phi + 1];
// Count gene occurrences
- for(Individuum ind : pop) {
+ for (Individuum ind : pop) {
int[] gene = ind.getGene();
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
int val = gene[d] + DONT_CARE;
- if(val < 0 || val >= phi + 1) {
+ if (val < 0 || val >= phi + 1) {
LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
@@ -287,20 +288,20 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA }
int conv = (int) (pop.size() * 0.95);
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
- for(int d = 0; d < dim; d++) {
+ for (int d = 0; d < dim; d++) {
boolean converged = false;
- for(int val = 0; val < phi + 1; val++) {
- if(occur[d][val] >= conv) {
+ for (int val = 0; val < phi + 1; val++) {
+ if (occur[d][val] >= conv) {
converged = true;
break;
}
}
// A single failure to converge is sufficient to continue.
- if(!converged) {
+ if (!converged) {
return false;
}
}
@@ -315,21 +316,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA */
private ArrayList<Individuum> initialPopulation(int popsize) {
// Initial Population
- ArrayList<Individuum> population = new ArrayList<Individuum>(popsize);
+ ArrayList<Individuum> population = new ArrayList<>(popsize);
// fill population
- for(int i = 0; i < popsize; i++) {
+ for (int i = 0; i < popsize; i++) {
// Random Individual
int[] gene = new int[dim];
// fill don't care ( any dimension == don't care)
- for(int j = 0; j < dim; j++) {
+ for (int j = 0; j < dim; j++) {
gene[j] = DONT_CARE;
}
// count of don't care positions
int countDim = k;
// fill non don't care positions of the Individual
- while(countDim > 0) {
+ while (countDim > 0) {
int z = random.nextInt(dim);
- if(gene[z] == DONT_CARE) {
+ if (gene[z] == DONT_CARE) {
gene[z] = random.nextInt(phi) + 1;
countDim--;
}
@@ -357,24 +358,23 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA // Relative weight := popsize - position => sum(1..popsize)
int totalweight = (popsize * (popsize + 1)) >> 1;
// Survivors
- ArrayList<Individuum> survivors = new ArrayList<Individuum>(popsize);
+ ArrayList<Individuum> survivors = new ArrayList<>(popsize);
// position of selection
- for(int i = 0; i < popsize; i++) {
+ for (int i = 0; i < popsize; i++) {
int z = random.nextInt(totalweight);
- for(int j = 0; j < popsize; j++) {
- if(z < popsize - j) {
+ for (int j = 0; j < popsize; j++) {
+ if (z < popsize - j) {
// TODO: need clone?
survivors.add(population.get(j));
break;
- }
- else {
+ } else {
// decrement
z -= (popsize - j);
}
}
}
- if(survivors.size() != popsize) {
+ if (survivors.size() != popsize) {
throw new AbortException("Selection step failed - implementation error?");
}
// Don't sort, to avoid biasing the crossover!
@@ -387,31 +387,30 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA */
private ArrayList<Individuum> mutation(ArrayList<Individuum> population, double perc1, double perc2) {
// the Mutations
- ArrayList<Individuum> mutations = new ArrayList<Individuum>();
+ ArrayList<Individuum> mutations = new ArrayList<>();
// Set of Positions which are don't care in the String
- TreeSet<Integer> Q = new TreeSet<Integer>();
+ TreeSet<Integer> Q = new TreeSet<>();
// Set of Positions which are not don't care in the String
- TreeSet<Integer> R = new TreeSet<Integer>();
+ TreeSet<Integer> R = new TreeSet<>();
// for each individuum
- for(int j = 0; j < population.size(); j++) {
+ for (int j = 0; j < population.size(); j++) {
// clear the Sets
Q.clear();
R.clear();
// Fill the Sets with the Positions
- for(int i = 0; i < dim; i++) {
- if(population.get(j).getGene()[i] == DONT_CARE) {
+ for (int i = 0; i < dim; i++) {
+ if (population.get(j).getGene()[i] == DONT_CARE) {
Q.add(i);
- }
- else {
+ } else {
R.add(i);
}
}
//
double r1 = random.nextDouble();
- if(Q.size() != 0) {
+ if (Q.size() != 0) {
// Mutation Variant 1
- if(r1 <= perc1) {
+ if (r1 <= perc1) {
// calc Mutation Spot
Integer[] pos = new Integer[Q.size()];
pos = Q.toArray(pos);
@@ -436,7 +435,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA }
r1 = random.nextDouble();
// Mutation Variant 2
- if(r1 <= perc2) {
+ if (r1 <= perc2) {
// calc Mutation Spot
Integer[] pos = new Integer[R.size()];
pos = R.toArray(pos);
@@ -470,16 +469,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA */
private ArrayList<Individuum> crossoverOptimized(ArrayList<Individuum> population) {
// Crossover Set of population Set
- ArrayList<Individuum> crossover = new ArrayList<Individuum>();
+ ArrayList<Individuum> crossover = new ArrayList<>();
- for(int i = 0; i < population.size() - 1; i += 2) {
+ for (int i = 0; i < population.size() - 1; i += 2) {
Pair<Individuum, Individuum> recombine = recombineOptimized(population.get(i), population.get(i + 1));
// add the Solutions to the new Set
crossover.add(recombine.getFirst());
crossover.add(recombine.getSecond());
}
// if the set contains an odd number of Subspaces, retain the last one
- if(population.size() % 2 == 1) {
+ if (population.size() % 2 == 1) {
crossover.add(population.get(population.size() - 1));
}
// Collections.sort(crossover);
@@ -496,18 +495,18 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA private Pair<Individuum, Individuum> recombineOptimized(Individuum parent1, Individuum parent2) {
Pair<Individuum, Individuum> recombinePair;
// Set of Positions in which either s1 or s2 are don't care
- ArrayList<Integer> Q = new ArrayList<Integer>(dim);
+ ArrayList<Integer> Q = new ArrayList<>(dim);
// Set of Positions in which neither s1 or s2 is don't care
- ArrayList<Integer> R = new ArrayList<Integer>(dim);
+ ArrayList<Integer> R = new ArrayList<>(dim);
- for(int i = 0; i < dim; i++) {
- if((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
+ for (int i = 0; i < dim; i++) {
+ if ((parent1.getGene()[i] == DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
Q.add(i);
}
- if((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] == DONT_CARE)) {
+ if ((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] == DONT_CARE)) {
Q.add(i);
}
- if((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
+ if ((parent1.getGene()[i] != DONT_CARE) && (parent2.getGene()[i] != DONT_CARE)) {
R.add(i);
}
}
@@ -519,11 +518,11 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA int count = k - R.size();
Iterator<Integer> q = Q.iterator();
- while(count > 0) {
+ while (count > 0) {
int[] l1 = b.clone();
int[] l2 = b.clone();
- while(q.hasNext()) {
+ while (q.hasNext()) {
int next = q.next();
// pos = next;
@@ -537,15 +536,14 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k, phi);
final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k, phi);
- if(sparsityL1 <= sparsityL2) {
+ if (sparsityL1 <= sparsityL2) {
b = l1.clone();
- if(s1Null) {
+ if (s1Null) {
count--;
}
- }
- else {
+ } else {
b = l2.clone();
- if(s2Null) {
+ if (s2Null) {
count--;
}
}
@@ -557,17 +555,16 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA // create the complementary String
int[] comp = new int[dim];
- for(int i = 0; i < dim; i++) {
- if(b[i] == parent1.getGene()[i]) {
+ for (int i = 0; i < dim; i++) {
+ if (b[i] == parent1.getGene()[i]) {
comp[i] = parent2.getGene()[i];
- }
- else {
+ } else {
comp[i] = parent2.getGene()[i];
}
}
final Individuum i1 = makeIndividuum(b);
final Individuum i2 = makeIndividuum(comp);
- recombinePair = new Pair<Individuum, Individuum>(i1, i2);
+ recombinePair = new Pair<>(i1, i2);
return recombinePair;
}
@@ -584,7 +581,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * @return best gene combination
*/
private Individuum combineRecursive(ArrayList<Integer> r, int i, int[] current, Individuum parent1, Individuum parent2) {
- if(i == r.size()) {
+ if (i == r.size()) {
return makeIndividuum(current);
}
// Position to modify
@@ -597,10 +594,9 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA Individuum i1 = combineRecursive(r, i + 1, gene1, parent1, parent2);
Individuum i2 = combineRecursive(r, i + 1, gene2, parent1, parent2);
// Return the better result.
- if(i1.getFitness() < i2.getFitness()) {
+ if (i1.getFitness() < i2.getFitness()) {
return i1;
- }
- else {
+ } else {
return i2;
}
}
@@ -610,8 +606,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA * Individuum for the evolutionary search.
*
* @author Erich Schubert
- * - * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair + *
+ * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair
*/
private static class Individuum extends FCPair<Double, int[]> {
/**
@@ -661,15 +657,15 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA @Override
public boolean equals(Object obj) {
- if(!(obj instanceof Individuum)) {
+ if (!(obj instanceof Individuum)) {
return false;
}
Individuum other = (Individuum) obj;
- if(other.second.length != this.second.length) {
+ if (other.second.length != this.second.length) {
return false;
}
- for(int i = 0; i < this.second.length; i++) {
- if(other.second[i] != this.second[i]) {
+ for (int i = 0; i < this.second.length; i++) {
+ if (other.second[i] != this.second[i]) {
return false;
}
}
@@ -708,18 +704,18 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractA super.makeOptions(config);
final IntParameter mP = new IntParameter(M_ID);
mP.addConstraint(new GreaterEqualConstraint(2));
- if(config.grab(mP)) {
+ if (config.grab(mP)) {
m = mP.getValue();
}
final RandomParameter rndP = new RandomParameter(SEED_ID);
- if(config.grab(rndP)) {
+ if (config.grab(rndP)) {
rnd = rndP.getValue();
}
}
@Override
protected AggarwalYuEvolutionary<V> makeInstance() {
- return new AggarwalYuEvolutionary<V>(k, phi, m, rnd);
+ return new AggarwalYuEvolutionary<>(k, phi, m, rnd);
}
}
-} +}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java index 9cd7d79f..1816c3a3 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java @@ -102,15 +102,15 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal // Build a list of all subspaces
{
// R1 initial one-dimensional subspaces.
- Rk = new ArrayList<ArrayList<IntIntPair>>();
+ Rk = new ArrayList<>();
// Set of all dim*phi ranges
- ArrayList<IntIntPair> q = new ArrayList<IntIntPair>();
+ ArrayList<IntIntPair> q = new ArrayList<>();
for(int i = 0; i < dimensionality; i++) {
for(int j = 1; j <= phi; j++) {
IntIntPair s = new IntIntPair(i, j);
q.add(s);
// Add to first Rk
- ArrayList<IntIntPair> v = new ArrayList<IntIntPair>();
+ ArrayList<IntIntPair> v = new ArrayList<>();
v.add(s);
Rk.add(v);
}
@@ -118,7 +118,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal // build Ri
for(int i = 2; i <= k; i++) {
- ArrayList<ArrayList<IntIntPair>> Rnew = new ArrayList<ArrayList<IntIntPair>>();
+ ArrayList<ArrayList<IntIntPair>> Rnew = new ArrayList<>();
for(int j = 0; j < Rk.size(); j++) {
ArrayList<IntIntPair> c = Rk.get(j);
@@ -131,7 +131,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal }
}
if(!invalid) {
- ArrayList<IntIntPair> neu = new ArrayList<IntIntPair>(c);
+ ArrayList<IntIntPair> neu = new ArrayList<>(c);
neu.add(pair);
Rnew.add(neu);
}
@@ -165,7 +165,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal }
minmax.put(val);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("AggarwalYuNaive", "aggarwal-yu-outlier", TypeUtil.DOUBLE, sparsity, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("AggarwalYuNaive", "aggarwal-yu-outlier", TypeUtil.DOUBLE, sparsity, relation.getDBIDs());
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreResult);
}
@@ -185,7 +185,7 @@ public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwal public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
@Override
protected AggarwalYuNaive<V> makeInstance() {
- return new AggarwalYuNaive<V>(k, phi);
+ return new AggarwalYuNaive<>(k, phi);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java index ac544b7f..06168c5a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -39,12 +39,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -54,10 +54,11 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult; import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner; import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution; -import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.estimator.GammaChoiWetteEstimator; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; 
import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; +import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.NumberArrayAdapter; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -108,6 +109,53 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte public static final String COP_ERRORVEC = "cop-errorvec"; /** + * A clone of + * {@link de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.DoubleArrayAdapter} + * that only uses the first 85% of the array! + */ + private static final NumberArrayAdapter<Double, double[]> SHORTENED_ARRAY = new NumberArrayAdapter<Double, double[]>() { + @Override + public int size(double[] array) { + return (int) (.85 * array.length); + } + + @Override + public Double get(double[] array, int off) throws IndexOutOfBoundsException { + return Double.valueOf(array[off]); + } + + @Override + public double getDouble(double[] array, int off) throws IndexOutOfBoundsException { + return array[off]; + } + + @Override + public float getFloat(double[] array, int off) throws IndexOutOfBoundsException { + return (float) array[off]; + } + + @Override + public int getInteger(double[] array, int off) throws IndexOutOfBoundsException { + return (int) array[off]; + } + + @Override + public short getShort(double[] array, int off) throws IndexOutOfBoundsException { + return (short) array[off]; + } + + @Override + public long getLong(double[] array, int off) throws IndexOutOfBoundsException { + return (long) array[off]; + } + + @Override + public byte getByte(double[] array, int off) throws IndexOutOfBoundsException { + return (byte) array[off]; + } + }; + + /** * Number of neighbors to be considered. */ int k; @@ -184,7 +232,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte FiniteProgress prog = LOG.isVerbose() ? 
new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null; for (DBIDIter id = ids.iter(); id.valid(); id.advance()) { - KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors); nids.remove(id); // Do not use query object @@ -241,7 +289,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte // Sort, so we can trim the top 15% below. Arrays.sort(dists[d]); // Evaluate - double score = 1 - GammaDistribution.estimate(dists[d], (int) (.85 * dists[d].length)).cdf(sqdevs); + double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs); if (score < min) { min = score; vdim = d + 1; @@ -271,11 +319,11 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte } // combine results. - Relation<Double> scoreResult = new MaterializedRelation<Double>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids); OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); - result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids)); - result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids)); + result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids)); + result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids)); return result; } @@ -361,7 +409,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte if (config.grab(kP)) { k = kP.intValue(); } - 
EnumParameter<DistanceDist> distP = new EnumParameter<DistanceDist>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA); + EnumParameter<DistanceDist> distP = new EnumParameter<>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA); if (config.grab(distP)) { dist = distP.getValue(); } @@ -371,7 +419,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte if (config.grab(expectP)) { expect = expectP.doubleValue(); } - ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<PCARunner<V>>(PCARUNNER_ID, PCARunner.class, PCARunner.class); + ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<>(PCARUNNER_ID, PCARunner.class, PCARunner.class); if (config.grab(pcaP)) { pca = pcaP.instantiateClass(config); } @@ -379,7 +427,7 @@ public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte @Override protected COP<V, D> makeInstance() { - return new COP<V, D>(distanceFunction, k, pca, expect, dist); + return new COP<>(distanceFunction, k, pca, expect, dist); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java index ba1fd841..4f4d12bf 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java @@ -29,12 +29,12 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -117,7 +117,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl if(knnQuery != null) {
for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) { counter++;
- final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, m);
+ final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, m);
if(LOG.isDebugging()) {
LOG.debugFine("distance to mth nearest neighbour" + knns.toString());
}
@@ -184,7 +184,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl @Override
protected DBOutlierDetection<O, D> makeInstance() {
- return new DBOutlierDetection<O, D>(distanceFunction, d, p);
+ return new DBOutlierDetection<>(distanceFunction, d, p);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java index a2d39130..d6528682 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java @@ -103,7 +103,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier< public static class Parameterizer<O, D extends Distance<D>> extends AbstractDBOutlier.Parameterizer<O, D> {
@Override
protected DBOutlierScore<O, D> makeInstance() {
- return new DBOutlierScore<O, D>(distanceFunction, d);
+ return new DBOutlierScore<>(distanceFunction, d);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java index 2d2a4466..f8fd686f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java @@ -104,7 +104,7 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl emo_score.putDouble(iditer, maxProb);
globmax = Math.max(maxProb, globmax);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("EM outlier scores", "em-outlier", TypeUtil.DOUBLE, emo_score, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
// combine results.
OutlierResult result = new OutlierResult(meta, scoreres);
@@ -142,7 +142,7 @@ public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<Outl @Override
protected EMOutlier<V> makeInstance() {
- return new EMOutlier<V>(em);
+ return new EMOutlier<>(em);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java index 6aed60fe..c9e6a634 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -136,7 +136,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm< else {
meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), 0.0, Double.POSITIVE_INFINITY);
}
- Relation<Double> res = new MaterializedRelation<Double>("Gaussian Model Outlier Score", "gaussian-model-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ Relation<Double> res = new MaterializedRelation<>("Gaussian Model Outlier Score", "gaussian-model-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -171,7 +171,7 @@ public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm< @Override
protected GaussianModel<V> makeInstance() {
- return new GaussianModel<V>(invert);
+ return new GaussianModel<>(invert);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java index db53a3ef..294592e8 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -184,7 +184,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA }
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0);
- Relation<Double> res = new MaterializedRelation<Double>("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
+ Relation<Double> res = new MaterializedRelation<>("Gaussian Mixture Outlier Score", "gaussian-mixture-outlier", TypeUtil.DOUBLE, oscores, relation.getDBIDs());
return new OutlierResult(meta, res);
}
@@ -267,7 +267,7 @@ public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractA @Override
protected GaussianUniformMixture<V> makeInstance() {
- return new GaussianUniformMixture<V>(l, c);
+ return new GaussianUniformMixture<>(l, c);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java index 15f6cbf3..e0cdd0c5 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java @@ -1,28 +1,28 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/* - This file is part of ELKI: +
+/*
+ This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures - - Copyright (C) 2012 - Ludwig-Maximilians-Universität München - Lehr- und Forschungseinheit für Datenbanksysteme +
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
(at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -import java.util.Collections;
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;
@@ -39,14 +39,16 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.LPNormDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -57,7 +59,9 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMaxHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ComparatorMinHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.ObjectHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -139,6 +143,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo */
private double omega_star;
+ // public int distcomp = 1;
+
+ /**
+ * Comparator for sorting the heaps.
+ */
+ private static final Comparator<? super DistanceDBIDPair<?>> COMPARATOR = DistanceDBIDResultUtil.distanceComparator();
+
/**
* Type of output: all scores (upper bounds) or top n only
*
@@ -182,18 +193,18 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo Pair<O, O> hbbs = DatabaseUtil.computeMinMax(relation);
min = new double[d];
double[] max = new double[d];
- for(int i = 0; i < d; i++) {
+ for (int i = 0; i < d; i++) {
min[i] = hbbs.first.doubleValue(i);
max[i] = hbbs.second.doubleValue(i);
diameter = Math.max(diameter, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
- for(int i = 0; i < d; i++) {
+ for (int i = 0; i < d; i++) {
double diff = (diameter - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
- if(LOG.isVerbose()) {
+ if (LOG.isVerbose()) {
LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
}
}
@@ -205,7 +216,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
// Main part: 1. Phase max. d+1 loops
- for(int j = 0; j <= d && n_star < n; j++) {
+ for (int j = 0; j <= d && n_star < n; j++) {
// initialize (clear) out and wlb - not 100% clear in the paper
h.out.clear();
h.wlb.clear();
@@ -215,61 +226,64 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo scan(h, (int) (k * capital_n / (double) capital_n_star));
// determine the true outliers (n_star)
trueOutliers(h);
- if(progressTrueOut != null) {
+ if (progressTrueOut != null) {
progressTrueOut.setProcessed(n_star, LOG);
}
// Build the top Set as out + wlb
h.top.clear();
HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size());
- for(HilFeature entry : h.out) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature entry = iter.get();
top_keys.add(entry.id);
h.top.add(entry);
}
- for(HilFeature entry : h.wlb) {
- if(!top_keys.contains(entry.id)) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.wlb.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature entry = iter.get();
+ if (!top_keys.contains(entry.id)) {
// No need to update top_keys - discarded
h.top.add(entry);
}
}
- if(progressHilOut != null) {
+ if (progressHilOut != null) {
progressHilOut.incrementProcessed(LOG);
}
}
// 2. Phase: Additional Scan if less than n true outliers determined
- if(n_star < n) {
+ if (n_star < n) {
h.out.clear();
h.wlb.clear();
// TODO: reinitialize shift to 0?
scan(h, capital_n);
}
- if(progressHilOut != null) {
+ if (progressHilOut != null) {
progressHilOut.setProcessed(d, LOG);
progressHilOut.ensureCompleted(LOG);
}
- if(progressTrueOut != null) {
+ if (progressTrueOut != null) {
progressTrueOut.setProcessed(n, LOG);
progressTrueOut.ensureCompleted(LOG);
}
DoubleMinMax minmax = new DoubleMinMax();
// Return weights in out
- if(tn == ScoreType.TopN) {
+ if (tn == ScoreType.TopN) {
minmax.put(0.0);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
hilout_weight.putDouble(iditer, 0.0);
}
- for(HilFeature ent : h.out) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature ent = iter.get();
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
}
}
// Return all weights in pf
else {
- for(HilFeature ent : h.pf) {
+ for (HilFeature ent : h.pf) {
minmax.put(ent.ubound);
hilout_weight.putDouble(ent.id, ent.ubound);
}
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -283,37 +297,35 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo */
private void scan(HilbertFeatures hf, int k0) {
final int mink0 = Math.min(2 * k0, capital_n - 1);
- if(LOG.isDebuggingFine()) {
+ if (LOG.isDebuggingFine()) {
LOG.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
}
- for(int i = 0; i < hf.pf.length; i++) {
- if(hf.pf[i].ubound < omega_star) {
+ for (int i = 0; i < hf.pf.length; i++) {
+ if (hf.pf[i].ubound < omega_star) {
continue;
}
- if(hf.pf[i].lbound < hf.pf[i].ubound) {
+ if (hf.pf[i].lbound < hf.pf[i].ubound) {
double omega = hf.fastUpperBound(i);
- if(omega < omega_star) {
+ if (omega < omega_star) {
hf.pf[i].ubound = omega;
- }
- else {
+ } else {
int maxcount;
// capital_n-1 instead of capital_n: all, except self
- if(hf.top.contains(hf.pf[i])) {
+ if (hf.top.contains(hf.pf[i])) {
maxcount = capital_n - 1;
- }
- else {
+ } else {
maxcount = mink0;
}
innerScan(hf, i, maxcount);
}
}
- if(hf.pf[i].ubound > 0) {
+ if (hf.pf[i].ubound > 0) {
hf.updateOUT(i);
}
- if(hf.pf[i].lbound > 0) {
+ if (hf.pf[i].lbound > 0) {
hf.updateWLB(i);
}
- if(hf.wlb.size() >= n) {
+ if (hf.wlb.size() >= n) {
omega_star = Math.max(omega_star, hf.wlb.peek().lbound);
}
}
@@ -332,43 +344,40 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo int a = i, b = i;
int level = h, levela = h, levelb = h;
// Explore up to "maxcount" neighbors in this pass
- for(int count = 0; count < maxcount; count++) {
+ for (int count = 0; count < maxcount; count++) {
final int c; // Neighbor to explore
- if(a == 0) { // At left end, explore right
+ if (a == 0) { // At left end, explore right
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
- }
- else if(b >= capital_n - 1) { // At right end, explore left
+ } else if (b >= capital_n - 1) { // At right end, explore left
// assert (a > 0);
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- }
- else if(hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
+ } else if (hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level
a--;
levela = Math.min(levela, hf.pf[a].level);
c = a;
- }
- else {
+ } else {
// assert (b < capital_n - 1);
levelb = Math.min(levelb, hf.pf[b].level);
b++;
c = b;
}
- if(!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
+ if (!hf.pf[i].nn_keys.contains(hf.pf[c].id)) {
// hf.distcomp ++;
hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id).doubleValue(), k);
- if(hf.pf[i].nn.size() == k) {
- if(hf.pf[i].sum_nn < omega_star) {
+ if (hf.pf[i].nn.size() == k) {
+ if (hf.pf[i].sum_nn < omega_star) {
break; // stop = true
}
final int mlevel = Math.max(levela, levelb);
- if(mlevel < level) {
+ if (mlevel < level) {
level = mlevel;
final double delta = hf.minDistLevel(hf.pf[i].id, level);
- if(delta >= hf.pf[i].nn.peek().doubleDistance()) {
+ if (delta >= hf.pf[i].nn.peek().doubleDistance()) {
break; // stop = true
}
}
@@ -378,16 +387,17 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo double br = hf.boxRadius(i, a - 1, b + 1);
double newlb = 0.0;
double newub = 0.0;
- for(DoubleDistanceDBIDPair entry : hf.pf[i].nn) {
+ for (ObjectHeap.UnsortedIter<DoubleDistanceDBIDPair> iter = hf.pf[i].nn.unsortedIter(); iter.valid(); iter.advance()) {
+ DoubleDistanceDBIDPair entry = iter.get();
newub += entry.doubleDistance();
- if(entry.doubleDistance() <= br) {
+ if (entry.doubleDistance() <= br) {
newlb += entry.doubleDistance();
}
}
- if(newlb > hf.pf[i].lbound) {
+ if (newlb > hf.pf[i].lbound) {
hf.pf[i].lbound = newlb;
}
- if(newub < hf.pf[i].ubound) {
+ if (newub < hf.pf[i].ubound) {
hf.pf[i].ubound = newub;
}
}
@@ -401,8 +411,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo private void trueOutliers(HilbertFeatures h) {
n_star = 0;
- for(HilFeature entry : h.out) {
- if(entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
+ for (ObjectHeap.UnsortedIter<HilFeature> iter = h.out.unsortedIter(); iter.valid(); iter.advance()) {
+ HilFeature entry = iter.get();
+ if (entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) {
n_star++;
}
}
@@ -461,12 +472,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo /**
* "OUT"
*/
- private Heap<HilFeature> out;
+ private ObjectHeap<HilFeature> out;
/**
* "WLB"
*/
- private Heap<HilFeature> wlb;
+ private ObjectHeap<HilFeature> wlb;
/**
* Constructor.
@@ -483,22 +494,22 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo this.pf = new HilFeature[relation.size()];
int pos = 0;
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new Heap<DoubleDistanceDBIDPair>(k, Collections.reverseOrder()));
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new ComparatorMaxHeap<DoubleDistanceDBIDPair>(k, COMPARATOR));
}
- this.out = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
+ this.out = new ComparatorMinHeap<>(n, new Comparator<HilFeature>() {
@Override
public int compare(HilFeature o1, HilFeature o2) {
return Double.compare(o1.ubound, o2.ubound);
}
});
- this.wlb = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
+ this.wlb = new ComparatorMinHeap<>(n, new Comparator<HilFeature>() {
@Override
public int compare(HilFeature o1, HilFeature o2) {
return Double.compare(o1.lbound, o2.lbound);
}
});
- this.top = new HashSet<HilFeature>(2 * n);
+ this.top = new HashSet<>(2 * n);
}
/**
@@ -512,45 +523,42 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo // FIXME: 64 bit mode untested - sign bit is tricky to handle correctly
// with the rescaling. 63 bit should be fine. The sign bit probably needs
// to be handled differently, or at least needs careful testing of the API
- if(h >= 32) { // 32 to 63 bit
+ if (h >= 32) { // 32 to 63 bit
final long scale = Long.MAX_VALUE; // = 63 bits
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
long[] coord = new long[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- }
- else if(h >= 16) { // 16-31 bit
+ } else if (h >= 16) { // 16-31 bit
final int scale = ~1 >>> 1;
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
int[] coord = new int[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1);
}
- }
- else if(h >= 8) { // 8-15 bit
+ } else if (h >= 8) { // 8-15 bit
final int scale = ~1 >>> 16;
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
short[] coord = new short[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 16);
}
- }
- else { // 1-7 bit
+ } else { // 1-7 bit
final int scale = ~1 >>> 8;
- for(int i = 0; i < pf.length; i++) {
+ for (int i = 0; i < pf.length; i++) {
NumberVector<?> obj = relation.get(pf[i].id);
byte[] coord = new byte[d];
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
}
pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 24);
@@ -558,13 +566,13 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo }
java.util.Arrays.sort(pf);
// Update levels
- for(int i = 0; i < pf.length - 1; i++) {
+ for (int i = 0; i < pf.length - 1; i++) {
pf[i].level = minRegLevel(i, i + 1);
}
// Count candidates
capital_n_star = 0;
- for(int i = 0; i < pf.length; i++) {
- if(pf[i].ubound >= omega_star) {
+ for (int i = 0; i < pf.length; i++) {
+ if (pf[i].ubound >= omega_star) {
capital_n_star++;
}
}
@@ -576,12 +584,11 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param i position in pf of the feature to be inserted
*/
private void updateOUT(int i) {
- if(out.size() < n) {
+ if (out.size() < n) {
out.add(pf[i]);
- }
- else {
+ } else {
HilFeature head = out.peek();
- if(pf[i].ubound > head.ubound) {
+ if (pf[i].ubound > head.ubound) {
// replace smallest
out.replaceTopElement(pf[i]);
}
@@ -594,12 +601,11 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param i position in pf of the feature to be inserted
*/
private void updateWLB(int i) {
- if(wlb.size() < n) {
+ if (wlb.size() < n) {
wlb.add(pf[i]);
- }
- else {
+ } else {
HilFeature head = wlb.peek();
- if(pf[i].lbound > head.lbound) {
+ if (pf[i].lbound > head.lbound) {
// replace smallest
wlb.replaceTopElement(pf[i]);
}
@@ -616,13 +622,12 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo private double fastUpperBound(int i) {
int pre = i;
int post = i;
- while(post - pre < k) {
+ while (post - pre < k) {
int pre_level = (pre - 1 >= 0) ? pf[pre - 1].level : -2;
int post_level = (post < capital_n - 1) ? pf[post].level : -2;
- if(post_level >= pre_level) {
+ if (post_level >= pre_level) {
post++;
- }
- else {
+ } else {
pre--;
}
}
@@ -642,7 +647,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo // 2 ^ - (level - 1)
final double r = 1.0 / (1 << (level - 1));
double dist = Double.POSITIVE_INFINITY;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.min(dist, Math.min(p_m_r, r - p_m_r));
}
@@ -661,35 +666,32 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo // level 1 is supposed to have r=1 as in the original publication
final double r = 1.0 / (1 << (level - 1));
double dist;
- if(t == 1.0) {
+ if (t == 1.0) {
dist = 0.0;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
dist += Math.max(p_m_r, r - p_m_r);
}
- }
- else if(t == 2.0) {
+ } else if (t == 2.0) {
dist = 0.0;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
// assert (p_m_r >= 0);
double a = Math.max(p_m_r, r - p_m_r);
dist += a * a;
}
dist = Math.sqrt(dist);
- }
- else if(!Double.isInfinite(t)) {
+ } else if (!Double.isInfinite(t)) {
dist = 0.0;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist += Math.pow(Math.max(p_m_r, r - p_m_r), t);
}
dist = Math.pow(dist, 1.0 / t);
- }
- else {
+ } else {
dist = Double.NEGATIVE_INFINITY;
- for(int dim = 0; dim < d; dim++) {
+ for (int dim = 0; dim < d; dim++) {
final double p_m_r = getDimForObject(obj, dim) % r;
dist = Math.max(dist, Math.max(p_m_r, r - p_m_r));
}
@@ -705,9 +707,9 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @return Number of level shared
*/
private int numberSharedLevels(long[] a, long[] b) {
- for(int i = 0, j = a.length - 1; i < a.length; i++, j--) {
+ for (int i = 0, j = a.length - 1; i < a.length; i++, j--) {
final long diff = a[j] ^ b[j];
- if(diff != 0) {
+ if (diff != 0) {
// expected unused = available - used
final int expected = (a.length * Long.SIZE) - (d * h);
return ((BitsUtil.numberOfLeadingZeros(diff) + i * Long.SIZE) - expected) / d;
@@ -756,16 +758,14 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo private double boxRadius(int i, int a, int b) {
// level are inversely ordered to box sizes. min -> max
final int level;
- if(a < 0) {
- if(b >= pf.length) {
+ if (a < 0) {
+ if (b >= pf.length) {
return Double.POSITIVE_INFINITY;
}
level = maxRegLevel(i, b);
- }
- else if(b >= pf.length) {
+ } else if (b >= pf.length) {
level = maxRegLevel(i, a);
- }
- else {
+ } else {
level = Math.max(maxRegLevel(i, a), maxRegLevel(i, b));
}
return minDistLevel(pf[i].id, level);
@@ -822,7 +822,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo /**
* Heap with the nearest known neighbors
*/
- public Heap<DoubleDistanceDBIDPair> nn;
+ public ObjectHeap<DoubleDistanceDBIDPair> nn;
/**
* Set representation of the nearest neighbors for faster lookups
@@ -840,7 +840,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @param id Object ID
* @param nn Heap for neighbors
*/
- public HilFeature(DBID id, Heap<DoubleDistanceDBIDPair> nn) {
+ public HilFeature(DBID id, ObjectHeap<DoubleDistanceDBIDPair> nn) {
super();
this.id = id;
this.nn = nn;
@@ -861,15 +861,14 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo */
protected void insert(DBID id, double dt, int k) {
// assert (!nn_keys.contains(id));
- if(nn.size() < k) {
+ if (nn.size() < k) {
DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
- }
- else {
+ } else {
DoubleDistanceDBIDPair head = nn.peek();
- if(dt < head.doubleDistance()) {
+ if (dt < head.doubleDistance()) {
head = nn.poll(); // Remove worst
sum_nn -= head.doubleDistance();
nn_keys.remove(head);
@@ -891,7 +890,7 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo * @author Jonathan von Brünken
*
* @apiviz.exclude
- *
+ *
* @param <O> Vector type
*/
public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
@@ -952,34 +951,34 @@ public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgo super.makeOptions(config);
final IntParameter kP = new IntParameter(K_ID, 5);
- if(config.grab(kP)) {
+ if (config.grab(kP)) {
k = kP.getValue();
}
final IntParameter nP = new IntParameter(N_ID, 10);
- if(config.grab(nP)) {
+ if (config.grab(nP)) {
n = nP.getValue();
}
final IntParameter hP = new IntParameter(H_ID, 32);
- if(config.grab(hP)) {
+ if (config.grab(hP)) {
h = hP.getValue();
}
-
+
ObjectParameter<LPNormDistanceFunction> distP = AbstractDistanceBasedAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, LPNormDistanceFunction.class);
if (config.grab(distP)) {
distfunc = distP.instantiateClass(config);
}
- final EnumParameter<ScoreType> tnP = new EnumParameter<ScoreType>(TN_ID, ScoreType.class, ScoreType.TopN);
- if(config.grab(tnP)) {
+ final EnumParameter<ScoreType> tnP = new EnumParameter<>(TN_ID, ScoreType.class, ScoreType.TopN);
+ if (config.grab(tnP)) {
tn = tnP.getValue();
}
}
@Override
protected HilOut<O> makeInstance() {
- return new HilOut<O>(distfunc, k, n, h, tn);
+ return new HilOut<>(distfunc, k, n, h, tn);
}
}
-}
\ No newline at end of file +}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java index 4c4873dd..503487c8 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java @@ -31,13 +31,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -53,10 +53,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
/**
- * <p>
* Outlier Detection based on the distance of an object to its k nearest
* neighbor.
- * </p>
*
* <p>
* Reference:<br>
@@ -119,7 +117,7 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista // compute distance to the k nearest neighbor.
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
- final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
+ final KNNList<D> knns = knnQuery.getKNNForDBID(iditer, k);
final double dkn;
if(knns instanceof DoubleDistanceKNNList) {
dkn = ((DoubleDistanceKNNList) knns).doubleKNNDistance();
@@ -138,7 +136,7 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista if(progressKNNDistance != null) {
progressKNNDistance.ensureCompleted(LOG);
}
- Relation<Double> scoreres = new MaterializedRelation<Double>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreres);
}
@@ -174,7 +172,7 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista @Override
protected KNNOutlier<O, D> makeInstance() {
- return new KNNOutlier<O, D>(distanceFunction, k);
+ return new KNNOutlier<>(distanceFunction, k);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java index e7eeeb9c..88603f09 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java @@ -31,15 +31,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -124,15 +124,15 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute sum of the distances to the k nearest neighbors
- final KNNResult<D> knn = knnQuery.getKNNForDBID(iditer, k);
+ final KNNList<D> knn = knnQuery.getKNNForDBID(iditer, k);
double skn = 0;
if(knn instanceof DoubleDistanceKNNList) {
- for(DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
+ for(DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
skn += neighbor.doubleDistance();
}
}
else {
- for(DistanceDBIDResultIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
+ for(DistanceDBIDListIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
skn += neighbor.getDistance().doubleValue();
}
}
@@ -147,7 +147,7 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac progressKNNWeight.ensureCompleted(LOG);
}
- Relation<Double> res = new MaterializedRelation<Double>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
+ Relation<Double> res = new MaterializedRelation<>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(meta, res);
}
@@ -183,7 +183,7 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac @Override
protected KNNWeightOutlier<O, D> makeInstance() {
- return new KNNWeightOutlier<O, D>(distanceFunction, k);
+ return new KNNWeightOutlier<>(distanceFunction, k);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java new file mode 100644 index 00000000..f22cdeb7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ODIN.java @@ -0,0 +1,192 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Outlier detection based on the in-degree of the kNN graph. + * + * This is a curried version: instead of using a threshold T to obtain a binary + * decision, we use the computed value as outlier score; normalized by k to make + * the numbers more comparable across different parameterizations. + * + * Reference: + * <p> + * V. Hautamäki and I. 
Kärkkäinen and P Fränti<br /> + * Outlier detection using k-nearest neighbour graph<br /> + * Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004 <br /> + * </p> + * + * @author Erich Schubert + * + * @param <O> Object type + * @param <D> Distance type + */ +@Reference(authors = "V. Hautamäki and I. Kärkkäinen and P Fränti", title = "Outlier detection using k-nearest neighbour graph", booktitle = "Proc. 17th Int. Conf. Pattern Recognition, ICPR 2004", url = "http://dx.doi.org/10.1109/ICPR.2004.1334558") +public class ODIN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { + /** + * Class logger. + */ + private static final Logging LOG = Logging.getLogger(ODIN.class); + + /** + * Number of neighbors for kNN graph. + */ + int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k k parameter + */ + public ODIN(DistanceFunction<? super O, D> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Run the ODIN algorithm + * + * @param database Database to run on. + * @param relation Relation to process. + * @return ODIN outlier result. + */ + public OutlierResult run(Database database, Relation<O> relation) { + // Get the query functions: + DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O, D> knnq = database.getKNNQuery(dq, k); + + // Get the objects to process, and a data storage for counting and output: + DBIDs ids = relation.getDBIDs(); + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.); + + double inc = 1. / (k - 1); + double min = Double.POSITIVE_INFINITY, max = 0.0; + // Process all objects + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + // Find the nearest neighbors (using an index, if available!) 
+ KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + // For each neighbor, except ourselves, increase the in-degree: + for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) { + if (DBIDUtil.equal(iter, nei)) { + continue; + } + final double value = scores.doubleValue(nei) + inc; + if (value < min) { + min = value; + } + if (value > max) { + max = value; + } + scores.put(nei, value); + } + } + + // Wrap the result and add metadata. + OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., inc * (ids.size() - 1), 1); + Relation<Double> rel = new MaterializedRelation<>("ODIN In-Degree", "odin", TypeUtil.DOUBLE, scores, ids); + return new OutlierResult(meta, rel); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + * @param <D> Distance type + */ + public static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + /** + * Parameter for the number of nearest neighbors: + * + * <pre> + * -odin.k <int> + * </pre> + */ + public static final OptionID K_ID = new OptionID("odin.k", "Number of neighbors to use for kNN graph."); + + /** + * Number of nearest neighbors to use. + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter param = new IntParameter(K_ID); + // Since in a database context, the 1 nearest neighbor + // will usually be the query object itself, we require + // this value to be at least 2. 
+ param.addConstraint(new GreaterConstraint(1)); + if (config.grab(param)) { + k = param.intValue(); + } + } + + @Override + protected ODIN<O, D> makeInstance() { + return new ODIN<>(distanceFunction, k); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java index bed27a33..f6d46f57 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -37,14 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -115,7 +115,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc DBIDs ids = relation.getDBIDs();
// FIXME: implicit preprocessor.
- WritableDataStore<KNNResult<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNResult.class);
+ WritableDataStore<KNNList<D>> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
@@ -123,7 +123,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc // N_minpts(id) and core-distance(id)
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - KNNResult<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
+ KNNList<D> minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
D d = minptsNeighbours.getKNNDistance();
nMinPts.put(iditer, minptsNeighbours);
coreDistance.putDouble(iditer, d.doubleValue());
@@ -134,10 +134,10 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - List<Double> core = new ArrayList<Double>();
+ List<Double> core = new ArrayList<>();
double lrd = 0;
// TODO: optimize for double distances
- for (DistanceDBIDResultIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ for (DistanceDBIDListIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double coreDist = coreDistance.doubleValue(neighbor);
double dist = distQuery.distance(iditer, neighbor).doubleValue();
double rd = Math.max(coreDist, dist);
@@ -165,7 +165,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc ofminmax.put(of);
}
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("OPTICS Outlier Scores", "optics-outlier", TypeUtil.DOUBLE, ofs, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("OPTICS Outlier Scores", "optics-outlier", TypeUtil.DOUBLE, ofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -202,7 +202,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc @Override
protected OPTICSOF<O, D> makeInstance() {
- return new OPTICSOF<O, D>(distanceFunction, minpts);
+ return new OPTICSOF<>(distanceFunction, minpts);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java index 00c4a8ec..f3ef5ab5 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; * @author Erich Schubert * * @apiviz.landmark - * + * @apiviz.excludeSubtypes * @apiviz.has OutlierResult */ public interface OutlierAlgorithm extends Algorithm { diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java index 93eca7db..092bbc45 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java @@ -31,18 +31,19 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair;
+import de.lmu.ifi.dbs.elki.database.ids.generic.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -136,11 +137,12 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends /**
* Run the algorithm on the given relation.
*
+ * @param database Database
* @param relation Relation to process
* @return Outlier result
*/
- public OutlierResult run(Relation<V> relation) {
- DistanceQuery<V, D> distFunc = relation.getDatabase().getDistanceQuery(relation, distanceFunction);
+ public OutlierResult run(Database database, Relation<V> relation) {
+ DistanceQuery<V, D> distFunc = database.getDistanceQuery(relation, distanceFunction);
Collection<V> refPoints = refp.getReferencePoints(relation);
DBIDs ids = relation.getDBIDs();
@@ -158,7 +160,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends }
V firstRef = iter.next();
// compute distance vector for the first reference point
- DistanceDBIDResult<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
+ DistanceDBIDList<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
for(int l = 0; l < firstReferenceDists.size(); l++) {
double density = computeDensity(firstReferenceDists, l);
// Initial value
@@ -167,7 +169,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends // compute density values for all remaining reference points
while(iter.hasNext()) {
V refPoint = iter.next();
- DistanceDBIDResult<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
+ DistanceDBIDList<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
// compute density value for each object
for(int l = 0; l < referenceDists.size(); l++) {
double density = computeDensity(referenceDists, l);
@@ -194,9 +196,9 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends // adds reference points to the result. header information for the
// visualizer to find the reference points in the result
- ReferencePointsResult<V> refp = new ReferencePointsResult<V>("Reference points", "reference-points", refPoints);
+ ReferencePointsResult<V> refp = new ReferencePointsResult<>("Reference points", "reference-points", refPoints);
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Reference-points Outlier Scores", "reference-outlier", TypeUtil.DOUBLE, rbod_score, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Reference-points Outlier Scores", "reference-outlier", TypeUtil.DOUBLE, rbod_score, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(0.0, 1.0, 0.0, 1.0, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
result.addChildResult(refp);
@@ -213,9 +215,9 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends * @return array containing the distance to one reference point for each
* database object and the object id
*/
- protected DistanceDBIDResult<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
+ protected DistanceDBIDList<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
// TODO: optimize for double distances?
- GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<D>(database.size());
+ GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<>(database.size());
for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) { referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
}
@@ -235,7 +237,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends * @param index index of the current object
* @return density for one object and reference point
*/
- protected double computeDensity(DistanceDBIDResult<D> referenceDists, int index) {
+ protected double computeDensity(DistanceDBIDList<D> referenceDists, int index) {
final DistanceDBIDPair<D> x = referenceDists.get(index);
final double xDist = x.getDistance().doubleValue();
@@ -321,7 +323,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends if(config.grab(pK)) {
k = pK.getValue();
}
- final ObjectParameter<ReferencePointsHeuristic<V>> refpP = new ObjectParameter<ReferencePointsHeuristic<V>>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
+ final ObjectParameter<ReferencePointsHeuristic<V>> refpP = new ObjectParameter<>(REFP_ID, ReferencePointsHeuristic.class, GridBasedReferencePoints.class);
if(config.grab(refpP)) {
refp = refpP.instantiateClass(config);
}
@@ -329,7 +331,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends @Override
protected ReferenceBasedOutlierDetection<V, D> makeInstance() {
- return new ReferenceBasedOutlierDetection<V, D>(k, distanceFunction, refp);
+ return new ReferenceBasedOutlierDetection<>(k, distanceFunction, refp);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java index e8077819..38820ab7 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -41,11 +41,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -107,7 +107,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> public SimpleCOP(DistanceFunction<? 
super V, D> distanceFunction, int k, PCAFilteredRunner<V> pca) { super(distanceFunction); this.k = k; - this.dependencyDerivator = new DependencyDerivator<V, D>(null, FormatUtil.NF8, pca, 0, false); + this.dependencyDerivator = new DependencyDerivator<>(null, FormatUtil.NF, pca, 0, false); } public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException { @@ -124,7 +124,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null; double sqrt2 = Math.sqrt(2.0); for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) { - KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); ModifiableDBIDs nids = DBIDUtil.newArray(neighbors); nids.remove(id); @@ -156,14 +156,14 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> } } // combine results. 
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids); OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); // extra results - result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids)); - result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP.COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids)); - result.addChildResult(new MaterializedRelation<Matrix>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids)); - result.addChildResult(new MaterializedRelation<CorrelationAnalysisSolution<?>>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids)); + result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids)); + result.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids)); + result.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids)); + result.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids)); return result; } @@ -222,7 +222,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> if (config.grab(kP)) { k = kP.intValue(); } - ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<PCAFilteredRunner<V>>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class); + ObjectParameter<PCAFilteredRunner<V>> pcaP = new 
ObjectParameter<>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class); if (config.grab(pcaP)) { pca = pcaP.instantiateClass(config); } @@ -230,7 +230,7 @@ public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?> @Override protected SimpleCOP<V, D> makeInstance() { - return new SimpleCOP<V, D>(distanceFunction, k, pca); + return new SimpleCOP<>(distanceFunction, k, pca); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java index 41da687f..d48679a9 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/ALOCI.java @@ -1,4 +1,4 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
@@ -28,6 +28,7 @@ import java.util.List; import java.util.Random;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -42,8 +43,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -163,7 +164,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex }
}
- List<ALOCIQuadTree> qts = new ArrayList<ALOCIQuadTree>(g);
+ List<ALOCIQuadTree> qts = new ArrayList<>(g);
double[] nshift = new double[dim];
ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
@@ -251,7 +252,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex if(progressLOCI != null) {
progressLOCI.ensureCompleted(LOG);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
@@ -370,7 +371,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex }
this.relation = relation;
ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
- List<Node> children = new ArrayList<Node>();
+ List<Node> children = new ArrayList<>();
bulkLoad(min.clone(), max.clone(), children, ids, 0, ids.size(), 0, 0, 0);
this.root = new Node(0, new Vector(center), ids.size(), -1, children);
}
@@ -432,7 +433,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex return;
}
else {
- List<Node> newchildren = new ArrayList<Node>();
+ List<Node> newchildren = new ArrayList<>();
bulkLoad(lmin, lmax, newchildren, ids, start, end, 0, level + 1, 0);
children.add(new Node(code, new Vector(center), end - start, level, newchildren));
return;
@@ -730,7 +731,7 @@ public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> ex @Override
protected ALOCI<O, D> makeInstance() {
- return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, rnd);
+ return new ALOCI<>(distanceFunction, nmin, alpha, g, rnd);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java index 66bed47a..80f60e8b 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/FlexibleLOF.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,9 +25,11 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; @@ -36,6 +38,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; @@ 
-44,10 +50,6 @@ import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -70,8 +72,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; /** * <p> - * Algorithm to compute density-based local outlier factors in a database based - * on a specified parameter {@link #K_ID} ({@code -lof.k}). + * Flexible variant of the "Local Outlier Factor" algorithm. * </p> * * <p> @@ -85,14 +86,15 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * The k nearest neighbors are determined using the parameter * {@link de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm#DISTANCE_FUNCTION_ID} * , while the reference set used in reachability distance computation is - * configured using {@link #REACHABILITY_DISTANCE_FUNCTION_ID}. + * configured using {@link Parameterizer#REACHABILITY_DISTANCE_FUNCTION_ID}. * </p> * * <p> - * The original LOF parameter was called "minPts". Since kNN queries - * in ELKI have slightly different semantics - exactly k neighbors are returned - * - we chose to rename the parameter to {@link #K_ID} ({@code -lof.k}) to - * reflect this difference. + * The original LOF parameter was called "minPts". For consistency + * with the name "kNN query", we chose to rename the parameter to {@code k}. 
+ * Flexible LOF allows you to set the two values different, which yields the + * parameters {@link Parameterizer#KREF_ID} ({@code -lof.krefer}) and + * {@link Parameterizer#KREACH_ID} ({@code -lof.kreach}) * </p> * * <p> @@ -116,33 +118,26 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; @Title("LOF: Local Outlier Factor") @Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'") @Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388") -public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +public class FlexibleLOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ - private static final Logging LOG = Logging.getLogger(LOF.class); + private static final Logging LOG = Logging.getLogger(FlexibleLOF.class); /** - * The distance function to determine the reachability distance between - * database objects. + * Number of neighbors in comparison set. */ - public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects."); + protected int krefer = 2; /** - * Parameter to specify the number of nearest neighbors of an object to be - * considered for computing its LOF_SCORE, must be an integer greater than 1. + * Number of neighbors used for reachability distance. */ - public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); - - /** - * Holds the value of {@link #K_ID}. 
- */ - protected int k = 2; + protected int kreach = 2; /** * Neighborhood distance function. */ - protected DistanceFunction<? super O, D> neighborhoodDistanceFunction; + protected DistanceFunction<? super O, D> referenceDistanceFunction; /** * Reachability distance function. @@ -160,42 +155,30 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou /** * Constructor. * - * @param k the value of k + * @param krefer The number of neighbors for reference + * @param kreach The number of neighbors for reachability distance * @param neighborhoodDistanceFunction the neighborhood distance function * @param reachabilityDistanceFunction the reachability distance function */ - public LOF(int k, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) { + public FlexibleLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) { super(); - this.k = k + (objectIsInKNN ? 0 : 1); - this.neighborhoodDistanceFunction = neighborhoodDistanceFunction; + this.krefer = krefer + (objectIsInKNN ? 0 : 1); + this.kreach = kreach + (objectIsInKNN ? 0 : 1); + this.referenceDistanceFunction = neighborhoodDistanceFunction; this.reachabilityDistanceFunction = reachabilityDistanceFunction; } /** - * Constructor. - * - * @param k the value of k - * @param distanceFunction the distance function - * - * Uses the same distance function for neighborhood computation and - * reachability distance (standard as in the original publication), - * same as {@link #LOF(int, DistanceFunction, DistanceFunction) - * LOF(int, distanceFunction, distanceFunction)}. - */ - public LOF(int k, DistanceFunction<? super O, D> distanceFunction) { - this(k, distanceFunction, distanceFunction); - } - - /** - * Performs the Generalized LOF_SCORE algorithm on the given database by - * calling {@link #doRunInTime}. 
+ * Performs the Generalized LOF algorithm on the given database by calling + * {@link #doRunInTime}. * + * @param database Database to query * @param relation Data to process * @return LOF outlier result */ - public OutlierResult run(Relation<O> relation) { + public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null; - Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog); + Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog); KNNQuery<O, D> kNNRefer = pair.getFirst(); KNNQuery<O, D> kNNReach = pair.getSecond(); return doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog).getResult(); @@ -208,40 +191,41 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou * @param stepprog the progress logger * @return the kNN queries for the algorithm */ - private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Relation<O> relation, StepProgress stepprog) { + private Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) { // "HEAVY" flag for knnReach since it is used more than once - KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); + KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); // No optimized kNN query - use a preprocessor! if (!(knnReach instanceof PreprocessorKNNQuery)) { if (stepprog != null) { - if (neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { + if (referenceDistanceFunction.equals(reachabilityDistanceFunction)) { stepprog.beginStep(1, "Materializing neighborhoods w.r.t. 
reference neighborhood distance function.", LOG); } else { stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", LOG); } } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k); - relation.getDatabase().addIndex(preproc); - DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, reachabilityDistanceFunction); - knnReach = preproc.getKNNQuery(rdq, k); + int kpreproc = (referenceDistanceFunction.equals(reachabilityDistanceFunction)) ? Math.max(kreach, krefer) : kreach; + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, reachabilityDistanceFunction, kpreproc); + database.addIndex(preproc); + DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction); + knnReach = preproc.getKNNQuery(rdq, kreach); } // knnReach is only used once KNNQuery<O, D> knnRefer; - if (neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { + if (referenceDistanceFunction == reachabilityDistanceFunction || referenceDistanceFunction.equals(reachabilityDistanceFunction)) { knnRefer = knnReach; } else { // do not materialize the first neighborhood, since it is used only once - knnRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k); + knnRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer); } - return new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(knnRefer, knnReach); + return new Pair<>(knnRefer, knnReach); } /** * Performs the Generalized LOF_SCORE algorithm on the given database and - * returns a {@link LOF.LOFResult} encapsulating information that may be - * needed by an OnlineLOF algorithm. 
+ * returns a {@link FlexibleLOF.LOFResult} encapsulating information that may + * be needed by an OnlineLOF algorithm. * * @param ids Object ids * @param kNNRefer the kNN query w.r.t. reference neighborhood distance @@ -279,11 +263,11 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); - return new LOFResult<O, D>(result, kNNRefer, kNNReach, lrds, lofs); + return new LOFResult<>(result, kNNRefer, kNNReach, lrds, lofs); } /** @@ -298,14 +282,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); FiniteProgress lrdsProgress = LOG.isVerbose() ? 
new FiniteProgress("LRD", ids.size(), LOG) : null; for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - final KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k); + final KNNList<D> neighbors = knnReach.getKNNForDBID(iter, kreach); double sum = 0.0; int count = 0; if (neighbors instanceof DoubleDistanceKNNList) { // Fast version for double distances - for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { - KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); + KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach); final double nkdist; if (neighborsNeighbors instanceof DoubleDistanceKNNList) { nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance(); @@ -317,16 +301,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } } } else { - for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { - KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); + KNNList<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, kreach); sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()); count++; } } } // Avoid division by 0 - final double lrd = (sum > 0) ? (count / sum) : 0; + final double lrd = (sum > 0) ? 
(count / sum) : Double.POSITIVE_INFINITY; lrds.putDouble(iter, lrd); if (lrdsProgress != null) { lrdsProgress.incrementProcessed(LOG); @@ -356,8 +340,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { final double lrdp = lrds.doubleValue(iter); final double lof; - if (lrdp > 0) { - final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k); + if (lrdp > 0 && !Double.isInfinite(lrdp)) { + final KNNList<D> neighbors = knnRefer.getKNNForDBID(iter, krefer); double sum = 0.0; int count = 0; for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { @@ -373,7 +357,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } lofs.putDouble(iter, lof); // update minimum and maximum - lofminmax.put(lof); + if (!Double.isInfinite(lof)) { + lofminmax.put(lof); + } if (progressLOFs != null) { progressLOFs.incrementProcessed(LOG); @@ -382,16 +368,16 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou if (progressLOFs != null) { progressLOFs.ensureCompleted(LOG); } - return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax); + return new Pair<>(lofs, lofminmax); } @Override public TypeInformation[] getInputTypeRestriction() { final TypeInformation type; - if (reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) { + if (reachabilityDistanceFunction.equals(referenceDistanceFunction)) { type = reachabilityDistanceFunction.getInputTypeRestriction(); } else { - type = new CombinedTypeInformation(neighborhoodDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction()); + type = new CombinedTypeInformation(referenceDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction()); } return TypeUtil.array(type); } @@ -403,11 +389,13 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends 
AbstractAlgorithm<Ou /** * Encapsulates information like the neighborhood, the LRD and LOF values of - * the objects during a run of the {@link LOF} algorithm. + * the objects during a run of the {@link FlexibleLOF} algorithm. + * + * @author Elke Achtert */ public static class LOFResult<O, D extends NumberDistance<D, ?>> { /** - * The result of the run of the {@link LOF} algorithm. + * The result of the run of the {@link FlexibleLOF} algorithm. */ private OutlierResult result; @@ -442,10 +430,10 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou private final WritableDoubleDataStore lofs; /** - * Encapsulates information generated during a run of the {@link LOF} - * algorithm. + * Encapsulates information generated during a run of the + * {@link FlexibleLOF} algorithm. * - * @param result the result of the run of the {@link LOF} algorithm + * @param result the result of the run of the {@link FlexibleLOF} algorithm * @param kNNRefer the kNN query w.r.t. the reference neighborhood distance * @param kNNReach the kNN query w.r.t. the reachability distance * @param lrds the LRD values of the objects @@ -498,7 +486,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou /** * Get the outlier result. * - * @return the result of the run of the {@link LOF} algorithm + * @return the result of the run of the {@link FlexibleLOF} algorithm */ public OutlierResult getResult() { return result; @@ -550,9 +538,33 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou */ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { /** - * The neighborhood size to use. + * The distance function to determine the reachability distance between + * database objects. 
+ */ + public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects."); + + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its LOF_SCORE, must be an integer greater than + * 1. + */ + public static final OptionID KREF_ID = new OptionID("lof.krefer", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its reachability distance. + */ + public static final OptionID KREACH_ID = new OptionID("lof.kreach", "The number of nearest neighbors of an object to be considered for computing its reachability distance."); + + /** + * The reference set size to use. */ - protected int k = 2; + protected int krefer = 2; + + /** + * The set size to use for reachability distance. + */ + protected int kreach = 2; /** * Neighborhood distance function. 
@@ -568,23 +580,33 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter pK = new IntParameter(K_ID); + final IntParameter pK = new IntParameter(KREF_ID); pK.addConstraint(new GreaterConstraint(1)); if (config.grab(pK)) { - k = pK.getValue(); + krefer = pK.intValue(); } - final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); + final IntParameter pK2 = new IntParameter(KREACH_ID); + pK2.setOptional(true); + pK2.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK2)) { + kreach = pK2.intValue(); + } else { + kreach = krefer; + } + + final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class); + reachDistP.setOptional(true); if (config.grab(reachDistP)) { reachabilityDistanceFunction = reachDistP.instantiateClass(config); + } else { + reachabilityDistanceFunction = distanceFunction; } } @Override - protected LOF<O, D> makeInstance() { - // Default is to re-use the same distance - DistanceFunction<O, D> rdist = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : distanceFunction; - return new LOF<O, D>(k, distanceFunction, rdist); + protected FlexibleLOF<O, D> makeInstance() { + return new FlexibleLOF<>(krefer, kreach, distanceFunction, reachabilityDistanceFunction); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java index 655a0910..ae297a3c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/INFLO.java @@ -1,4 +1,4 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -34,13 +35,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -154,7 +155,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa int count = rnns.get(id).size();
if (!processedIDs.contains(id)) {
// TODO: use exactly k neighbors?
- KNNResult<D> list = knnQuery.getKNNForDBID(id, k);
+ KNNList<D> list = knnQuery.getKNNForDBID(id, k);
knns.get(id).addDBIDs(list);
processedIDs.add(id);
density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
@@ -164,7 +165,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa for (DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
if (!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
- KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
+ KNNList<D> listQ = knnQuery.getKNNForDBID(q, k);
knns.get(q).addDBIDs(listQ);
density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
processedIDs.add(q);
@@ -209,7 +210,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa }
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Influence Outlier Score", "inflo-outlier", TypeUtil.DOUBLE, inflos, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(inflominmax.getMin(), inflominmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -254,7 +255,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa @Override
protected INFLO<O, D> makeInstance() {
- return new INFLO<O, D>(distanceFunction, m, k);
+ return new INFLO<>(distanceFunction, m, k);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java index 4ce0313e..4a86e93d 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDF.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,10 +24,12 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; @@ -35,6 +37,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; @@ -43,18 +49,14 @@ 
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; -import de.lmu.ifi.dbs.elki.math.statistics.GaussianKernelDensityFunction; -import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.GaussianKernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; @@ -133,10 +135,11 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte /** * Run the naive kernel density LOF algorithm. * + * @param database Database to query * @param relation Data to process * @return LOF outlier result */ - public OutlierResult run(Relation<O> relation) { + public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? 
new StepProgress("LDF", 3) : null; final int dim = RelationUtil.dimensionality(relation); @@ -150,43 +153,54 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte if (stepprog != null) { stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k); - relation.getDatabase().addIndex(preproc); - DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction()); + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); + database.addIndex(preproc); + DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); knnq = preproc.getKNNQuery(rdq, k); } - // Compute LRDs + // Compute LDEs if (stepprog != null) { stepprog.beginStep(2, "Computing LDEs.", LOG); } WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); FiniteProgress densProgress = LOG.isVerbose() ? 
new FiniteProgress("Densities", ids.size(), LOG) : null; for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; if (neighbors instanceof DoubleDistanceKNNList) { // Fast version for double distances - for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { if (DBIDUtil.equal(neighbor, it)) { continue; } - double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance(); - - final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist); - sum += kernel.density(v) / Math.pow(h * nkdist, dim); - count++; + final double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance(); + if (nkdist > 0.) { + final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist); + sum += kernel.density(v) / Math.pow(h * nkdist, dim); + count++; + } else { + sum = Double.POSITIVE_INFINITY; + count++; + break; + } } } else { - for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { if (DBIDUtil.equal(neighbor, it)) { continue; } - double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue(); - final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist); - sum += kernel.density(v) / Math.pow(h * nkdist, dim); - count++; + final double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue(); + if (nkdist > 0.) 
{ + final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist); + sum += kernel.density(v) / Math.pow(h * nkdist, dim); + count++; + } else { + sum = Double.POSITIVE_INFINITY; + count++; + break; + } } } ldes.putDouble(it, sum / count); @@ -209,7 +223,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null; for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { final double lrdp = ldes.doubleValue(it); - final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { @@ -240,7 +254,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte } // Build result representation. - Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. 
/ c, 1 / (1 + c)); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); @@ -318,7 +332,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte k = pK.getValue(); } - ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class); + ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class); if (config.grab(kernelP)) { kernel = kernelP.instantiateClass(config); } @@ -336,7 +350,7 @@ public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> exte @Override protected LDF<O, D> makeInstance() { - return new LDF<O, D>(k, distanceFunction, kernel, h, c); + return new LDF<>(k, distanceFunction, kernel, h, c); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java index fbbfe484..80ed3f68 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LDOF.java @@ -1,4 +1,4 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
+package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
/*
This file is part of ELKI:
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
@@ -32,13 +33,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -47,6 +48,7 @@ import de.lmu.ifi.dbs.elki.math.Mean; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -78,6 +80,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; @Title("LDOF: Local Distance-Based Outlier Factor")
@Description("Local outlier detection appraoch suitable for scattered data by averaging the kNN distance over all k nearest neighbors")
@Reference(authors = "K. Zhang, M. Hutter, H. Jin", title = "A New Local Distance-Based Outlier Detection Approach for Scattered Real-World Data", booktitle = "Proc. 13th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining (PAKDD 2009), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2_84")
+@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF"})
public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
@@ -136,14 +139,14 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas Mean dxp = new Mean(), Dxp = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
// skip the point itself
dxp.reset(); Dxp.reset();
// TODO: optimize for double distances
- for (DistanceDBIDResultIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ for (DistanceDBIDListIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
if(!DBIDUtil.equal(neighbor1, iditer)) {
dxp.put(neighbor1.getDistance().doubleValue());
- for (DistanceDBIDResultIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ for (DistanceDBIDListIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
}
@@ -167,7 +170,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas }
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("LDOF Outlier Score", "ldof-outlier", TypeUtil.DOUBLE, ldofs, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("LDOF Outlier Score", "ldof-outlier", TypeUtil.DOUBLE, ldofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -204,7 +207,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas @Override
protected LDOF<O, D> makeInstance() {
- return new LDOF<O, D>(distanceFunction, k);
+ return new LDOF<>(distanceFunction, k);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java index ba9ad20e..e76c6034 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOCI.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,6 +28,7 @@ import java.util.Collections; import java.util.List; import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; @@ -36,14 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; -import 
de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -52,6 +53,7 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -83,6 +85,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; @Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral") @Description("Algorithm to compute outliers based on the Local Correlation Integral") @Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802") +@Alias({"de.lmu.ifi.dbs.elki.algorithm.outlier.LOCI"}) public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. 
@@ -150,9 +153,9 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas // LOCI preprocessing step WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax); + DistanceDBIDList<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax); // build list of critical distances - ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() << 1); + ArrayList<DoubleIntPair> cdist = new ArrayList<>(neighbors.size() << 1); { for(int i = 0; i < neighbors.size(); i++) { DistanceDBIDPair<D> r = neighbors.get(i); @@ -203,7 +206,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas if(maxneig >= nmin) { D range = distFunc.getDistanceFactory().fromDouble(maxdist); // Compute the largest neighborhood we will need. - DistanceDBIDResult<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range); + DistanceDBIDList<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range); // TODO: Ensure the set is sorted. Should be a no-op with most indexes. // For any critical distance, compute the normalized MDEF score. 
for(DoubleIntPair c : cdist) { @@ -218,7 +221,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF} MeanVariance mv_n_r_alpha = new MeanVariance(); // TODO: optimize for double distances - for (DistanceDBIDResultIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) { // Stop at radius r if(neighbor.getDistance().doubleValue() > r) { break; @@ -256,10 +259,10 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas if(progressLOCI != null) { progressLOCI.ensureCompleted(LOG); } - Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); - result.addChildResult(new MaterializedRelation<Double>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs())); + result.addChildResult(new MaterializedRelation<>("LOCI MDEF Radius", "loci-critical-radius", TypeUtil.DOUBLE, mdef_radius, relation.getDBIDs())); return result; } @@ -313,7 +316,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas protected void makeOptions(Parameterization config) { super.makeOptions(config); final D distanceFactory = (distanceFunction != null) ? 
distanceFunction.getDistanceFactory() : null; - final DistanceParameter<D> rmaxP = new DistanceParameter<D>(RMAX_ID, distanceFactory); + final DistanceParameter<D> rmaxP = new DistanceParameter<>(RMAX_ID, distanceFactory); if(config.grab(rmaxP)) { rmax = rmaxP.getValue(); } @@ -331,7 +334,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas @Override protected LOCI<O, D> makeInstance() { - return new LOCI<O, D>(distanceFunction, rmax, nmin, alpha); + return new LOCI<>(distanceFunction, rmax, nmin, alpha); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java new file mode 100644 index 00000000..302dafe6 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LOF.java @@ -0,0 +1,293 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; +import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; +import 
de.lmu.ifi.dbs.elki.utilities.documentation.Description; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * <p> + * Algorithm to compute density-based local outlier factors in a database based + * on a specified parameter {@link Parameterizer#K_ID} ({@code -lof.k}). + * </p> + * + * <p> + * The original LOF parameter was called "minPts", but for consistency + * within ELKI we have renamed this parameter to "k". + * </p> + * + * <p> + * Reference: <br> + * M. M. Breunig, H.-P. Kriegel, R. Ng, J. Sander: LOF: Identifying + * Density-Based Local Outliers. <br> + * In: Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD'00), + * Dallas, TX, 2000. + * </p> + * + * @author Erich Schubert + * @author Elke Achtert + * + * @apiviz.has KNNQuery + * + * @param <O> the type of DatabaseObjects handled by this Algorithm + * @param <D> Distance type + */ +@Title("LOF: Local Outlier Factor") +@Description("Algorithm to compute density-based local outlier factors in a database based on the neighborhood size parameter 'k'") +@Reference(authors = "M. M. Breunig, H.-P. Kriegel, R. Ng, and J. Sander", title = "LOF: Identifying Density-Based Local Outliers", booktitle = "Proc. 2nd ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '00), Dallas, TX, 2000", url = "http://dx.doi.org/10.1145/342009.335388") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LOF", "outlier.LOF", "LOF" }) +public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { + /** + * The logger for this class. 
+ */ + private static final Logging LOG = Logging.getLogger(LOF.class); + + /** + * Holds the value of {@link Parameterizer#K_ID}. + */ + protected int k = 2; + + /** + * Constructor. + * + * @param k the value of k + * @param distanceFunction the neighborhood distance function + */ + public LOF(int k, DistanceFunction<? super O, D> distanceFunction) { + super(distanceFunction); + this.k = k + 1; + } + + /** + * Performs the Generalized LOF_SCORE algorithm on the given database. + * + * @param database Database to query + * @param relation Data to process + * @return LOF outlier result + */ + public OutlierResult run(Database database, Relation<O> relation) { + StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null; + DistanceQuery<O, D> dq = database.getDistanceQuery(relation, getDistanceFunction()); + // "HEAVY" flag for knn query since it is used more than once + KNNQuery<O, D> knnq = database.getKNNQuery(dq, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); + // No optimized kNN query - use a preprocessor! + if (!(knnq instanceof PreprocessorKNNQuery)) { + if (stepprog != null) { + stepprog.beginStep(1, "Materializing LOF neighborhoods.", LOG); + } + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); + knnq = preproc.getKNNQuery(dq, k); + } + DBIDs ids = relation.getDBIDs(); + + // Compute LRDs + if (stepprog != null) { + stepprog.beginStep(2, "Computing LRDs.", LOG); + } + WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + { + FiniteProgress lrdsProgress = LOG.isVerbose() ? 
new FiniteProgress("LRD", ids.size(), LOG) : null; + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + double sum = 0.0; + int count = 0; + if (neighbors instanceof DoubleDistanceKNNList) { + // Fast version for double distances + for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, iter)) { + continue; + } + KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); + final double nkdist; + if (neighborsNeighbors instanceof DoubleDistanceKNNList) { + nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance(); + } else { + nkdist = neighborsNeighbors.getKNNDistance().doubleValue(); + } + sum += Math.max(neighbor.doubleDistance(), nkdist); + count++; + } + } else { + for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, iter)) { + continue; + } + KNNList<D> neighborsNeighbors = knnq.getKNNForDBID(neighbor, k); + sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()); + count++; + } + } + // Avoid division by 0 + final double lrd = (sum > 0) ? (count / sum) : Double.POSITIVE_INFINITY; + lrds.putDouble(iter, lrd); + if (lrdsProgress != null) { + lrdsProgress.incrementProcessed(LOG); + } + } + if (lrdsProgress != null) { + lrdsProgress.ensureCompleted(LOG); + } + } + + // compute LOF_SCORE of each db object + if (stepprog != null) { + stepprog.beginStep(3, "Computing LOFs.", LOG); + } + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + // track the maximum value for normalization. + DoubleMinMax lofminmax = new DoubleMinMax(); + { + FiniteProgress progressLOFs = LOG.isVerbose() ? 
new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null; + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final double lof; + final double lrdp = lrds.doubleValue(iter); + final KNNList<D> neighbors = knnq.getKNNForDBID(iter, k); + if (!Double.isInfinite(lrdp)) { + double sum = 0.0; + int count = 0; + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + // skip the point itself + if (DBIDUtil.equal(neighbor, iter)) { + continue; + } + final double val = lrds.doubleValue(neighbor); + sum += val; + count++; + if (Double.isInfinite(val)) { + break; + } + } + lof = sum / (lrdp * count); + } else { + lof = 1.0; + } + lofs.putDouble(iter, lof); + // update minimum and maximum + lofminmax.put(lof); + + if (progressLOFs != null) { + progressLOFs.incrementProcessed(LOG); + } + } + if (progressLOFs != null) { + progressLOFs.ensureCompleted(LOG); + } + } + + if (stepprog != null) { + stepprog.setCompleted(LOG); + } + + // Build result representation. + Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Factor", "lof-outlier", TypeUtil.DOUBLE, lofs, ids); + OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + + return result; + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its LOF_SCORE, must be an integer greater than + * 1. 
+ */ + public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + + /** + * The neighborhood size to use. + */ + protected int k = 2; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + final IntParameter pK = new IntParameter(K_ID); + pK.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK)) { + k = pK.getValue(); + } + } + + @Override + protected LOF<O, D> makeInstance() { + return new LOF<>(k, distanceFunction); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java index 5da06983..15ff690a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/LoOP.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */ import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; @@ -34,17 +35,17 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import 
de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -56,6 +57,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; +import de.lmu.ifi.dbs.elki.utilities.Alias; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -85,6 +87,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; @Title("LoOP: Local Outlier Probabilities") @Description("Variant of the LOF algorithm normalized using statistical values.") @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. 
Zimek", title = "LoOP: Local Outlier Probabilities", booktitle = "Proceedings of the 18th International Conference on Information and Knowledge Management (CIKM), Hong Kong, China, 2009", url = "http://dx.doi.org/10.1145/1645953.1646195") +@Alias({ "de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP", "LoOP", "outlier.LoOP" }) public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. @@ -188,7 +191,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O if (stepprog != null) { stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG); } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, comparisonDistanceFunction, kcomp); + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, comparisonDistanceFunction, kcomp); database.addIndex(preproc); DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction); knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE); @@ -205,7 +208,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach); knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp); } - return new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(knnComp, knnReach); + return new Pair<>(knnComp, knnReach); } /** @@ -241,13 +244,13 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O } FiniteProgress prdsProgress = LOG.isVerbose() ? 
new FiniteProgress("pdists", relation.size(), LOG) : null; for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - final KNNResult<D> neighbors = knnReach.getKNNForDBID(iditer, kreach); + final KNNList<D> neighbors = knnReach.getKNNForDBID(iditer, kreach); mean.reset(); // use first kref neighbors as reference set int ks = 0; // TODO: optimize for double distances if (neighbors instanceof DoubleDistanceKNNList) { - for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { final double d = neighbor.doubleDistance(); mean.put(d * d); @@ -258,7 +261,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O } } } else { - for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { double d = neighbor.getDistance().doubleValue(); mean.put(d * d); @@ -287,7 +290,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null; MeanVariance mv = new MeanVariance(); for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - final KNNResult<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp); + final KNNList<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp); mv.reset(); // use first kref neighbors as comparison set. int ks = 0; @@ -340,7 +343,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O } // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("Local Outlier Probabilities", "loop-outlier", TypeUtil.DOUBLE, loops, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); return new OutlierResult(scoreMeta, scoreResult); } @@ -403,7 +406,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O kcomp = kcompP.intValue(); } - final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<DistanceFunction<O, D>>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); + final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); if (config.grab(compDistP)) { comparisonDistanceFunction = compDistP.instantiateClass(config); } @@ -417,7 +420,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O kreach = kcomp; } - final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); + final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); if (config.grab(reachDistP)) { reachabilityDistanceFunction = reachDistP.instantiateClass(config); } @@ -433,7 +436,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O @Override protected LoOP<O, D> makeInstance() { DistanceFunction<O, D> realreach = (reachabilityDistanceFunction != null) ? 
reachabilityDistanceFunction : comparisonDistanceFunction; - return new LoOP<O, D>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda); + return new LoOP<>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java index bac5db36..c01c914f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/OnlineLOF.java @@ -1,30 +1,31 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2013
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.List;
-import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.QueryUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
@@ -33,6 +34,8 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
@@ -40,8 +43,6 @@ import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
@@ -53,11 +54,7 @@ import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -66,10 +63,10 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; *
* @author Elke Achtert
*
- * @apiviz.has LOF.LOFResult oneway - - updates
+ * @apiviz.has FlexibleLOF.LOFResult oneway - - updates
*/
-// TODO: related to publication?
-public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> {
+// TODO: related to publication?
+public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends FlexibleLOF<O, D> {
/**
* The logger for this class.
*/
@@ -78,12 +75,13 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { /**
* Constructor.
*
- * @param k the value of k
+ * @param krefer The number of neighbors for reference
+ * @param kreach The number of neighbors for reachability distance
* @param neighborhoodDistanceFunction the neighborhood distance function
* @param reachabilityDistanceFunction the reachability distance function
*/
- public OnlineLOF(int k, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
- super(k, neighborhoodDistanceFunction, reachabilityDistanceFunction);
+ public OnlineLOF(int krefer, int kreach, DistanceFunction<? super O, D> neighborhoodDistanceFunction, DistanceFunction<? super O, D> reachabilityDistanceFunction) {
+ super(krefer, kreach, neighborhoodDistanceFunction, reachabilityDistanceFunction);
}
/**
@@ -92,10 +90,10 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { * the preprocessors.
*/
@Override
- public OutlierResult run(Relation<O> relation) {
+ public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
- Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(relation, stepprog);
+ Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
KNNQuery<O, D> kNNReach = queries.getFirst().getSecond();
RKNNQuery<O, D> rkNNRefer = queries.getSecond().getFirst();
@@ -107,8 +105,8 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { // add listener
KNNListener l = new LOFKNNListener(lofResult);
- ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D, ? extends KNNResult<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
- ((MaterializeKNNPreprocessor<O, D>)((PreprocessorKNNQuery<O, D, ? extends KNNResult<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
+ ((MaterializeKNNPreprocessor<O, D>) ((PreprocessorKNNQuery<O, D, ? extends KNNList<D>>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
return lofResult.getResult();
}
@@ -120,50 +118,49 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { * @param stepprog Progress logger
* @return the kNN and rkNN queries
*/
- private Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> getKNNAndRkNNQueries(Relation<O> relation, StepProgress stepprog) {
+ private Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> getKNNAndRkNNQueries(Database database, Relation<O> relation, StepProgress stepprog) {
// Use "HEAVY" flag, since this is an online algorithm
- KNNQuery<O, D> kNNRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- RKNNQuery<O, D> rkNNRefer = QueryUtil.getRKNNQuery(relation, neighborhoodDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O, D> kNNRefer = QueryUtil.getKNNQuery(relation, referenceDistanceFunction, krefer, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ RKNNQuery<O, D> rkNNRefer = QueryUtil.getRKNNQuery(relation, referenceDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
// No optimized kNN query or RkNN query - use a preprocessor!
- if(kNNRefer == null || rkNNRefer == null) {
- if(stepprog != null) {
+ if (kNNRefer == null || rkNNRefer == null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<O, D>(relation, neighborhoodDistanceFunction, k);
- DistanceQuery<O, D> ndq = relation.getDatabase().getDistanceQuery(relation, neighborhoodDistanceFunction);
- kNNRefer = preproc.getKNNQuery(ndq, k, DatabaseQuery.HINT_HEAVY_USE);
- rkNNRefer = preproc.getRKNNQuery(ndq, k, DatabaseQuery.HINT_HEAVY_USE);
+ MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, referenceDistanceFunction, krefer);
+ DistanceQuery<O, D> ndq = database.getDistanceQuery(relation, referenceDistanceFunction);
+ kNNRefer = preproc.getKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
+ rkNNRefer = preproc.getRKNNQuery(ndq, krefer, DatabaseQuery.HINT_HEAVY_USE);
// add as index
relation.getDatabase().addIndex(preproc);
- }
- else {
- if(stepprog != null) {
+ } else {
+ if (stepprog != null) {
stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
- KNNQuery<O, D> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
+ KNNQuery<O, D> kNNReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kreach, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
- if(kNNReach == null || rkNNReach == null) {
- if(stepprog != null) {
+ if (kNNReach == null || rkNNReach == null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
- config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, k);
- MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k);
- DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, reachabilityDistanceFunction);
- kNNReach = preproc.getKNNQuery(rdq, k, DatabaseQuery.HINT_HEAVY_USE);
- rkNNReach = preproc.getRKNNQuery(rdq, k, DatabaseQuery.HINT_HEAVY_USE);
+ config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.K_ID, kreach);
+ MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<>(relation, reachabilityDistanceFunction, kreach);
+ DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, reachabilityDistanceFunction);
+ kNNReach = preproc.getKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
+ rkNNReach = preproc.getRKNNQuery(rdq, kreach, DatabaseQuery.HINT_HEAVY_USE);
// add as index
relation.getDatabase().addIndex(preproc);
}
- Pair<KNNQuery<O, D>, KNNQuery<O, D>> kNNPair = new Pair<KNNQuery<O, D>, KNNQuery<O, D>>(kNNRefer, kNNReach);
- Pair<RKNNQuery<O, D>, RKNNQuery<O, D>> rkNNPair = new Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>(rkNNRefer, rkNNReach);
+ Pair<KNNQuery<O, D>, KNNQuery<O, D>> kNNPair = new Pair<>(kNNRefer, kNNReach);
+ Pair<RKNNQuery<O, D>, RKNNQuery<O, D>> rkNNPair = new Pair<>(rkNNRefer, rkNNReach);
- return new Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>>(kNNPair, rkNNPair);
+ return new Pair<>(kNNPair, rkNNPair);
}
/**
@@ -201,24 +198,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { AbstractMaterializeKNNPreprocessor<O, D, ?> p1 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNRefer()).getPreprocessor();
AbstractMaterializeKNNPreprocessor<O, D, ?> p2 = ((PreprocessorKNNQuery<O, D, ?>) lofResult.getKNNReach()).getPreprocessor();
- if(firstEventReceived == null) {
- if(e.getSource().equals(p1) && e.getSource().equals(p2)) {
+ if (firstEventReceived == null) {
+ if (e.getSource().equals(p1) && e.getSource().equals(p2)) {
kNNsChanged(e, e);
- }
- else {
+ } else {
firstEventReceived = e;
}
- }
- else {
- if(e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
+ } else {
+ if (e.getSource().equals(p1) && firstEventReceived.getSource().equals(p2)) {
kNNsChanged(e, firstEventReceived);
firstEventReceived = null;
- }
- else if(e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
+ } else if (e.getSource().equals(p2) && firstEventReceived.getSource().equals(p1)) {
kNNsChanged(firstEventReceived, e);
firstEventReceived = null;
- }
- else {
+ } else {
throw new UnsupportedOperationException("Event sources do not fit!");
}
}
@@ -232,20 +225,18 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { * @param e2 the change event of the second preprocessor
*/
private void kNNsChanged(KNNChangeEvent e1, KNNChangeEvent e2) {
- if(!e1.getType().equals(e2.getType())) {
+ if (!e1.getType().equals(e2.getType())) {
throw new UnsupportedOperationException("Event types do not fit: " + e1.getType() + " != " + e2.getType());
}
- if(!e1.getObjects().equals(e2.getObjects())) {
+ if (!e1.getObjects().equals(e2.getObjects())) {
throw new UnsupportedOperationException("Objects do not fit: " + e1.getObjects() + " != " + e2.getObjects());
}
- if(e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
+ if (e1.getType().equals(KNNChangeEvent.Type.DELETE)) {
kNNsRemoved(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- }
- else if(e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
+ } else if (e1.getType().equals(KNNChangeEvent.Type.INSERT)) {
kNNsInserted(e1.getObjects(), e1.getUpdates(), e2.getUpdates(), lofResult);
- }
- else {
+ } else {
throw new UnsupportedOperationException("Unsupported event type: " + e1.getType());
}
}
@@ -264,38 +255,38 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// recompute lrds
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
- List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if(Double.isNaN(old_lrd) || old_lrd != new_lrd) {
+ if (Double.isNaN(old_lrd) || old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(3, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
}
@@ -314,7 +305,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
// delete lrds and lofs
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
}
for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
@@ -323,38 +314,38 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { }
// recompute lrds
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(2, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
- List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, kreach);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
for (DBIDIter iter = affected_lrd_id_candidates.iter(); iter.valid(); iter.advance()) {
double new_lrd = new_lrds.doubleValue(iter);
double old_lrd = lofResult.getLrds().doubleValue(iter);
- if(old_lrd != new_lrd) {
+ if (old_lrd != new_lrd) {
lofResult.getLrds().putDouble(iter, new_lrd);
affected_lrd_ids.add(iter);
}
}
// recompute lofs
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(3, "Recompute LOFS.", LOG);
}
- List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDList<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, krefer);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.beginStep(4, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
- if(stepprog != null) {
+ if (stepprog != null) {
stepprog.setCompleted(LOG);
}
}
@@ -367,12 +358,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { * @return a set containing the ids of the query result and the specified
* ids
*/
- private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDResult<D>> queryResults, DBIDs... ids) {
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDList<D>> queryResults, DBIDs... ids) {
ModifiableDBIDs result = DBIDUtil.newHashSet();
- for(DBIDs dbids : ids) {
+ for (DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
- for(DistanceDBIDResult<D> queryResult : queryResults) {
+ for (DistanceDBIDList<D> queryResult : queryResults) {
result.addDBIDs(queryResult);
}
return DBIDUtil.newArray(result);
@@ -394,12 +385,12 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { DoubleMinMax new_lofminmax = lofsAndMax.getSecond();
// Actualize meta info
- if(new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
+ if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMaximum() < new_lofminmax.getMax()) {
BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
scoreMeta.setActualMaximum(new_lofminmax.getMax());
}
- if(new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
+ if (new_lofminmax.isValid() && lofResult.getResult().getOutlierMeta().getActualMinimum() > new_lofminmax.getMin()) {
BasicOutlierScoreMeta scoreMeta = (BasicOutlierScoreMeta) lofResult.getResult().getOutlierMeta();
scoreMeta.setActualMinimum(new_lofminmax.getMin());
}
@@ -412,49 +403,16 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { }
/**
- * Parameterization class. - * - * @author Erich Schubert - * - * @apiviz.exclude + * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
- /**
- * The neighborhood size to use
- */
- protected int k = 2;
-
- /**
- * Neighborhood distance function.
- */
- protected DistanceFunction<O, D> neighborhoodDistanceFunction = null;
-
- /**
- * Reachability distance function.
- */
- protected DistanceFunction<O, D> reachabilityDistanceFunction = null;
-
- @Override
- protected void makeOptions(Parameterization config) {
- super.makeOptions(config);
-
- final IntParameter pK = new IntParameter(K_ID);
- pK.addConstraint(new GreaterConstraint(1));
- if(config.grab(pK)) {
- k = pK.getValue();
- }
-
- final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true);
- if(config.grab(reachDistP)) {
- reachabilityDistanceFunction = reachDistP.instantiateClass(config);
- }
- }
-
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends FlexibleLOF.Parameterizer<O, D> {
@Override
protected OnlineLOF<O, D> makeInstance() {
- // Default is to re-use the same distance
- DistanceFunction<O, D> rdist = (reachabilityDistanceFunction != null) ? reachabilityDistanceFunction : distanceFunction;
- return new OnlineLOF<O, D>(k, distanceFunction, rdist);
+ return new OnlineLOF<>(kreach, krefer, distanceFunction, reachabilityDistanceFunction);
}
}
-}
\ No newline at end of file +}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java index 1c104c08..2ff7534a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimpleKernelDensityLOF.java @@ -1,10 +1,10 @@ -package de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,10 +24,12 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; @@ -35,6 +37,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; 
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; @@ -43,18 +49,14 @@ import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; -import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction; -import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; @@ -107,10 +109,11 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD /** * Run the naive kernel density LOF algorithm. * + * @param database Database to query * @param relation Data to process * @return LOF outlier result */ - public OutlierResult run(Relation<O> relation) { + public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? 
new StepProgress("KernelDensityLOF", 3) : null; final int dim = RelationUtil.dimensionality(relation); @@ -124,9 +127,9 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD if (stepprog != null) { stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k); - relation.getDatabase().addIndex(preproc); - DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction()); + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); + database.addIndex(preproc); + DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); knnq = preproc.getKNNQuery(rdq, k); } @@ -137,12 +140,12 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); FiniteProgress densProgress = LOG.isVerbose() ? 
new FiniteProgress("Densities", ids.size(), LOG) : null; for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); int count = 0; double sum = 0.0; if (neighbors instanceof DoubleDistanceKNNList) { // Fast version for double distances - for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { if (DBIDUtil.equal(neighbor, it)) { continue; } @@ -152,7 +155,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD count++; } } else { - for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { if (DBIDUtil.equal(neighbor, it)) { continue; } @@ -185,7 +188,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD final double lrdp = dens.doubleValue(it); final double lof; if (lrdp > 0) { - final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { @@ -217,7 +220,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD } // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); @@ -264,13 +267,13 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter pK = new IntParameter(LOF.K_ID); + final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID); pK.addConstraint(new GreaterConstraint(1)); if (config.grab(pK)) { k = pK.getValue(); } - ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class); + ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class); if (config.grab(kernelP)) { kernel = kernelP.instantiateClass(config); } @@ -278,7 +281,7 @@ public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberD @Override protected SimpleKernelDensityLOF<O, D> makeInstance() { - return new SimpleKernelDensityLOF<O, D>(k, distanceFunction, kernel); + return new SimpleKernelDensityLOF<>(k, distanceFunction, kernel); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java index 48505ed5..413eaca1 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/SimplifiedLOF.java @@ -1,10 +1,10 @@ -package 
de.lmu.ifi.dbs.elki.algorithm.outlier; +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,8 +24,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; @@ -33,6 +35,10 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; @@ -40,10 +46,6 @@ import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; -import 
de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -53,6 +55,8 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.Alias; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; @@ -61,6 +65,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * A simplified version of the original LOF algorithm, which does not use the * reachability distance, yielding less stable results on inliers. 
* + * Reference: + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local outlier detection reconsidered: a generalized view on locality with + * applications to spatial, video, and network outlier detection<br /> + * In: Data Mining and Knowledge Discovery + * </p> + * * @author Erich Schubert * * @apiviz.has KNNQuery @@ -68,11 +80,13 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * @param <O> the type of DatabaseObjects handled by this Algorithm * @param <D> Distance type */ -public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { +@Reference(authors = "Erich Schubert, Arthur Zimek, Hans-Peter Kriegel", title = "Local outlier detection reconsidered: a generalized view on locality with applications to spatial, video, and network outlier detection", booktitle = "Data Mining and Knowledge Discovery", url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +@Alias({ "SimpleLOF", "outlier.SimpleLOF", "de.lmu.ifi.dbs.elki.algorithm.outlier.SimpleLOF" }) +public class SimplifiedLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ - private static final Logging LOG = Logging.getLogger(SimpleLOF.class); + private static final Logging LOG = Logging.getLogger(SimplifiedLOF.class); /** * Parameter k. @@ -84,7 +98,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan * * @param k the value of k */ - public SimpleLOF(int k, DistanceFunction<? super O, D> distance) { + public SimplifiedLOF(int k, DistanceFunction<? super O, D> distance) { super(distance); this.k = k + 1; } @@ -92,10 +106,11 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan /** * Run the Simple LOF algorithm. 
* + * @param database Database to query * @param relation Data to process * @return LOF outlier result */ - public OutlierResult run(Relation<O> relation) { + public OutlierResult run(Database database, Relation<O> relation) { StepProgress stepprog = LOG.isVerbose() ? new StepProgress("SimpleLOF", 3) : null; DBIDs ids = relation.getDBIDs(); @@ -107,9 +122,9 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan if (stepprog != null) { stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); } - MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k); - relation.getDatabase().addIndex(preproc); - DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction()); + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<>(relation, getDistanceFunction(), k); + database.addIndex(preproc); + DistanceQuery<O, D> rdq = database.getDistanceQuery(relation, getDistanceFunction()); knnq = preproc.getKNNQuery(rdq, k); } @@ -120,12 +135,12 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); FiniteProgress densProgress = LOG.isVerbose() ? 
new FiniteProgress("Densities", ids.size(), LOG) : null; for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { - final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; if (neighbors instanceof DoubleDistanceKNNList) { // Fast version for double distances - for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + for (DoubleDistanceDBIDListIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { if (DBIDUtil.equal(neighbor, it)) { continue; } @@ -133,7 +148,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan count++; } } else { - for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { if (DBIDUtil.equal(neighbor, it)) { continue; } @@ -165,7 +180,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan final double lrdp = dens.doubleValue(it); final double lof; if (lrdp > 0) { - final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + final KNNList<D> neighbors = knnq.getKNNForDBID(it, k); double sum = 0.0; int count = 0; for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { @@ -197,7 +212,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan } // Build result representation. 
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); @@ -234,7 +249,7 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter pK = new IntParameter(LOF.K_ID); + final IntParameter pK = new IntParameter(LOF.Parameterizer.K_ID); pK.addConstraint(new GreaterConstraint(1)); if (config.grab(pK)) { k = pK.getValue(); @@ -242,8 +257,8 @@ public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistan } @Override - protected SimpleLOF<O, D> makeInstance() { - return new SimpleLOF<O, D>(k, distanceFunction); + protected SimplifiedLOF<O, D> makeInstance() { + return new SimplifiedLOF<>(k, distanceFunction); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java new file mode 100644 index 00000000..48d4b16a --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/lof/package-info.java @@ -0,0 +1,27 @@ +/** + * <p>LOF family of outlier detection algorithms.</p> + */ +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2013 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 
of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.algorithm.outlier.lof; + diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java index f230fd3b..0d0f7303 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -202,7 +202,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> else { meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); } - Relation<Double> scoresult = new MaterializedRelation<Double>("External Outlier", "external-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoresult = new MaterializedRelation<>("External Outlier", "external-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierResult or = new OutlierResult(meta, scoresult); // Apply scaling @@ -327,7 +327,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> inverted = inverstedF.getValue(); } - ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<ScalingFunction>(SCALING_ID, 
ScalingFunction.class, IdentityScaling.class); + ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<>(SCALING_ID, ScalingFunction.class, IdentityScaling.class); if(config.grab(scalingP)) { scaling = scalingP.instantiateClass(config); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java index b53a0942..22c20fc3 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,11 +28,12 @@ import java.util.BitSet; import java.util.Random; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; -import de.lmu.ifi.dbs.elki.algorithm.outlier.LOF; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; @@ -127,25 +128,26 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements /** * Run the algorithm on a data set. 
* + * @param database Database context * @param relation Relation to use * @return Outlier detection result */ - public OutlierResult run(Relation<NumberVector<?>> relation) { + public OutlierResult run(Database database, Relation<NumberVector<?>> relation) { final int dbdim = RelationUtil.dimensionality(relation); final int mindim = dbdim >> 1; final int maxdim = dbdim - 1; final Random rand = rnd.getRandom(); - ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num); + ArrayList<OutlierResult> results = new ArrayList<>(num); { FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null; for (int i = 0; i < num; i++) { BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand); SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset); - LOF<NumberVector<?>, DoubleDistance> lof = new LOF<NumberVector<?>, DoubleDistance>(k, df); + LOF<NumberVector<?>, DoubleDistance> lof = new LOF<>(k, df); // run LOF and collect the result - OutlierResult result = lof.run(relation); + OutlierResult result = lof.run(database, relation); results.add(result); if (prog != null) { prog.incrementProcessed(LOG); @@ -219,7 +221,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements } } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); - Relation<Double> scoreres = new MaterializedRelation<Double>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoreres = new MaterializedRelation<>("Feature bagging", "fb-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); return new OutlierResult(meta, scoreres); } @@ -314,7 +316,7 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter pK = new IntParameter(LOF.K_ID); + final IntParameter pK = new 
IntParameter(LOF.Parameterizer.K_ID); pK.addConstraint(new GreaterConstraint(1)); if (config.grab(pK)) { k = pK.getValue(); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java index 15b94322..69608293 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java @@ -33,13 +33,12 @@ import java.util.Set; import java.util.TreeSet;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.outlier.LOF;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.VectorUtil;
import de.lmu.ifi.dbs.elki.data.VectorUtil.SortDBIDsBySingleDimension;
import de.lmu.ifi.dbs.elki.data.projection.NumericalFeatureSelection;
-import de.lmu.ifi.dbs.elki.data.projection.Projection;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
@@ -66,6 +65,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -172,7 +172,6 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe */
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
- final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getRandom());
@@ -180,7 +179,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe if (LOG.isVerbose()) {
LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
- List<Relation<Double>> results = new ArrayList<Relation<Double>>();
+ List<Relation<Double>> results = new ArrayList<>();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// run outlier detection and collect the result
@@ -192,8 +191,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe }
ProxyDatabase pdb = new ProxyDatabase(ids);
- Projection<V, V> proj = new NumericalFeatureSelection<V>(dimset, factory);
- pdb.addRelation(new ProjectedView<V, V>(relation, proj));
+ pdb.addRelation(new ProjectedView<>(relation, new NumericalFeatureSelection<V>(dimset)));
// run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
@@ -221,7 +219,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe minmax.put(sum);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
- Relation<Double> scoreres = new MaterializedRelation<Double>("HiCS", "HiCS-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreres = new MaterializedRelation<>("HiCS", "HiCS-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
@@ -236,7 +234,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe */
private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation) {
final int dim = RelationUtil.dimensionality(relation);
- ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
+ ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<>(dim + 1);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
for (int i = 0; i < dim; i++) {
@@ -264,8 +262,8 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe dprog.setProcessed(2, LOG);
}
- TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
- TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<HiCSSubspace>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
+ TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
+ TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
// compute two-element sets of subspaces
for (int i = 0; i < dbdim; i++) {
@@ -291,10 +289,10 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe }
// result now contains all d-dimensional sets of subspaces
- ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList.size());
- for (HiCSSubspace sub : dDimensionalList) {
- subspaceList.add(sub);
- candidateList.add(sub);
+ ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
+ for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
+ subspaceList.add(it.get());
+ candidateList.add(it.get());
}
dDimensionalList.clear();
// candidateList now contains the *m* best d-dimensional sets
@@ -322,8 +320,8 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe }
// Prune
for (HiCSSubspace cand : candidateList) {
- for (HiCSSubspace nextSet : dDimensionalList) {
- if (nextSet.contrast > cand.contrast) {
+ for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
+ if (it.get().contrast > cand.contrast) {
subspaceList.remove(cand);
break;
}
@@ -610,12 +608,12 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe alpha = alphaP.doubleValue();
}
- final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<OutlierAlgorithm>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
+ final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
if (config.grab(algoP)) {
outlierAlgorithm = algoP.instantiateClass(config);
}
- final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
+ final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
if (config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
@@ -634,7 +632,7 @@ public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierRe @Override
protected HiCS<V> makeInstance() {
- return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, rnd);
+ return new HiCS<>(m, alpha, outlierAlgorithm, statTest, cutoff, rnd);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java index 387041da..8ebdc27a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; import de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction; import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction; +import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; /** * Scale another outlier score using the given scaling function. 
@@ -114,7 +115,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), scaling.getMin(), scaling.getMax()); - Relation<Double> scoresult = new MaterializedRelation<Double>("Scaled Outlier", "scaled-outlier", TypeUtil.DOUBLE, scaledscores, scores.getDBIDs()); + Relation<Double> scoresult = new MaterializedRelation<>("Scaled Outlier", "scaled-outlier", TypeUtil.DOUBLE, scaledscores, scores.getDBIDs()); OutlierResult result = new OutlierResult(meta, scoresult); result.addChildResult(innerresult); @@ -167,12 +168,12 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<Algorithm> algP = new ObjectParameter<Algorithm>(OptionID.ALGORITHM, OutlierAlgorithm.class); + ObjectParameter<Algorithm> algP = new ObjectParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, OutlierAlgorithm.class); if(config.grab(algP)) { algorithm = algP.instantiateClass(config); } - ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<ScalingFunction>(SCALING_ID, ScalingFunction.class); + ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<>(SCALING_ID, ScalingFunction.class); if(config.grab(scalingP)) { scaling = scalingP.instantiateClass(config); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java index b7791fc4..d40af384 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität 
München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -58,6 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParamet import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep; /** * Simple outlier ensemble method. @@ -100,7 +101,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl int num = algorithms.size(); // Run inner outlier algorithms ModifiableDBIDs ids = DBIDUtil.newHashSet(); - ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num); + ArrayList<OutlierResult> results = new ArrayList<>(num); { FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null; for (Algorithm alg : algorithms) { @@ -155,7 +156,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl } } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); - Relation<Double> scores = new MaterializedRelation<Double>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids); + Relation<Double> scores = new MaterializedRelation<>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids); return new OutlierResult(meta, scores); } @@ -200,7 +201,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectListParameter<OutlierAlgorithm> algP = new ObjectListParameter<OutlierAlgorithm>(OptionID.ALGORITHM, OutlierAlgorithm.class); + ObjectListParameter<OutlierAlgorithm> algP = new ObjectListParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, OutlierAlgorithm.class); if (config.grab(algP)) { 
ListParameterization subconfig = new ListParameterization(); ChainedParameterization chain = new ChainedParameterization(subconfig, config); @@ -208,7 +209,7 @@ public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> impl algorithms = algP.instantiateClasses(chain); subconfig.logAndClearReportedErrors(); } - ObjectParameter<EnsembleVoting> votingP = new ObjectParameter<EnsembleVoting>(VOTING_ID, EnsembleVoting.class); + ObjectParameter<EnsembleVoting> votingP = new ObjectParameter<>(VOTING_ID, EnsembleVoting.class); if (config.grab(votingP)) { voting = votingP.instantiateClass(config); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java index 7c5dd8b0..f28f8db3 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java @@ -8,7 +8,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java index eca0d876..0ce6f9b5 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java @@ -14,7 +14,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java index f37ee182..e059c16c 100644 
--- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -25,8 +25,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java index d3770504..3b3e71b3 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -88,7 +88,7 @@ public abstract class 
AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); + final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); if(config.grab(param)) { npredf = param.instantiateClass(config); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java index cd5670f7..5035cf6f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -28,22 +28,23 @@ import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import 
de.lmu.ifi.dbs.elki.database.ids.DBIDVar; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.ProxyView; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -116,23 +117,24 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends /** * Run the algorithm * + * @param database Database to process * @param relationx Spatial relation * @param relationy Attribute relation * @return Algorithm result */ - public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) { + public OutlierResult run(Database database, Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax mm = new DoubleMinMax(0.0, 0.0); // Outlier detection loop { ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs()); - ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx); + ProxyView<V> proxy = new ProxyView<>(database, idview, relationx); double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha *.5); // Detect outliers while significant. 
while(true) { - Pair<DBID, Double> candidate = singleIteration(proxy, relationy); + Pair<DBIDVar, Double> candidate = singleIteration(proxy, relationy); if(candidate.second < phialpha) { break; } @@ -149,7 +151,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends } } - Relation<Double> scoreResult = new MaterializedRelation<Double>("GLSSODBackward", "GLSSODbackward-outlier", TypeUtil.DOUBLE, scores, relationx.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("GLSSODBackward", "GLSSODbackward-outlier", TypeUtil.DOUBLE, scores, relationx.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0, Double.POSITIVE_INFINITY, 0); return new OutlierResult(scoreMeta, scoreResult); } @@ -161,7 +163,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends * @param relationy Attribute relation * @return Top outlier and associated score */ - private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) { + private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? 
extends NumberVector<?>> relationy) { final int dim = RelationUtil.dimensionality(relationx); final int dimy = RelationUtil.dimensionality(relationy); assert (dim == 2); @@ -203,7 +205,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends // Fill the neighborhood matrix F: { - KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + KNNList<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size()); for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { if(DBIDUtil.equal(id, neighbor)) { @@ -237,7 +239,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends // calculate the absolute values of standard residuals Matrix E = F.times(Y.minus(X.times(b))).timesEquals(norm); - DBID worstid = null; + DBIDVar worstid = DBIDUtil.newVar(); double worstscore = Double.NEGATIVE_INFINITY; int i = 0; for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { @@ -245,11 +247,11 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends // double err = Math.abs(E.get(i, 0)); if(err > worstscore) { worstscore = err; - worstid = DBIDUtil.deref(id); + worstid.set(id); } } - return new Pair<DBID, Double>(worstid, worstscore); + return new Pair<>(worstid, worstscore); } @Override @@ -302,7 +304,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends @Override protected CTLuGLSBackwardSearchAlgorithm<V, D> makeInstance() { - return new CTLuGLSBackwardSearchAlgorithm<V, D>(distanceFunction, k, alpha); + return new CTLuGLSBackwardSearchAlgorithm<>(distanceFunction, k, alpha); } /** diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java index 2caee128..1712dd4f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java +++ 
b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -124,7 +124,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab scores.putDouble(iditer, score); } - Relation<Double> scoreResult = new MaterializedRelation<Double>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -149,7 +149,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends Ab public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuMeanMultipleAttributes<N, O> makeInstance() { - return new CTLuMeanMultipleAttributes<N, O>(npredf); + return new CTLuMeanMultipleAttributes<>(npredf); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java index 7755a459..9848d664 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java @@ -132,7 +132,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { scores.putDouble(iditer, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -161,7 +161,7 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
@Override
protected CTLuMedianAlgorithm<N> makeInstance() {
- return new CTLuMedianAlgorithm<N>(npredf);
+ return new CTLuMedianAlgorithm<>(npredf);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java index 0d515ac7..583958fe 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -150,7 +150,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends scores.putDouble(iditer, score); } - Relation<Double> scoreResult = new MaterializedRelation<Double>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -175,7 +175,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuMedianMultipleAttributes<N, O> makeInstance() { - return new CTLuMedianMultipleAttributes<N, O>(npredf); + return new CTLuMedianMultipleAttributes<>(npredf); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java index 3b876bba..da527af0 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -136,7 +136,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< scores.putDouble(iditer, score); } - Relation<Double> scoreResult = new MaterializedRelation<Double>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -165,7 +165,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuMoranScatterplotOutlier<N> makeInstance() { - return new CTLuMoranScatterplotOutlier<N>(npredf); + return new CTLuMoranScatterplotOutlier<>(npredf); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java index ec92afd7..c8efe4da 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java @@ -38,12 +38,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNHeap;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -137,7 +136,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac // construct the relation Matrix of the ec-graph
Matrix E = new Matrix(ids.size(), ids.size());
- KNNHeap<D> heap = KNNUtil.newHeap(distFunc.getDistanceFactory(), k);
+ KNNHeap<D> heap = DBIDUtil.newHeap(distFunc.getDistanceFactory(), k);
{
int i = 0;
for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
@@ -226,7 +225,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac scores.putDouble(id, score);
}
- Relation<Double> scoreResult = new MaterializedRelation<Double>("randomwalkec", "RandomWalkEC", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("randomwalkec", "RandomWalkEC", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(scoreMeta, scoreResult);
}
@@ -329,7 +328,7 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac @Override
protected CTLuRandomWalkEC<N, D> makeInstance() {
- return new CTLuRandomWalkEC<N, D>(distanceFunction, alpha, c, k);
+ return new CTLuRandomWalkEC<>(distanceFunction, alpha, c, k);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java index 295c7414..bcbbfd2a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -160,7 +160,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { } } // build representation - Relation<Double> scoreResult = new MaterializedRelation<Double>("SPO", "Scatterplot-Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("SPO", "Scatterplot-Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -189,7 +189,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuScatterplotOutlier<N> makeInstance() { - return new CTLuScatterplotOutlier<N>(npredf); + return new CTLuScatterplotOutlier<>(npredf); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java index 02573a06..d6cb5a50 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -133,7 +133,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> { } // Wrap result - Relation<Double> scoreResult = new MaterializedRelation<Double>("ZTest", "Z Test score", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("ZTest", "Z Test score", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -162,7 +162,7 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> { public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuZTestOutlier<N> makeInstance() { - return new CTLuZTestOutlier<N>(npredf); + return new CTLuZTestOutlier<>(npredf); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java index 720fa39f..08c3e29b 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -187,7 +187,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance slomminmax.put(slom); } - Relation<Double> scoreResult = new MaterializedRelation<Double>("SLOM", "slom-outlier", TypeUtil.DOUBLE, sloms, relation.getDBIDs()); + Relation<Double> scoreResult = new MaterializedRelation<>("SLOM", "slom-outlier", TypeUtil.DOUBLE, sloms, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(slomminmax.getMin(), slomminmax.getMax(), 0.0, Double.POSITIVE_INFINITY); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); @@ -218,7 +218,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> { @Override protected SLOM<N, O, D> makeInstance() { - return new SLOM<N, O, D>(npredf, distanceFunction); + return new SLOM<>(npredf, distanceFunction); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java index a6f39a60..a2605f39 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; /**
* The Spatial Outlier Factor (SOF) is a spatial
- * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.LOF LOF} variation.
+ * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.lof.LOF LOF} variation.
*
* Since the "reachability distance" of LOF cannot be used canonically in the
* bichromatic case, this part of LOF is dropped and the exact distance is used
@@ -138,7 +138,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB }
// Build result representation.
- Relation<Double> scoreResult = new MaterializedRelation<Double>("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -164,7 +164,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> {
@Override
protected SOF<N, O, D> makeInstance() {
- return new SOF<N, O, D>(npredf, distanceFunction);
+ return new SOF<>(npredf, distanceFunction);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java index 9aa21b66..e07ce480 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java @@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -186,7 +186,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { minmax.put(score);
}
//
- Relation<Double> scoreResult = new MaterializedRelation<Double>("TrimmedMean", "Trimmed Mean Score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
+ Relation<Double> scoreResult = new MaterializedRelation<>("TrimmedMean", "Trimmed Mean Score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
@@ -237,7 +237,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { @Override
protected TrimmedMeanApproach<N> makeInstance() {
- return new TrimmedMeanApproach<N>(npredf, p);
+ return new TrimmedMeanApproach<>(npredf, p);
}
}
}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java index 2c706ce0..ef237928 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java index 4aa96b25..c4fc4407 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -202,7 +202,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { * @return Inner neighborhood. 
*/ protected static <O> NeighborSetPredicate.Factory<O> getParameterInnerNeighborhood(Parameterization config) { - final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); + final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); if(config.grab(param)) { return param.instantiateClass(config); } @@ -233,7 +233,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { @Override protected ExtendedNeighborhood.Factory<O> makeInstance() { - return new ExtendedNeighborhood.Factory<O>(inner, steps); + return new ExtendedNeighborhood.Factory<>(inner, steps); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java index 01052c1f..96896bd8 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -36,6 +36,7 @@ import de.lmu.ifi.dbs.elki.data.ExternalID; import de.lmu.ifi.dbs.elki.data.LabelList; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStore; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; @@ -119,8 +120,8 @@ public 
class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { } @Override - public NeighborSetPredicate instantiate(Relation<?> database) { - DataStore<DBIDs> store = loadNeighbors(database); + public NeighborSetPredicate instantiate(Relation<?> relation) { + DataStore<DBIDs> store = loadNeighbors(relation.getDatabase(), relation); ExternalNeighborhood neighborhood = new ExternalNeighborhood(store); return neighborhood; } @@ -133,8 +134,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { /** * Method to load the external neighbors. */ - private DataStore<DBIDs> loadNeighbors(Relation<?> database) { - final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class); + private DataStore<DBIDs> loadNeighbors(Database database, Relation<?> relation) { + final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class); if(LOG.isVerbose()) { LOG.verbose("Loading external neighborhoods."); @@ -146,11 +147,11 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { // Build a map label/ExternalId -> DBID // (i.e. a reverse index!) // TODO: move this into the database layer to share? 
- Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() << 1); + Map<String, DBID> lblmap = new HashMap<>(relation.size() << 1); { - Relation<LabelList> olq = database.getDatabase().getRelation(TypeUtil.LABELLIST); - Relation<ExternalID> eidq = database.getDatabase().getRelation(TypeUtil.EXTERNALID); - for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) { + Relation<LabelList> olq = database.getRelation(TypeUtil.LABELLIST); + Relation<ExternalID> eidq = database.getRelation(TypeUtil.EXTERNALID); + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { if(eidq != null) { ExternalID eid = eidq.get(iditer); if(eid != null) { diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java index b52f8e91..25283d5c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java index f6000ef0..c43ebba7 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java @@ -3,7 +3,7 @@ package 
de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -32,10 +32,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.distance.KNNList;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -119,7 +119,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte // TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
+ KNNList<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
neighbours.add(neighbor);
@@ -172,7 +172,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte if(config.grab(kP)) {
k = kP.getValue();
}
- final ObjectParameter<DistanceFunction<? super O, D>> distP = new ObjectParameter<DistanceFunction<? super O, D>>(DISTANCEFUNCTION_ID, DistanceFunction.class);
+ final ObjectParameter<DistanceFunction<? super O, D>> distP = new ObjectParameter<>(DISTANCEFUNCTION_ID, DistanceFunction.class);
if(config.grab(distP)) {
distFunc = distP.instantiateClass(config);
}
@@ -180,7 +180,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte @Override
protected PrecomputedKNearestNeighborNeighborhood.Factory<O, D> makeInstance() {
- return new PrecomputedKNearestNeighborNeighborhood.Factory<O, D>(k, distFunc);
+ return new PrecomputedKNearestNeighborNeighborhood.Factory<>(k, distFunc);
}
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java index 47ca5ad2..fd51ca22 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java index f1c68577..05bf2f18 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -89,7 +89,7 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr @Override public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) { ModifiableDBIDs seen = DBIDUtil.newHashSet(); - List<DoubleDBIDPair> result = new ArrayList<DoubleDBIDPair>(); + List<DoubleDBIDPair> result = new ArrayList<>(); // Add starting object result.add(DBIDUtil.newPair(computeWeight(0), reference)); @@ -194,7 +194,7 @@ public class LinearWeightedExtendedNeighborhood 
implements WeightedNeighborSetPr * @return Inner neighborhood. */ protected static <O> NeighborSetPredicate.Factory<O> getParameterInnerNeighborhood(Parameterization config) { - final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<NeighborSetPredicate.Factory<O>>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); + final ObjectParameter<NeighborSetPredicate.Factory<O>> param = new ObjectParameter<>(NEIGHBORHOOD_ID, NeighborSetPredicate.Factory.class); if(config.grab(param)) { return param.instantiateClass(config); } @@ -225,7 +225,7 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr @Override protected LinearWeightedExtendedNeighborhood.Factory<O> makeInstance() { - return new LinearWeightedExtendedNeighborhood.Factory<O>(inner, steps); + return new LinearWeightedExtendedNeighborhood.Factory<>(inner, steps); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java index c179d81f..9bdb7d51 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -64,7 +64,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica @Override public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) { DBIDs neighbors = inner.getNeighborDBIDs(reference); - ArrayList<DoubleDBIDPair> adapted 
= new ArrayList<DoubleDBIDPair>(neighbors.size()); + ArrayList<DoubleDBIDPair> adapted = new ArrayList<>(neighbors.size()); for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { adapted.add(DBIDUtil.newPair(1.0, iter)); } @@ -130,7 +130,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<NeighborSetPredicate.Factory<O>> innerP = new ObjectParameter<NeighborSetPredicate.Factory<O>>(INNER_ID, NeighborSetPredicate.Factory.class); + ObjectParameter<NeighborSetPredicate.Factory<O>> innerP = new ObjectParameter<>(INNER_ID, NeighborSetPredicate.Factory.class); if(config.grab(innerP)) { inner = innerP.instantiateClass(config); } @@ -138,7 +138,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica @Override protected UnweightedNeighborhoodAdapter.Factory<O> makeInstance() { - return new UnweightedNeighborhoodAdapter.Factory<O>(inner); + return new UnweightedNeighborhoodAdapter.Factory<>(inner); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java index 16d37587..ca0fa620 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git 
a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java index 39165cfd..d7c7a797 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java index 13bf3f25..5a65d8c1 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/package-info.java @@ -5,7 +5,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java index 1965914d..ae04fef4 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -37,26 +37,28 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import 
de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; -import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDPairList; +import de.lmu.ifi.dbs.elki.database.ids.distance.DoubleDistanceDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.distance.ModifiableDoubleDistanceDBIDList; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList; -import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MeanVariance; -import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction; -import 
de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction; import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.EpanechnikovKernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.kernelfunctions.KernelDensityFunction; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; @@ -145,7 +147,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier } OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.); - OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<Double>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs())); + OutlierResult outresResult = new OutlierResult(meta, new MaterializedRelation<>("OUTRES", "outres-score", TypeUtil.DOUBLE, ranks, relation.getDBIDs())); return outresResult; } @@ -175,7 +177,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier final DoubleDistance range = new DoubleDistance(adjustedEps * 2.); RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range); - DistanceDBIDResult<DoubleDistance> neighc = rq.getRangeForDBID(id, range); + DistanceDBIDList<DoubleDistance> neighc = rq.getRangeForDBID(id, range); DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps); if(neigh.size() > 2) { // Relevance test @@ -183,7 +185,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier final double density = kernel.subspaceDensity(subspace, neigh); // Compute mean and standard deviation for densities of neighbors. 
meanv.reset(); - for (DoubleDistanceDBIDResultIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { + for (DoubleDistanceDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel); meanv.put(kernel.subspaceDensity(subspace, n2)); } @@ -208,10 +210,10 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param adjustedEps New epsilon * @return refined list */ - private DoubleDistanceDBIDList refineRange(DistanceDBIDResult<DoubleDistance> neighc, double adjustedEps) { - DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size()); + private DoubleDistanceDBIDList refineRange(DistanceDBIDList<DoubleDistance> neighc, double adjustedEps) { + ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size()); // We don't have a guarantee for this list to be sorted - for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); if(p instanceof DoubleDistanceDBIDPair) { if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) { @@ -238,10 +240,10 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier * @param kernel Kernel * @return Neighbors of neighbor object */ - private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDResult<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) { - DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size()); + private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDList<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? 
super V> df, double adjustedEps, KernelDensityEstimator kernel) { + ModifiableDoubleDistanceDBIDList n = new DoubleDistanceDBIDPairList(neighc.size()); V query = kernel.relation.get(dbid); - for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + for (DistanceDBIDListIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); double dist = df.doubleDistance(query, kernel.relation.get(p)); if(dist <= adjustedEps) { @@ -348,7 +350,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier final double bandwidth = optimalBandwidth(subspace.cardinality()); double density = 0; - for (DoubleDistanceDBIDResultIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + for (DoubleDistanceDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { double v = neighbor.doubleDistance() / bandwidth; if(v < 1) { density += 1 - (v * v); @@ -426,7 +428,7 @@ public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<Outlier @Override protected OUTRES<O> makeInstance() { - return new OUTRES<O>(eps); + return new OUTRES<>(eps); } } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java index 79243213..96c8875f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -133,7 +133,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli } } - Relation<Double> scoreResult = new MaterializedRelation<Double>("OutRank-S1", "OUTRANK_S1", TypeUtil.DOUBLE, score, ids); + Relation<Double> scoreResult = new MaterializedRelation<>("OutRank-S1", "OUTRANK_S1", TypeUtil.DOUBLE, score, ids); OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY); OutlierResult res = new OutlierResult(meta, scoreResult); res.addChildResult(clustering); @@ -181,7 +181,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class); + ObjectParameter<SubspaceClusteringAlgorithm<? 
extends SubspaceModel<?>>> algP = new ObjectParameter<>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class); if (config.grab(algP)) { algorithm = algP.instantiateClass(config); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java index 35a780cd..b2255e67 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -164,7 +164,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte progress.incrementProcessed(LOG); } DBIDs knnList = getNearestNeighbors(relation, snnInstance, iter); - SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(iter)); + SODModel<V> model = new SODModel<>(relation, knnList, alpha, relation.get(iter)); sod_models.put(iter, model); minmax.put(model.getSod()); } @@ -172,7 +172,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte progress.ensureCompleted(LOG); } // combine results. - Relation<SODModel<?>> models = new MaterializedRelation<SODModel<?>>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs()); + Relation<SODModel<?>> models = new MaterializedRelation<>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs()); OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); OutlierResult sodResult = new OutlierResult(meta, new SODProxyScoreResult(models, relation.getDBIDs())); // also add the models. 
@@ -194,7 +194,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte */ private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) { // similarityFunction.getPreprocessor().getParameters(); - Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<DoubleDBIDPair>(knn); + Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<>(knn); for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { if (!DBIDUtil.equal(iter, queryObject)) { double sim = simQ.similarity(queryObject, iter).doubleValue(); @@ -454,7 +454,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<SimilarityFunction<V, D>>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class); + final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class); if (config.grab(simP)) { similarityFunction = simP.instantiateClass(config); } @@ -474,7 +474,7 @@ public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> exte @Override protected SOD<V, D> makeInstance() { - return new SOD<V, D>(knn, alpha, similarityFunction); + return new SOD<>(knn, alpha, similarityFunction); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java index 8b1c80df..c3951821 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 
Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java index ae95abfa..d10eaef8 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -117,7 +117,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements final double score = (pattern.matcher(label).matches()) ? 1 : 0; scores.putDouble(iditer, score); } - Relation<Double> scoreres = new MaterializedRelation<Double>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoreres = new MaterializedRelation<>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java index 35a85d51..44a7975f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und 
Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -73,7 +73,7 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { scores.putDouble(iditer, 1.0); } - Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoreres = new MaterializedRelation<>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java new file mode 100644 index 00000000..84e3ad41 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAverageCoordinateOutlier.java @@ -0,0 +1,98 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.Mean; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; + +/** + * Trivial method that takes the average of all dimensions (for one-dimensional + * data that is just the actual value!) as outlier score. Mostly useful to + * convert a 1d data set into a score, e.g. in geodata. + * + * @author Erich Schubert + */ +public class TrivialAverageCoordinateOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { + /** + * Our logger. + */ + private static final Logging logger = Logging.getLogger(TrivialAverageCoordinateOutlier.class); + + /** + * Constructor. + */ + public TrivialAverageCoordinateOutlier() { + super(); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH); + } + + /** + * Run the actual algorithm. + * + * @param relation Relation + * @return Result + */ + public OutlierResult run(Relation<? 
extends NumberVector<?>> relation) { + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT); + DoubleMinMax minmax = new DoubleMinMax(); + Mean m = new Mean(); + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + m.reset(); + NumberVector<?> nv = relation.get(iditer); + for (int i = 0; i < nv.getDimensionality(); i++) { + m.put(nv.doubleValue(i + 1)); + } + final double score = m.getMean(); + scores.putDouble(iditer, score); + minmax.put(score); + } + Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial mean score", "mean-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return logger; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java index e4c3861f..285b00df 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java @@ -1,9 +1,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; + /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -48,8 +49,11 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessEqualConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @@ -68,11 +72,6 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im private static final Logging LOG = Logging.getLogger(TrivialGeneratedOutlier.class); /** - * Expected share of outliers - */ - public static final OptionID EXPECT_ID = new OptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive."); - - /** * Expected share of outliers. 
*/ double expect = 0.01; @@ -96,19 +95,18 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im @Override public TypeInformation[] getInputTypeRestriction() { - return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD, new SimpleTypeInformation<Model>(Model.class), TypeUtil.GUESSED_LABEL); + return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD, new SimpleTypeInformation<>(Model.class), TypeUtil.GUESSED_LABEL); } @Override public OutlierResult run(Database database) { Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); - Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class)); + Relation<Model> models = database.getRelation(new SimpleTypeInformation<>(Model.class)); // Prefer a true class label try { Relation<?> relation = database.getRelation(TypeUtil.CLASSLABEL); return run(models, vecs, relation); - } - catch(NoSupportedDataTypeException e) { + } catch (NoSupportedDataTypeException e) { // Otherwise, try any labellike. 
return run(models, vecs, database.getRelation(TypeUtil.GUESSED_LABEL)); } @@ -125,54 +123,62 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im public OutlierResult run(Relation<Model> models, Relation<NumberVector<?>> vecs, Relation<?> labels) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT); - // Adjustment constant - final double minscore = expect / (expect + 1); - - HashSet<GeneratorSingleCluster> generators = new HashSet<GeneratorSingleCluster>(); - for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) { + HashSet<GeneratorSingleCluster> generators = new HashSet<>(); + for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) { Model model = models.get(iditer); - if(model instanceof GeneratorSingleCluster) { + if (model instanceof GeneratorSingleCluster) { generators.add((GeneratorSingleCluster) model); } } - if(generators.size() == 0) { + if (generators.size() == 0) { LOG.warning("No generator models found for dataset - all points will be considered outliers."); } + for (GeneratorSingleCluster gen : generators) { + for (int i = 0; i < gen.getDim(); i++) { + Distribution dist = gen.getDistribution(i); + if (!(dist instanceof NormalDistribution)) { + throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist); + } + } + } - for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) { - double score = 0.0; + for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) { + double score = 1.; // Convert to a math vector Vector v = vecs.get(iditer).getColumnVector(); - for(GeneratorSingleCluster gen : generators) { + for (GeneratorSingleCluster gen : generators) { Vector tv = v; // Transform backwards - if(gen.getTransformation() != null) { + if (gen.getTransformation() != null) { tv = gen.getTransformation().applyInverse(v); } final int dim = 
tv.getDimensionality(); double lensq = 0.0; int norm = 0; - for(int i = 0; i < dim; i++) { + for (int i = 0; i < dim; i++) { Distribution dist = gen.getDistribution(i); - if(dist instanceof NormalDistribution) { + if (dist instanceof NormalDistribution) { NormalDistribution d = (NormalDistribution) dist; double delta = (tv.get(i) - d.getMean()) / d.getStddev(); lensq += delta * delta; norm += 1; + } else { + throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist); } } - if(norm > 0) { + if (norm > 0.) { // The squared distances are ChiSquared distributed - score = Math.max(score, 1 - ChiSquaredDistribution.cdf(lensq, norm)); + score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm)); + } else { + score = 0.; } } - // score inversion. - score = expect / (expect + score); - // adjust to 0 to 1 range: - score = (score - minscore) / (1 - minscore); + if (expect < 1) { + score = expect * score / (1 - score + expect); + } scores.putDouble(iditer, score); } - Relation<Double> scoreres = new MaterializedRelation<Double>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs()); + Relation<Double> scoreres = new MaterializedRelation<>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.); return new OutlierResult(meta, scoreres); } @@ -193,13 +199,20 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im /** * Expected share of outliers */ + public static final OptionID EXPECT_ID = new OptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive. 
When the value is 1, the CDF will be given instead."); + + /** + * Expected share of outliers + */ double expect; - + @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.01); - if(config.grab(expectP)) { + expectP.addConstraint(new GreaterConstraint(0.0)); + expectP.addConstraint(new LessEqualConstraint(1.0)); + if (config.grab(expectP)) { expect = expectP.getValue(); } } @@ -209,4 +222,4 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im return new TrivialGeneratedOutlier(expect); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java index 695ff112..ff3d0296 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -73,7 +73,7 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { scores.putDouble(iditer, 0.0); } - Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); + Relation<Double> scoreres = new MaterializedRelation<>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); return new OutlierResult(meta, scoreres); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java index d49d3565..c927cae4 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2012 +Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |