diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier')
63 files changed, 3420 insertions, 1641 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java index 88a62e38..d52a81fd 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ABOD.java @@ -25,13 +25,11 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; -import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; @@ -42,13 +40,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.PrimitiveSimilarityFunction; import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.KernelMatrix; @@ -66,11 +64,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import 
de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; /** * Angle-Based Outlier Detection @@ -92,39 +90,39 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; @Title("ABOD: Angle-Based Outlier Detection") @Description("Outlier detection using variance analysis on angles, especially for high dimensional data sets.") @Reference(authors = "H.-P. Kriegel, M. Schubert, and A. Zimek", title = "Angle-Based Outlier Detection in High-dimensional Data", booktitle = "Proc. 14th ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining (KDD '08), Las Vegas, NV, 2008", url = "http://dx.doi.org/10.1145/1401890.1401946") -public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlgorithm<V, DoubleDistance, OutlierResult> implements OutlierAlgorithm { +public class ABOD<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<V, DoubleDistance, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(ABOD.class); + private static final Logging LOG = Logging.getLogger(ABOD.class); /** * Parameter for k, the number of neighbors used in kNN queries. */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("abod.k", "Parameter k for kNN queries."); + public static final OptionID K_ID = new OptionID("abod.k", "Parameter k for kNN queries."); /** * Parameter for sample size to be used in fast mode. 
*/ - public static final OptionID FAST_SAMPLE_ID = OptionID.getOrCreateOptionID("abod.samplesize", "Sample size to enable fast mode."); + public static final OptionID FAST_SAMPLE_ID = new OptionID("abod.samplesize", "Sample size to enable fast mode."); /** * Parameter for the kernel function. */ - public static final OptionID KERNEL_FUNCTION_ID = OptionID.getOrCreateOptionID("abod.kernelfunction", "Kernel function to use."); + public static final OptionID KERNEL_FUNCTION_ID = new OptionID("abod.kernelfunction", "Kernel function to use."); /** * The preprocessor used to materialize the kNN neighborhoods. */ - public static final OptionID PREPROCESSOR_ID = OptionID.getOrCreateOptionID("abod.knnquery", "Processor to compute the kNN neighborhoods."); + public static final OptionID PREPROCESSOR_ID = new OptionID("abod.knnquery", "Processor to compute the kNN neighborhoods."); /** - * use alternate code below + * use alternate code below. */ - private static final boolean useRNDSample = false; + private static final boolean USE_RND_SAMPLE = false; /** - * k parameter + * k parameter. */ private int k; @@ -134,10 +132,13 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg int sampleSize = 0; /** - * Store the configured Kernel version + * Store the configured Kernel version. */ private PrimitiveSimilarityFunction<? super V, DoubleDistance> primitiveKernelFunction; + /** + * Static DBID map. + */ private ArrayModifiableDBIDs staticids = null; /** @@ -173,41 +174,32 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg * Main part of the algorithm. Exact version. 
* * @param relation Relation to query - * @param k k for kNN queries * @return result */ - public OutlierResult getRanking(Relation<V> relation, int k) { + public OutlierResult getRanking(Relation<V> relation) { // Fix a static set of IDs staticids = DBIDUtil.newArray(relation.getDBIDs()); staticids.sort(); KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids); - Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder()); + Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder()); // preprocess kNN neighborhoods - assert (k == this.k); KNNQuery<V, DoubleDistance> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k); MeanVariance s = new MeanVariance(); - for(DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) { + for (DBIDIter objKey = relation.iterDBIDs(); objKey.valid(); objKey.advance()) { s.reset(); - // System.out.println("Processing: " +objKey); KNNResult<DoubleDistance> neighbors = knnQuery.getKNNForDBID(objKey, k); - Iterator<DistanceResultPair<DoubleDistance>> iter = neighbors.iterator(); - while(iter.hasNext()) { - DistanceResultPair<DoubleDistance> key1 = iter.next(); - // Iterator iter2 = data.keyIterator(); - Iterator<DistanceResultPair<DoubleDistance>> iter2 = neighbors.iterator(); - // PriorityQueue best = new PriorityQueue(false, k); - while(iter2.hasNext()) { - DistanceResultPair<DoubleDistance> key2 = iter2.next(); - if(key2.sameDBID(key1) || key1.sameDBID(objKey) || key2.sameDBID(objKey)) { + for (DBIDIter key1 = neighbors.iter(); key1.valid(); key1.advance()) { + for (DBIDIter key2 = neighbors.iter(); key2.valid(); key2.advance()) { + if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(key1, objKey) || DBIDUtil.equal(key2, objKey)) { continue; } double nenner = calcDenominator(kernelMatrix, objKey, key1, key2); - if(nenner != 0) { + if (nenner != 0) { double sqrtnenner = Math.sqrt(nenner); 
double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner; s.put(tmp, 1 / sqrtnenner); @@ -217,14 +209,14 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg } // Sample variance probably would be correct, however the numerical // instabilities can actually break ABOD here. - pq.add(new DoubleObjPair<DBID>(s.getNaiveVariance(), objKey.getDBID())); + pq.add(DBIDUtil.newPair(s.getNaiveVariance(), objKey)); } DoubleMinMax minmaxabod = new DoubleMinMax(); WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); - for(DoubleObjPair<DBID> pair : pq) { - abodvalues.putDouble(pair.getSecond(), pair.first); - minmaxabod.put(pair.first); + for (DoubleDBIDPair pair : pq) { + abodvalues.putDouble(pair, pair.doubleValue()); + minmaxabod.put(pair.doubleValue()); } // Build result representation. Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Degree", "abod-outlier", TypeUtil.DOUBLE, abodvalues, relation.getDBIDs()); @@ -236,11 +228,9 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg * Main part of the algorithm. Fast version. * * @param relation Relation to use - * @param k k for kNN queries - * @param sampleSize Sample size * @return result */ - public OutlierResult getFastRanking(Relation<V> relation, int k, int sampleSize) { + public OutlierResult getFastRanking(Relation<V> relation) { final DBIDs ids = relation.getDBIDs(); // Fix a static set of IDs // TODO: add a DBIDUtil.ensureSorted? 
@@ -249,92 +239,72 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, relation, staticids); - Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(relation.size(), Collections.reverseOrder()); + Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(relation.size(), Collections.reverseOrder()); // get Candidate Ranking - for(DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) { + for (DBIDIter aKey = relation.iterDBIDs(); aKey.valid(); aKey.advance()) { WritableDoubleDataStore dists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT); // determine kNearestNeighbors and pairwise distances - Heap<DoubleObjPair<DBID>> nn; - if(!useRNDSample) { + Heap<DoubleDBIDPair> nn; + if (!USE_RND_SAMPLE) { nn = calcDistsandNN(relation, kernelMatrix, sampleSize, aKey, dists); - } - else { + } else { // alternative: nn = calcDistsandRNDSample(relation, kernelMatrix, sampleSize, aKey, dists); } // get normalization double[] counter = calcFastNormalization(aKey, dists, staticids); - // System.out.println(counter[0] + " " + counter2[0] + " " + counter[1] + - // " " + counter2[1]); // umsetzen von Pq zu list ModifiableDBIDs neighbors = DBIDUtil.newArray(nn.size()); - while(!nn.isEmpty()) { - neighbors.add(nn.remove().getSecond()); + while (!nn.isEmpty()) { + neighbors.add(nn.poll()); } // getFilter double var = getAbofFilter(kernelMatrix, aKey, dists, counter[1], counter[0], neighbors); - pq.add(new DoubleObjPair<DBID>(var, aKey.getDBID())); - // System.out.println("prog "+(prog++)); + pq.add(DBIDUtil.newPair(var, aKey)); } // refine Candidates - Heap<DoubleObjPair<DBID>> resqueue = new Heap<DoubleObjPair<DBID>>(k); - // System.out.println(pq.size() + " objects ordered into candidate list."); - // int v = 0; + Heap<DoubleDBIDPair> resqueue = new Heap<DoubleDBIDPair>(k); MeanVariance s = new MeanVariance(); - 
while(!pq.isEmpty()) { - if(resqueue.size() == k && pq.peek().first > resqueue.peek().first) { + while (!pq.isEmpty()) { + if (resqueue.size() == k && pq.peek().doubleValue() > resqueue.peek().doubleValue()) { break; } // double approx = pq.peek().getFirst(); - DBID aKey = pq.remove().getSecond(); - // if(!result.isEmpty()) { - // System.out.println("Best Candidate " + aKey+" : " + pq.firstPriority() - // + " worst result: " + result.firstPriority()); - // } else { - // System.out.println("Best Candidate " + aKey+" : " + pq.firstPriority() - // + " worst result: " + Double.MAX_VALUE); - // } - // v++; + DBIDRef aKey = pq.poll(); s.reset(); - for(DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) { - if(bKey.sameDBID(aKey)) { + for (DBIDIter bKey = relation.iterDBIDs(); bKey.valid(); bKey.advance()) { + if (DBIDUtil.equal(bKey, aKey)) { continue; } - for(DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) { - if(cKey.sameDBID(aKey)) { + for (DBIDIter cKey = relation.iterDBIDs(); cKey.valid(); cKey.advance()) { + if (DBIDUtil.equal(cKey, aKey)) { continue; } // double nenner = dists[y]*dists[z]; double nenner = calcDenominator(kernelMatrix, aKey, bKey, cKey); - if(nenner != 0) { + if (nenner != 0) { double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner; double sqrtNenner = Math.sqrt(nenner); s.put(tmp, 1 / sqrtNenner); } } } - // System.out.println( aKey + "Sum " + sum + " SQRSum " +sqrSum + - // " Counter " + counter); double var = s.getSampleVariance(); - // System.out.println(aKey+ " : " + approx +" " + var); - if(resqueue.size() < k) { - resqueue.add(new DoubleObjPair<DBID>(var, aKey)); - } - else { - if(resqueue.peek().first > var) { - resqueue.remove(); - resqueue.add(new DoubleObjPair<DBID>(var, aKey)); + if (resqueue.size() < k) { + resqueue.add(DBIDUtil.newPair(var, aKey)); + } else { + if (resqueue.peek().doubleValue() > var) { + resqueue.replaceTopElement(DBIDUtil.newPair(var, aKey)); } } } - // 
System.out.println(v + " Punkte von " + data.size() + " verfeinert !!"); DoubleMinMax minmaxabod = new DoubleMinMax(); WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); - for(DoubleObjPair<DBID> pair : pq) { - abodvalues.putDouble(pair.getSecond(), pair.first); - minmaxabod.put(pair.first); + for (DoubleDBIDPair pair : pq) { + abodvalues.putDouble(pair, pair.doubleValue()); + minmaxabod.put(pair.doubleValue()); } // Build result representation. Relation<Double> scoreResult = new MaterializedRelation<Double>("Angle-based Outlier Detection", "abod-outlier", TypeUtil.DOUBLE, abodvalues, ids); @@ -348,7 +318,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg double sum = 0; double sumF = 0; for (DBIDIter yKey = ids.iter(); yKey.valid(); yKey.advance()) { - if(dists.doubleValue(yKey) != 0) { + if (dists.doubleValue(yKey) != 0) { double tmp = 1 / Math.sqrt(dists.doubleValue(yKey)); sum += tmp; sumF += (1 / dists.doubleValue(yKey)) * tmp; @@ -357,7 +327,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg double sofar = 0; double sofarF = 0; for (DBIDIter zKey = ids.iter(); zKey.valid(); zKey.advance()) { - if(dists.doubleValue(zKey) != 0) { + if (dists.doubleValue(zKey) != 0) { double tmp = 1 / Math.sqrt(dists.doubleValue(zKey)); sofar += tmp; double rest = sum - sofar; @@ -375,17 +345,17 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg double sum = 0.0; double sqrSum = 0.0; double partCounter = 0; - for(DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) { - if(bKey.sameDBID(aKey)) { + for (DBIDIter bKey = neighbors.iter(); bKey.valid(); bKey.advance()) { + if (DBIDUtil.equal(bKey, aKey)) { continue; } - for(DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) { - if(cKey.sameDBID(aKey)) { + for (DBIDIter cKey = neighbors.iter(); cKey.valid(); cKey.advance()) { + if (DBIDUtil.equal(cKey, 
aKey)) { continue; } - if(bKey.compareDBID(cKey) > 0) { + if (DBIDUtil.compare(bKey, cKey) > 0) { double nenner = dists.doubleValue(bKey) * dists.doubleValue(cKey); - if(nenner != 0) { + if (nenner != 0) { double tmp = calcNumerator(kernelMatrix, aKey, bKey, cKey) / nenner; double sqrtNenner = Math.sqrt(nenner); sum += tmp * (1 / sqrtNenner); @@ -417,7 +387,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg private int mapDBID(DBIDRef aKey) { // TODO: this is not the most efficient... int off = staticids.binarySearch(aKey); - if(off < 0) { + if (off < 0) { throw new AbortException("Did not find id " + aKey.toString() + " in staticids. " + staticids.contains(aKey)); } return off + 1; @@ -434,33 +404,31 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg return (kernelMatrix.getDistance(ai, ai) + kernelMatrix.getDistance(bi, ci) - kernelMatrix.getDistance(ai, ci) - kernelMatrix.getDistance(ai, bi)); } - private Heap<DoubleObjPair<DBID>> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { - Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize); - for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) { + private Heap<DoubleDBIDPair> calcDistsandNN(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { + Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize); + for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) { double val = calcCos(kernelMatrix, aKey, bKey); dists.putDouble(bKey, val); - if(nn.size() < sampleSize) { - nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID())); - } - else { - if(val < nn.peek().first) { - nn.remove(); - nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID())); + if (nn.size() < sampleSize) { + nn.add(DBIDUtil.newPair(val, bKey)); + } else { + if (val < nn.peek().doubleValue()) { + 
nn.replaceTopElement(DBIDUtil.newPair(val, bKey)); } } } return nn; } - private Heap<DoubleObjPair<DBID>> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { - Heap<DoubleObjPair<DBID>> nn = new Heap<DoubleObjPair<DBID>>(sampleSize); + private Heap<DoubleDBIDPair> calcDistsandRNDSample(Relation<V> data, KernelMatrix kernelMatrix, int sampleSize, DBIDRef aKey, WritableDoubleDataStore dists) { + Heap<DoubleDBIDPair> nn = new Heap<DoubleDBIDPair>(sampleSize); int step = (int) ((double) data.size() / (double) sampleSize); int counter = 0; - for(DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) { + for (DBIDIter bKey = data.iterDBIDs(); bKey.valid(); bKey.advance()) { double val = calcCos(kernelMatrix, aKey, bKey); dists.putDouble(bKey, val); - if(counter % step == 0) { - nn.add(new DoubleObjPair<DBID>(val, bKey.getDBID())); + if (counter % step == 0) { + nn.add(DBIDUtil.newPair(val, bKey)); } counter++; } @@ -471,112 +439,108 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg * Get explanations for points in the database. * * @param data to get explanations for + * @return String explanation */ // TODO: this should be done by the result classes. 
- public void getExplanations(Relation<V> data) { + public String getExplanations(Relation<V> data) { KernelMatrix kernelMatrix = new KernelMatrix(primitiveKernelFunction, data, staticids); // PQ for Outlier Ranking - Heap<DoubleObjPair<DBID>> pq = new Heap<DoubleObjPair<DBID>>(data.size(), Collections.reverseOrder()); + Heap<DoubleDBIDPair> pq = new Heap<DoubleDBIDPair>(data.size(), Collections.reverseOrder()); HashMap<DBID, DBIDs> explaintab = new HashMap<DBID, DBIDs>(); // test all objects MeanVariance s = new MeanVariance(), s2 = new MeanVariance(); - for(DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) { + for (DBIDIter objKey = data.iterDBIDs(); objKey.valid(); objKey.advance()) { s.reset(); // Queue for the best explanation - Heap<DoubleObjPair<DBID>> explain = new Heap<DoubleObjPair<DBID>>(); + Heap<DoubleDBIDPair> explain = new Heap<DoubleDBIDPair>(); // determine Object // for each pair of other objects for (DBIDIter key1 = data.iterDBIDs(); key1.valid(); key1.advance()) { - // Collect Explanation Vectors + // Collect Explanation Vectors s2.reset(); - if(objKey.sameDBID(key1)) { + if (DBIDUtil.equal(objKey, key1)) { continue; } for (DBIDIter key2 = data.iterDBIDs(); key2.valid(); key2.advance()) { - if(key2.sameDBID(key1) || objKey.sameDBID(key2)) { + if (DBIDUtil.equal(key2, key1) || DBIDUtil.equal(objKey, key2)) { continue; } double nenner = calcDenominator(kernelMatrix, objKey, key1, key2); - if(nenner != 0) { + if (nenner != 0) { double tmp = calcNumerator(kernelMatrix, objKey, key1, key2) / nenner; double sqr = Math.sqrt(nenner); s2.put(tmp, 1 / sqr); } } - explain.add(new DoubleObjPair<DBID>(s2.getSampleVariance(), key1.getDBID())); + explain.add(DBIDUtil.newPair(s2.getSampleVariance(), key1)); s.put(s2); } // build variance of the observed vectors - pq.add(new DoubleObjPair<DBID>(s.getSampleVariance(), objKey.getDBID())); + pq.add(DBIDUtil.newPair(s.getSampleVariance(), objKey)); // ModifiableDBIDs expList = 
DBIDUtil.newArray(); - expList.add(explain.remove().getSecond()); - while(!explain.isEmpty()) { - DBID nextKey = explain.remove().getSecond(); - if(nextKey.sameDBID(objKey)) { + expList.add(explain.poll()); + while (!explain.isEmpty()) { + DBIDRef nextKey = explain.poll(); + if (DBIDUtil.equal(nextKey, objKey)) { continue; } double max = Double.MIN_VALUE; - for(DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) { - if(exp.sameDBID(objKey) || nextKey.sameDBID(exp)) { + for (DBIDIter exp = expList.iter(); exp.valid(); exp.advance()) { + if (DBIDUtil.equal(exp, objKey) || DBIDUtil.equal(nextKey, exp)) { continue; } double nenner = Math.sqrt(calcCos(kernelMatrix, objKey, nextKey)) * Math.sqrt(calcCos(kernelMatrix, objKey, exp)); double angle = calcNumerator(kernelMatrix, objKey, nextKey, exp) / nenner; max = Math.max(angle, max); } - if(max < 0.5) { + if (max < 0.5) { expList.add(nextKey); } } - explaintab.put(objKey.getDBID(), expList); + explaintab.put(DBIDUtil.deref(objKey), expList); } - System.out.println("--------------------------------------------"); - System.out.println("Result: ABOD"); + StringBuilder buf = new StringBuilder(); + buf.append("Result: ABOD\n"); int count = 0; - while(!pq.isEmpty()) { - if(count > 10) { + while (!pq.isEmpty()) { + if (count > 10) { break; } - double factor = pq.peek().first; - DBID key = pq.remove().getSecond(); - System.out.print(data.get(key) + " "); - System.out.println(count + " Factor=" + factor + " " + key); + double factor = pq.peek().doubleValue(); + DBIDRef key = pq.poll(); + buf.append(data.get(key)).append(' '); + buf.append(count).append(" Factor=").append(factor).append(' ').append(key).append('\n'); DBIDs expList = explaintab.get(key); - generateExplanation(data, key, expList); + generateExplanation(buf, data, key, expList); count++; } - System.out.println("--------------------------------------------"); + return buf.toString(); } - private void generateExplanation(Relation<V> data, DBID key, DBIDs expList) 
{ + private void generateExplanation(StringBuilder buf, Relation<V> data, DBIDRef key, DBIDs expList) { Vector vect1 = data.get(key).getColumnVector(); - for(DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) { - System.out.println("Outlier: " + vect1); + for (DBIDIter iter = expList.iter(); iter.valid(); iter.advance()) { + buf.append("Outlier: ").append(vect1).append('\n'); Vector exp = data.get(iter).getColumnVector(); - System.out.println("Most common neighbor: " + exp); + buf.append("Most common neighbor: ").append(exp).append('\n'); // determine difference Vector Vector vals = exp.minus(vect1); - System.out.println(vals); - // System.out.println(new FeatureVector( - // "Diff-"+vect1.getPrimaryKey(),vals )); + buf.append(vals).append('\n'); } - System.out.println(); } /** - * Run ABOD on the data set + * Run ABOD on the data set. * - * @param database - * @param relation + * @param relation Relation to process * @return Outlier detection result */ - public OutlierResult run(Database database, Relation<V> relation) { - if(sampleSize > 0) { - return getFastRanking(relation, k, sampleSize); - } - else { - return getRanking(relation, k); + public OutlierResult run(Relation<V> relation) { + if (sampleSize > 0) { + return getFastRanking(relation); + } else { + return getRanking(relation); } } @@ -587,7 +551,7 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -597,26 +561,38 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, DoubleDistance> { + public static class Parameterizer<V extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, DoubleDistance> { + /** + * k Parameter. + */ protected int k = 0; + /** + * Sample size. 
+ */ protected int sampleSize = 0; + /** + * Distance function. + */ protected PrimitiveSimilarityFunction<V, DoubleDistance> primitiveKernelFunction = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(1), 30); - if(config.grab(kP)) { + final IntParameter kP = new IntParameter(K_ID, 30); + kP.addConstraint(new GreaterEqualConstraint(1)); + if (config.grab(kP)) { k = kP.getValue(); } - final IntParameter sampleSizeP = new IntParameter(FAST_SAMPLE_ID, new GreaterEqualConstraint(1), true); - if(config.grab(sampleSizeP)) { + final IntParameter sampleSizeP = new IntParameter(FAST_SAMPLE_ID); + sampleSizeP.addConstraint(new GreaterEqualConstraint(1)); + sampleSizeP.setOptional(true); + if (config.grab(sampleSizeP)) { sampleSize = sampleSizeP.getValue(); } final ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>> param = new ObjectParameter<PrimitiveSimilarityFunction<V, DoubleDistance>>(KERNEL_FUNCTION_ID, PrimitiveSimilarityFunction.class, PolynomialKernelFunction.class); - if(config.grab(param)) { + if (config.grab(param)) { primitiveKernelFunction = param.instantiateClass(config); } } @@ -626,4 +602,4 @@ public class ABOD<V extends NumberVector<V, ?>> extends AbstractDistanceBasedAlg return new ABOD<V>(k, sampleSize, primitiveKernelFunction, distanceFunction); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java index 39c3db60..41da687f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ALOCI.java @@ -36,10 +36,12 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.NumberVectorDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
@@ -51,6 +53,7 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -58,8 +61,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
/**
@@ -78,17 +81,19 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * @author Jonathan von Brünken
* @author Erich Schubert
*
+ * @apiviz.composedOf ALOCIQuadTree
+ *
* @param <O> Object type
* @param <D> Distance type
*/
@Title("LOCI: Fast Outlier Detection Using the Local Correlation Integral")
@Description("Algorithm to compute outliers based on the Local Correlation Integral")
@Reference(authors = "S. Papadimitriou, H. Kitagawa, P. B. Gibbons, C. Faloutsos", title = "LOCI: Fast Outlier Detection Using the Local Correlation Integral", booktitle = "Proc. 19th IEEE Int. Conf. on Data Engineering (ICDE '03), Bangalore, India, 2003", url = "http://dx.doi.org/10.1109/ICDE.2003.1260802")
-public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ALOCI<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ALOCI.class);
+ private static final Logging LOG = Logging.getLogger(ALOCI.class);
/**
* Minimum size for a leaf.
@@ -108,7 +113,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> /**
* Random generator
*/
- private Random random;
+ private RandomFactory rnd;
/**
* Distance function
@@ -122,20 +127,21 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> * @param nmin Minimum neighborhood size
* @param alpha Alpha value
* @param g Number of grids to use
- * @param seed Random generator seed.
+ * @param rnd Random generator.
*/
- public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, Long seed) {
+ public ALOCI(NumberVectorDistanceFunction<D> distanceFunction, int nmin, int alpha, int g, RandomFactory rnd) {
super();
this.distFunc = distanceFunction;
this.nmin = nmin;
this.alpha = alpha;
this.g = g;
- this.random = (seed != null) ? new Random(seed) : new Random(0);
+ this.rnd = rnd;
}
public OutlierResult run(Database database, Relation<O> relation) {
- final int dim = DatabaseUtil.dimensionality(relation);
- FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, logger) : null;
+ final int dim = RelationUtil.dimensionality(relation);
+ final Random random = rnd.getRandom();
+ FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, LOG) : null;
// Compute extend of dataset.
double[] min, max;
@@ -145,13 +151,13 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> min = new double[dim];
max = new double[dim];
for(int i = 0; i < dim; i++) {
- min[i] = hbbs.first.doubleValue(i + 1);
- max[i] = hbbs.second.doubleValue(i + 1);
+ min[i] = hbbs.first.doubleValue(i);
+ max[i] = hbbs.second.doubleValue(i);
maxd = Math.max(maxd, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
for(int i = 0; i < dim; i++) {
- double diff = (maxd - (max[i] - min[i])) / 2;
+ double diff = (maxd - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
@@ -163,7 +169,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
qts.add(qt);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
/*
* create the remaining g-1 shifted QuadTrees. This not clearly described in
@@ -178,19 +184,19 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
qts.add(qt);
if(progressPreproc != null) {
- progressPreproc.incrementProcessed(logger);
+ progressPreproc.incrementProcessed(LOG);
}
}
if(progressPreproc != null) {
- progressPreproc.ensureCompleted(logger);
+ progressPreproc.ensureCompleted(LOG);
}
// aLOCI main loop: evaluate
- FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), logger) : null;
+ FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final O obj = relation.get(iditer);
double maxmdefnorm = 0;
@@ -239,11 +245,11 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> mdef_norm.putDouble(iditer, maxmdefnorm);
minmax.put(maxmdefnorm);
if(progressLOCI != null) {
- progressLOCI.incrementProcessed(logger);
+ progressLOCI.incrementProcessed(LOG);
}
}
if(progressLOCI != null) {
- progressLOCI.ensureCompleted(logger);
+ progressLOCI.ensureCompleted(LOG);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("aLOCI normalized MDEF", "aloci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
@@ -291,7 +297,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -329,7 +335,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> /**
* Relation indexed.
*/
- private Relation<? extends NumberVector<?, ?>> relation;
+ private Relation<? extends NumberVector<?>> relation;
/**
* Constructor.
@@ -340,7 +346,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> * @param nmin Maximum size for a page to split
* @param relation Relation to index
*/
- public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?, ?>> relation) {
+ public ALOCIQuadTree(double[] min, double[] max, double[] shift, int nmin, Relation<? extends NumberVector<?>> relation) {
super();
assert (min.length <= 32) : "Quadtrees are only supported for up to 32 dimensions";
this.shift = shift;
@@ -386,11 +392,14 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> // logger.warning(FormatUtil.format(lmin)+" "+FormatUtil.format(lmax)+" "+start+"->"+end+" "+(end-start));
// Hack: Check degenerate cases that won't split
if(dim == 0) {
- NumberVector<?, ?> first = relation.get(ids.get(start));
+ DBIDArrayIter iter = ids.iter();
+ iter.seek(start);
+ NumberVector<?> first = relation.get(iter);
+ iter.advance();
boolean degenerate = true;
- loop: for(int pos = start + 1; pos < end; pos++) {
- NumberVector<?, ?> other = relation.get(ids.get(pos));
- for(int d = 1; d <= lmin.length; d++) {
+ loop: for(; iter.getOffset() < end; iter.advance()) {
+ NumberVector<?> other = relation.get(iter);
+ for(int d = 0; d < lmin.length; d++) {
if(Math.abs(first.doubleValue(d) - other.doubleValue(d)) > 1E-15) {
degenerate = false;
break loop;
@@ -431,20 +440,23 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> }
else {
// Partially sort data, by dimension dim < mid
- int spos = start, epos = end;
- while(spos < epos) {
- if(getShiftedDim(relation.get(ids.get(spos)), dim, level) <= .5) {
- spos++;
+ DBIDArrayIter siter = ids.iter(), eiter = ids.iter();
+ siter.seek(start);
+ eiter.seek(end - 1);
+ while(siter.getOffset() < eiter.getOffset()) {
+ if(getShiftedDim(relation.get(siter), dim, level) <= .5) {
+ siter.advance();
continue;
}
- if(getShiftedDim(relation.get(ids.get(epos - 1)), dim, level) > 0.5) {
- epos--;
+ if(getShiftedDim(relation.get(eiter), dim, level) > 0.5) {
+ eiter.retract();
continue;
}
- ids.swap(spos, epos - 1);
- spos++;
- epos--;
+ ids.swap(siter.getOffset(), eiter.getOffset() - 1);
+ siter.advance();
+ eiter.retract();
}
+ final int spos = siter.getOffset();
if(start < spos) {
final double tmp = lmax[dim];
lmax[dim] = lmax[dim] * .5 + lmin[dim] * .5;
@@ -468,8 +480,8 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> * @param level Level (controls scaling/wraping!)
* @return Shifted position
*/
- private double getShiftedDim(NumberVector<?, ?> obj, int dim, int level) {
- double pos = obj.doubleValue(dim + 1) + shift[dim];
+ private double getShiftedDim(NumberVector<?> obj, int dim, int level) {
+ double pos = obj.doubleValue(dim) + shift[dim];
pos = (pos - min[dim]) / width[dim] * (1 + level);
return pos - Math.floor(pos);
}
@@ -482,7 +494,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> * @param tlevel Target level
* @return Node
*/
- public Node findClosestNode(NumberVector<?, ?> vec, int tlevel) {
+ public Node findClosestNode(NumberVector<?> vec, int tlevel) {
Node cur = root;
for(int level = 0; level <= tlevel; level++) {
if(cur.children == null) {
@@ -637,26 +649,26 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> *
* @apiviz.exclude
*/
- public static class Parameterizer<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer {
/**
* Parameter to specify the minimum neighborhood size
*/
- public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered.");
+ public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered.");
/**
* Parameter to specify the averaging neighborhood scaling.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood");
+ public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood");
/**
* Parameter to specify the number of Grids to use.
*/
- public static final OptionID GRIDS_ID = OptionID.getOrCreateOptionID("loci.g", "The number of Grids to use.");
+ public static final OptionID GRIDS_ID = new OptionID("loci.g", "The number of Grids to use.");
/**
* Parameter to specify the seed to initialize Random.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("loci.seed", "The seed to use for initializing Random.");
+ public static final OptionID SEED_ID = new OptionID("loci.seed", "The seed to use for initializing Random.");
/**
* Neighborhood minimum size
@@ -674,9 +686,9 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> protected int g = 1;
/**
- * Random generator seed
+ * Random generator
*/
- protected Long seed = null;
+ protected RandomFactory rnd;
/**
* The distance function
@@ -702,9 +714,9 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> this.g = g.getValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- this.seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ this.rnd = rndP.getValue();
}
final IntParameter alphaP = new IntParameter(ALPHA_ID, 4);
@@ -718,7 +730,7 @@ public class ALOCI<O extends NumberVector<O, ?>, D extends NumberDistance<D, ?>> @Override
protected ALOCI<O, D> makeInstance() {
- return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, seed);
+ return new ALOCI<O, D>(distanceFunction, nmin, alpha, g, rnd);
}
}
-}
\ No newline at end of file +}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java index 9c1a216a..2a4885dc 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractAggarwalYuOutlier.java @@ -25,28 +25,26 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; import java.util.ArrayList; import java.util.Collections; -import java.util.Vector; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; /** @@ -64,19 +62,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; * * @author Ahmed Hettab * @author Erich Schubert 
+ * + * @param <V> Vector type */ @Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668") -public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { - /** - * OptionID for the grid size - */ - public static final OptionID PHI_ID = OptionID.getOrCreateOptionID("ay.phi", "The number of equi-depth grid ranges to use in each dimension."); - - /** - * OptionID for the target dimensionality - */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("ay.k", "Subspace dimensionality to search for."); - +public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * Symbolic value for subspaces not in use. * @@ -86,7 +76,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex public static final int DONT_CARE = 0; /** - * The number of partitions for each dimension + * The number of partitions for each dimension. */ protected int phi; @@ -112,33 +102,32 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex * Each attribute of data is divided into phi equi-depth ranges.<br /> * Each range contains a fraction f=1/phi of the records. 
* - * @param database + * @param relation Relation to process * @return range map */ - protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> database) { - final int dim = DatabaseUtil.dimensionality(database); - final int size = database.size(); - final DBIDs allids = database.getDBIDs(); + protected ArrayList<ArrayList<DBIDs>> buildRanges(Relation<V> relation) { + final int dim = RelationUtil.dimensionality(relation); + final int size = relation.size(); + final DBIDs allids = relation.getDBIDs(); final ArrayList<ArrayList<DBIDs>> ranges = new ArrayList<ArrayList<DBIDs>>(); // Temporary projection storage of the database - final ArrayList<ArrayList<DoubleObjPair<DBID>>> dbAxis = new ArrayList<ArrayList<DoubleObjPair<DBID>>>(dim); + final ArrayList<ArrayList<DoubleDBIDPair>> dbAxis = new ArrayList<ArrayList<DoubleDBIDPair>>(dim); for(int i = 0; i < dim; i++) { - ArrayList<DoubleObjPair<DBID>> axis = new ArrayList<DoubleObjPair<DBID>>(size); + ArrayList<DoubleDBIDPair> axis = new ArrayList<DoubleDBIDPair>(size); dbAxis.add(i, axis); } // Project for(DBIDIter iter = allids.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); - final V obj = database.get(id); - for(int d = 1; d <= dim; d++) { - dbAxis.get(d - 1).add(new DoubleObjPair<DBID>(obj.doubleValue(d), id)); + final V obj = relation.get(iter); + for(int d = 0; d < dim; d++) { + dbAxis.get(d).add(DBIDUtil.newPair(obj.doubleValue(d), iter)); } } // Split into cells final double part = size * 1.0 / phi; - for(int d = 1; d <= dim; d++) { - ArrayList<DoubleObjPair<DBID>> axis = dbAxis.get(d - 1); + for(int d = 0; d < dim; d++) { + ArrayList<DoubleDBIDPair> axis = dbAxis.get(d); Collections.sort(axis); ArrayList<DBIDs> dimranges = new ArrayList<DBIDs>(phi + 1); dimranges.add(allids); @@ -150,7 +139,7 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex } ArrayModifiableDBIDs currange = DBIDUtil.newArray(phi + 1); for(int i = start; i < end; i++) { - 
currange.add(axis.get(i).second); + currange.add(axis.get(i)); } start = end; dimranges.add(currange); @@ -161,14 +150,15 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex } /** - * Method to calculate the sparsity coefficient of + * Method to calculate the sparsity coefficient of. * * @param setsize Size of subset * @param dbsize Size of database * @param k Dimensionality + * @param phi Phi parameter * @return sparsity coefficient */ - protected double sparsity(final int setsize, final int dbsize, final int k) { + protected static double sparsity(final int setsize, final int dbsize, final int k, final double phi) { // calculate sparsity c final double f = 1. / phi; final double fK = Math.pow(f, k); @@ -177,16 +167,17 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex } /** - * Method to get the ids in the given subspace + * Method to get the ids in the given subspace. * - * @param subspace + * @param subspace Subspace to process + * @param ranges List of DBID ranges * @return ids */ - protected DBIDs computeSubspace(Vector<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) { - HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first - 1).get(subspace.get(0).second)); + protected DBIDs computeSubspace(ArrayList<IntIntPair> subspace, ArrayList<ArrayList<DBIDs>> ranges) { + HashSetModifiableDBIDs ids = DBIDUtil.newHashSet(ranges.get(subspace.get(0).first).get(subspace.get(0).second)); // intersect all selected dimensions for(int i = 1; i < subspace.size(); i++) { - DBIDs current = ranges.get(subspace.get(i).first - 1).get(subspace.get(i).second); + DBIDs current = ranges.get(subspace.get(i).first).get(subspace.get(i).second); ids.retainAll(current); if(ids.size() == 0) { break; @@ -226,19 +217,37 @@ public abstract class AbstractAggarwalYuOutlier<V extends NumberVector<?, ?>> ex * * @apiviz.exclude */ - public static abstract class Parameterizer extends 
AbstractParameterizer { - protected Integer phi; + public abstract static class Parameterizer extends AbstractParameterizer { + /** + * OptionID for the grid size. + */ + public static final OptionID PHI_ID = new OptionID("ay.phi", "The number of equi-depth grid ranges to use in each dimension."); + + /** + * OptionID for the target dimensionality. + */ + public static final OptionID K_ID = new OptionID("ay.k", "Subspace dimensionality to search for."); + + /** + * Phi parameter. + */ + protected int phi; - protected Integer k; + /** + * k Parameter. + */ + protected int k; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter kP = new IntParameter(K_ID, new GreaterEqualConstraint(2)); + final IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterEqualConstraint(2)); if(config.grab(kP)) { k = kP.getValue(); } - final IntParameter phiP = new IntParameter(PHI_ID, new GreaterEqualConstraint(2)); + final IntParameter phiP = new IntParameter(PHI_ID); + phiP.addConstraint(new GreaterEqualConstraint(2)); if(config.grab(phiP)) { phi = phiP.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java index a5ccce3a..0e6f502a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AbstractDBOutlier.java @@ -27,7 +27,7 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
@@ -56,7 +56,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra /**
* Parameter to specify the size of the D-neighborhood
*/
- public static final OptionID D_ID = OptionID.getOrCreateOptionID("dbod.d", "size of the D-neighborhood");
+ public static final OptionID D_ID = new OptionID("dbod.d", "size of the D-neighborhood");
/**
* Holds the value of {@link #D_ID}.
@@ -83,7 +83,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra */
public OutlierResult run(Database database, Relation<O> relation) {
// Run the actual score process
- DataStore<Double> dbodscore = computeOutlierScores(database, relation, d);
+ DoubleDataStore dbodscore = computeOutlierScores(database, relation, d);
// Build result representation.
Relation<Double> scoreResult = new MaterializedRelation<Double>("Density-Based Outlier Detection", "db-outlier", TypeUtil.DOUBLE, dbodscore, relation.getDBIDs());
@@ -99,7 +99,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra * @param d distance
* @return computed scores
*/
- protected abstract DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d);
+ protected abstract DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d);
@Override
public TypeInformation[] getInputTypeRestriction() {
@@ -113,7 +113,7 @@ public abstract class AbstractDBOutlier<O, D extends Distance<D>> extends Abstra *
* @apiviz.exclude
*/
- public static abstract class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public abstract static class Parameterizer<O, D extends Distance<D>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
/**
* Query radius
*/
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java index 1d02e865..c263cdfa 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuEvolutionary.java @@ -37,18 +37,18 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -58,7 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.FCPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -85,40 +85,26 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; @Title("EAFOD: the evolutionary outlier detection algorithm")
@Description("Outlier detection for high dimensional data")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuEvolutionary<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
- protected static final Logging logger = Logging.getLogger(AggarwalYuEvolutionary.class);
-
- /**
- * Parameter to specify the number of solutions must be an integer greater
- * than 1.
- * <p>
- * Key: {@code -eafod.m}
- * </p>
- */
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("ay.m", "Population size for evolutionary algorithm.");
-
- /**
- * Parameter to specify the random generator seed.
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("ay.seed", "The random number generator seed.");
+ private static final Logging LOG = Logging.getLogger(AggarwalYuEvolutionary.class);
/**
* Maximum iteration count for evolutionary search.
*/
- protected final int MAX_ITERATIONS = 1000;
+ protected final static int MAX_ITERATIONS = 1000;
/**
- * Holds the value of {@link #M_ID}.
+ * Holds the value of {@link Parameterizer#M_ID}.
*/
private int m;
/**
- * Holds the value of {@link #SEED_ID}.
+ * Random generator.
*/
- private Long seed;
+ private RandomFactory rnd;
/**
* Constructor.
@@ -126,12 +112,12 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra * @param k K
* @param phi Phi
* @param m M
- * @param seed Seed
+ * @param rnd Random generator
*/
- public AggarwalYuEvolutionary(int k, int phi, int m, Long seed) {
+ public AggarwalYuEvolutionary(int k, int phi, int m, RandomFactory rnd) {
super(k, phi);
this.m = m;
- this.seed = seed;
+ this.rnd = rnd;
}
/**
@@ -145,27 +131,25 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra final int dbsize = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- Collection<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, seed)).run();
+ Iterable<Individuum> individuums = (new EvolutionarySearch(relation, ranges, m, rnd.getRandom())).run();
WritableDoubleDataStore outlierScore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
for(Individuum ind : individuums) {
DBIDs ids = computeSubspaceForGene(ind.getGene(), ranges);
- double sparsityC = sparsity(ids.size(), dbsize, k);
+ double sparsityC = sparsity(ids.size(), dbsize, k, phi);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
- double prev = outlierScore.doubleValue(id);
+ double prev = outlierScore.doubleValue(iter);
if(Double.isNaN(prev) || sparsityC < prev) {
- outlierScore.putDouble(id, sparsityC);
+ outlierScore.putDouble(iter, sparsityC);
}
}
}
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- double val = outlierScore.doubleValue(id);
+ double val = outlierScore.doubleValue(iditer);
if(Double.isNaN(val)) {
- outlierScore.putDouble(id, 0.0);
+ outlierScore.putDouble(iditer, 0.0);
val = 0.0;
}
minmax.put(val);
@@ -177,7 +161,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -189,17 +173,17 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra */
private class EvolutionarySearch {
/**
- * Database size
+ * Database size.
*/
final int dbsize;
/**
- * Database dimensionality
+ * Database dimensionality.
*/
final int dim;
/**
- * Database ranges
+ * Database ranges.
*/
final ArrayList<ArrayList<DBIDs>> ranges;
@@ -209,36 +193,34 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra final int m;
/**
- * random generator
+ * Random generator.
*/
final private Random random;
/**
* Constructor.
*
- * @param database Database to use
+ * @param relation Database to use
+ * @param ranges DBID ranges to process
* @param m Population size
- * @param seed Random generator seed
+ * @param random Random generator
*/
- public EvolutionarySearch(Relation<V> database, ArrayList<ArrayList<DBIDs>> ranges, int m, Long seed) {
+ public EvolutionarySearch(Relation<V> relation, ArrayList<ArrayList<DBIDs>> ranges, int m, Random random) {
super();
this.ranges = ranges;
this.m = m;
- this.dbsize = database.size();
- this.dim = DatabaseUtil.dimensionality(database);
- if(seed != null) {
- this.random = new Random(seed);
- }
- else {
- this.random = new Random();
- }
+ this.dbsize = relation.size();
+ this.dim = RelationUtil.dimensionality(relation);
+ this.random = random;
}
- public Collection<Individuum> run() {
+ public Iterable<Individuum> run() {
ArrayList<Individuum> pop = initialPopulation(m);
// best Population
TopBoundedHeap<Individuum> bestSol = new TopBoundedHeap<Individuum>(m, Collections.reverseOrder());
- bestSol.addAll(pop);
+ for (Individuum ind : pop) {
+ bestSol.add(ind);
+ }
int iterations = 0;
while(!checkConvergence(pop)) {
@@ -249,26 +231,29 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra // Mutation with probability 0.25 , 0.25
pop = mutation(pop, 0.5, 0.5);
// Avoid duplicates
- for(Individuum ind : pop) {
- if(!bestSol.contains(ind)) {
- bestSol.add(ind);
+ ind: for(Individuum ind : pop) {
+ for (Individuum b : bestSol) {
+ if (b.equals(ind)) {
+ continue ind;
+ }
}
+ bestSol.add(ind);
}
- if(logger.isDebuggingFinest()) {
- StringBuffer buf = new StringBuffer();
+ if(LOG.isDebuggingFinest()) {
+ StringBuilder buf = new StringBuilder();
buf.append("Top solutions:\n");
for(Individuum ind : bestSol) {
- buf.append(ind.toString()).append("\n");
+ buf.append(ind.toString()).append('\n');
}
buf.append("Population:\n");
for(Individuum ind : pop) {
- buf.append(ind.toString()).append("\n");
+ buf.append(ind.toString()).append('\n');
}
- logger.debugFinest(buf.toString());
+ LOG.debugFinest(buf.toString());
}
iterations++;
if(iterations > MAX_ITERATIONS) {
- logger.warning("Maximum iterations reached.");
+ LOG.warning("Maximum iterations reached.");
break;
}
}
@@ -276,7 +261,10 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra }
/**
- * check the termination criterion
+ * Check the termination criterion.
+ *
+ * @param pop Population
+ * @return Convergence
*/
private boolean checkConvergence(Collection<Individuum> pop) {
if(pop.size() == 0) {
@@ -291,7 +279,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra for(int d = 0; d < dim; d++) {
int val = gene[d] + DONT_CARE;
if(val < 0 || val >= phi + 1) {
- logger.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
+ LOG.warning("Invalid gene value encountered: " + val + " in " + ind.toString());
continue;
}
occur[d][val] += 1;
@@ -299,8 +287,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra }
int conv = (int) (pop.size() * 0.95);
- if(logger.isDebuggingFine()) {
- logger.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("Convergence at " + conv + " of " + pop.size() + " individuums.");
}
for(int d = 0; d < dim; d++) {
boolean converged = false;
@@ -353,18 +341,21 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra }
/**
+ * Select surviving individuums weighted by rank.
+ *
* the selection criterion for the genetic algorithm: <br>
* roulette wheel mechanism: <br>
* where the probability of sampling an individual of the population was
* proportional to p - r(i), where p is the size of population and r(i) the
* rank of i-th individual
*
- * @param population
+ * @param population Population
+ * @return Survivors
*/
private ArrayList<Individuum> rouletteRankSelection(ArrayList<Individuum> population) {
final int popsize = population.size();
// Relative weight := popsize - position => sum(1..popsize)
- int totalweight = popsize * (popsize + 1) / 2;
+ int totalweight = (popsize * (popsize + 1)) >> 1;
// Survivors
ArrayList<Individuum> survivors = new ArrayList<Individuum>(popsize);
@@ -392,7 +383,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra }
/**
- * method implements the mutation algorithm
+ * Apply the mutation algorithm.
*/
private ArrayList<Individuum> mutation(ArrayList<Individuum> population, double perc1, double perc2) {
// the Mutations
@@ -470,7 +461,7 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra */
private Individuum makeIndividuum(int[] gene) {
final DBIDs ids = computeSubspaceForGene(gene, ranges);
- final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k) : Double.MAX_VALUE;
+ final double fitness = (ids.size() > 0) ? sparsity(ids.size(), dbsize, k, phi) : Double.MAX_VALUE;
return new Individuum(fitness, gene);
}
@@ -543,8 +534,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra l1[next] = parent1.getGene()[next];
l2[next] = parent2.getGene()[next];
- final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k);
- final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k);
+ final double sparsityL1 = sparsity(computeSubspaceForGene(l1, ranges).size(), dbsize, k, phi);
+ final double sparsityL2 = sparsity(computeSubspaceForGene(l2, ranges).size(), dbsize, k, phi);
if(sparsityL1 <= sparsityL2) {
b = l1.clone();
@@ -619,6 +610,8 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra * Individuum for the evolutionary search.
*
* @author Erich Schubert
+ * + * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities.pairs.FCPair */
private static class Individuum extends FCPair<Double, int[]> {
/**
@@ -691,27 +684,42 @@ public class AggarwalYuEvolutionary<V extends NumberVector<?, ?>> extends Abstra *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ /**
+ * Parameter to specify the number of solutions, must be an integer greater
+ * than 1.
+ * <p>
+ * Key: {@code -ay.m}
+ * </p>
+ */
+ public static final OptionID M_ID = new OptionID("ay.m", "Population size for evolutionary algorithm.");
+
+ /**
+ * Parameter to specify the random generator seed.
+ */
+ public static final OptionID SEED_ID = new OptionID("ay.seed", "The random number generator seed.");
+
protected int m = 0;
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter mP = new IntParameter(M_ID, new GreaterEqualConstraint(2));
+ final IntParameter mP = new IntParameter(M_ID);
+ mP.addConstraint(new GreaterEqualConstraint(2));
if(config.grab(mP)) {
m = mP.getValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected AggarwalYuEvolutionary<V> makeInstance() {
- return new AggarwalYuEvolutionary<V>(k, phi, m, seed);
+ return new AggarwalYuEvolutionary<V>(k, phi, m, rnd);
}
}
-}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java index 0bb73aba..9cd7d79f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/AggarwalYuNaive.java @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import java.util.ArrayList;
-import java.util.Vector;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
@@ -35,12 +34,12 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -65,16 +64,18 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; *
* @author Ahmed Hettab
* @author Erich Schubert
+ *
+ * @param <V> Vector type
*/
// TODO: progress logging!
@Title("BruteForce: Outlier detection for high dimensional data")
@Description("Examines all possible sets of k dimensional projections")
@Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668")
-public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier<V> {
+public class AggarwalYuNaive<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier<V> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(AggarwalYuNaive.class);
+ private static final Logging LOG = Logging.getLogger(AggarwalYuNaive.class);
/**
* Constructor.
@@ -93,23 +94,23 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar * @return Outlier detection result
*/
public OutlierResult run(Relation<V> relation) {
- final int dimensionality = DatabaseUtil.dimensionality(relation);
+ final int dimensionality = RelationUtil.dimensionality(relation);
final int size = relation.size();
ArrayList<ArrayList<DBIDs>> ranges = buildRanges(relation);
- ArrayList<Vector<IntIntPair>> Rk;
+ ArrayList<ArrayList<IntIntPair>> Rk;
// Build a list of all subspaces
{
// R1 initial one-dimensional subspaces.
- Rk = new ArrayList<Vector<IntIntPair>>();
+ Rk = new ArrayList<ArrayList<IntIntPair>>();
// Set of all dim*phi ranges
ArrayList<IntIntPair> q = new ArrayList<IntIntPair>();
- for(int i = 1; i <= dimensionality; i++) {
+ for(int i = 0; i < dimensionality; i++) {
for(int j = 1; j <= phi; j++) {
IntIntPair s = new IntIntPair(i, j);
q.add(s);
// Add to first Rk
- Vector<IntIntPair> v = new Vector<IntIntPair>();
+ ArrayList<IntIntPair> v = new ArrayList<IntIntPair>();
v.add(s);
Rk.add(v);
}
@@ -117,10 +118,10 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar // build Ri
for(int i = 2; i <= k; i++) {
- ArrayList<Vector<IntIntPair>> Rnew = new ArrayList<Vector<IntIntPair>>();
+ ArrayList<ArrayList<IntIntPair>> Rnew = new ArrayList<ArrayList<IntIntPair>>();
for(int j = 0; j < Rk.size(); j++) {
- Vector<IntIntPair> c = Rk.get(j);
+ ArrayList<IntIntPair> c = Rk.get(j);
for(IntIntPair pair : q) {
boolean invalid = false;
for(int t = 0; t < c.size(); t++) {
@@ -130,7 +131,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar }
}
if(!invalid) {
- Vector<IntIntPair> neu = new Vector<IntIntPair>(c);
+ ArrayList<IntIntPair> neu = new ArrayList<IntIntPair>(c);
neu.add(pair);
Rnew.add(neu);
}
@@ -142,9 +143,9 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar WritableDoubleDataStore sparsity = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
// calculate the sparsity coefficient
- for(Vector<IntIntPair> sub : Rk) {
+ for(ArrayList<IntIntPair> sub : Rk) {
DBIDs ids = computeSubspace(sub, ranges);
- final double sparsityC = sparsity(ids.size(), size, k);
+ final double sparsityC = sparsity(ids.size(), size, k, phi);
if(sparsityC < 0) {
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
@@ -171,7 +172,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -181,7 +182,7 @@ public class AggarwalYuNaive<V extends NumberVector<?, ?>> extends AbstractAggar *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>> extends AbstractAggarwalYuOutlier.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractAggarwalYuOutlier.Parameterizer {
@Override
protected AggarwalYuNaive<V> makeInstance() {
return new AggarwalYuNaive<V>(k, phi);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java new file mode 100644 index 00000000..ac544b7f --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/COP.java @@ -0,0 +1,385 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import 
de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Correlation outlier probability: Outlier Detection in Arbitrarily Oriented + * Subspaces + * + * <p> + * Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek<br /> + * Outlier Detection in Arbitrarily Oriented Subspaces<br /> + * in: Proc. IEEE International Conference on Data Mining (ICDM 2012) + * </p> + * + * @author Erich Schubert + * + * @param <V> the type of NumberVector handled by this Algorithm + * @param <D> Distance type + */ +@Title("COP: Correlation Outlier Probability") +@Reference(authors = "Hans-Peter Kriegel, Peer Kröger, Erich Schubert, Arthur Zimek", title = "Outlier Detection in Arbitrarily Oriented Subspaces", booktitle = "Proc. IEEE International Conference on Data Mining (ICDM 2012)") +public class COP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(COP.class); + + /** + * Result name for the COP outlier scores. + */ + public static final String COP_SCORES = "cop-outlier"; + + /** + * Result name for the dimensionality. 
+ */ + public static final String COP_DIM = "cop-dim"; + + /** + * Result name for the error vectors. + */ + public static final String COP_ERRORVEC = "cop-errorvec"; + + /** + * Number of neighbors to be considered. + */ + int k; + + /** + * Holds the PCA runner. + */ + private PCARunner<V> pca; + + /** + * Expected amount of outliers. + */ + double expect = 0.0001; + + /** + * Score type. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public enum DistanceDist { + /** + * Use chi^2 for score normalization. + */ + CHISQUARED, + /** + * Use gamma distributions for score normalization. + */ + GAMMA + } + + /** + * Type of distribution to assume for distances. + */ + DistanceDist dist = DistanceDist.CHISQUARED; + + /** + * Constructor. + * + * @param distanceFunction distance function + * @param k number of neighbors + * @param pca PCA computation method + * @param expect Expected fraction of outliers (for score normalization) + * @param dist Distance distribution model (ChiSquared, Gamma) + */ + public COP(DistanceFunction<? super V, D> distanceFunction, int k, PCARunner<V> pca, double expect, DistanceDist dist) { + super(distanceFunction); + this.k = k; + this.pca = pca; + this.expect = expect; + this.dist = dist; + } + + /** + * Process a single relation. + * + * @param relation Relation to process + * @return Outlier detection result + */ + public OutlierResult run(Relation<V> relation) { + final DBIDs ids = relation.getDBIDs(); + KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1); + + final int dim = RelationUtil.dimensionality(relation); + if (k <= dim + 1) { + LOG.warning("PCA is underspecified with a too low k! 
k should be at much larger than " + dim); + } + + WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); + WritableDataStore<Vector> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class); + WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1); + // compute neighbors of each db object + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null; + + for (DBIDIter id = ids.iter(); id.valid(); id.advance()) { + KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors); + nids.remove(id); // Do not use query object + + Vector centroid = Centroid.make(relation, nids).toVector(relation).getColumnVector(); + Vector relative = relation.get(id).getColumnVector().minusEquals(centroid); + + PCAResult pcares = pca.processIds(nids, relation); + Matrix evecs = pcares.getEigenvectors(); + Vector projected = evecs.transposeTimes(relative); + double[] evs = pcares.getEigenvalues(); + + double min = Double.POSITIVE_INFINITY; + int vdim = dim; + switch(dist) { + case CHISQUARED: { + double sqdevs = 0; + for (int d = 0; d < dim; d++) { + // Scale with Stddev + double dev = projected.get(d); + // Accumulate + sqdevs += dev * dev / evs[d]; + // Evaluate + double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1); + if (score < min) { + min = score; + vdim = d + 1; + } + } + break; + } + case GAMMA: { + double[][] dists = new double[dim][nids.size()]; + int j = 0; + Vector srel = new Vector(dim); + for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) { + V vec = relation.get(s); + for (int d = 0; d < dim; d++) { + srel.set(d, vec.doubleValue(d) - centroid.get(d)); + } + Vector serr = evecs.transposeTimes(srel); + double sqdist 
= 0.0; + for (int d = 0; d < dim; d++) { + sqdist += serr.get(d) * serr.get(d) / evs[d]; + dists[d][j] = sqdist; + } + j++; + } + double sqdevs = 0; + for (int d = 0; d < dim; d++) { + // Scale with Stddev + final double dev = projected.get(d); + // Accumulate + sqdevs += dev * dev / evs[d]; + // Sort, so we can trim the top 15% below. + Arrays.sort(dists[d]); + // Evaluate + double score = 1 - GammaDistribution.estimate(dists[d], (int) (.85 * dists[d].length)).cdf(sqdevs); + if (score < min) { + min = score; + vdim = d + 1; + } + } + break; + } + } + // Normalize the value + final double prob = expect * (1 - min) / (expect + min); + // Construct the error vector: + for (int d = vdim; d < dim; d++) { + projected.set(d, 0.0); + } + Vector ev = evecs.times(projected).timesEquals(-1 * prob); + + cop_score.putDouble(id, prob); + cop_err_v.put(id, ev); + cop_dim.putInt(id, dim + 1 - vdim); + + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + if (prog != null) { + prog.ensureCompleted(LOG); + } + + // combine results. + Relation<Double> scoreResult = new MaterializedRelation<Double>("Correlation Outlier Probabilities", COP_SCORES, TypeUtil.DOUBLE, cop_score, ids); + OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids)); + result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids)); + return result; + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> { + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its COP_SCORE, must be an integer greater than + * 5. + * <p> + * Key: {@code -cop.k} + * </p> + */ + public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE."); + + /** + * Distribution assumption for distances. + * <p> + * Key: {@code -cop.dist} + * </p> + */ + public static final OptionID DIST_ID = new OptionID("cop.dist", "The assumed distribution of squared distances. ChiSquared is faster, Gamma expected to be more accurate but could also overfit."); + + /** + * Class to compute the PCA with. + * <p> + * Key: {@code -cop.pcarunner} + * </p> + */ + public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA."); + + /** + * Expected share of outliers. + * <p> + * Key: {@code -cop.expect} + * + * Default: 0.001 + * </p> + */ + public static final OptionID EXPECT_ID = new OptionID("cop.expect", "Expected share of outliers. Only affect score normalization."); + + /** + * Number of neighbors to be considered. + */ + int k; + + /** + * Holds the object performing the dependency derivation. + */ + PCARunner<V> pca; + + /** + * Distance distribution assumption. + */ + DistanceDist dist; + + /** + * Expected amount of outliers. 
+ */ + double expect; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterConstraint(5)); + if (config.grab(kP)) { + k = kP.intValue(); + } + EnumParameter<DistanceDist> distP = new EnumParameter<DistanceDist>(DIST_ID, DistanceDist.class, DistanceDist.GAMMA); + if (config.grab(distP)) { + dist = distP.getValue(); + } + DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.001); + expectP.addConstraint(new GreaterConstraint(0)); + expectP.addConstraint(new LessConstraint(1.0)); + if (config.grab(expectP)) { + expect = expectP.doubleValue(); + } + ObjectParameter<PCARunner<V>> pcaP = new ObjectParameter<PCARunner<V>>(PCARUNNER_ID, PCARunner.class, PCARunner.class); + if (config.grab(pcaP)) { + pca = pcaP.instantiateClass(config); + } + } + + @Override + protected COP<V, D> makeInstance() { + return new COP<V, D>(distanceFunction, k, pca, expect, dist); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java index dbaf8a5a..ba1fd841 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierDetection.java @@ -24,17 +24,17 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -72,13 +72,13 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DBOutlierDetection.class);
+ private static final Logging LOG = Logging.getLogger(DBOutlierDetection.class);
/**
* Parameter to specify the minimum fraction of objects that must be outside
* the D- neighborhood of an outlier
*/
- public static final OptionID P_ID = OptionID.getOrCreateOptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
+ public static final OptionID P_ID = new OptionID("dbod.p", "minimum fraction of objects that must be outside the D-neighborhood of an outlier");
/**
* Holds the value of {@link #P_ID}.
@@ -98,7 +98,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl }
@Override
- protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D neighborhoodSize) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
@@ -106,11 +106,11 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl int m = (int) ((distFunc.getRelation().size()) * (1 - p));
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
- if(logger.isVerbose()) {
- logger.verbose("computing outlier flag");
+ if(LOG.isVerbose()) {
+ LOG.verbose("computing outlier flag");
}
- FiniteProgress progressOFlags = logger.isVerbose() ? new FiniteProgress("DBOutlier for objects", distFunc.getRelation().size(), logger) : null;
+ FiniteProgress progressOFlags = LOG.isVerbose() ? new FiniteProgress("DBOutlier for objects", distFunc.getRelation().size(), LOG) : null;
int counter = 0;
// if index exists, kNN query. if the distance to the mth nearest neighbor
// is more than d -> object is outlier
@@ -118,8 +118,8 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl for(DBIDIter iditer = distFunc.getRelation().iterDBIDs(); iditer.valid(); iditer.advance()) { counter++;
final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, m);
- if(logger.isDebugging()) {
- logger.debugFine("distance to mth nearest neighbour" + knns.toString());
+ if(LOG.isDebugging()) {
+ LOG.debugFine("distance to mth nearest neighbour" + knns.toString());
}
if(knns.get(Math.min(m, knns.size()) - 1).getDistance().compareTo(neighborhoodSize) <= 0) {
// flag as outlier
@@ -131,7 +131,7 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl }
}
if(progressOFlags != null) {
- progressOFlags.setProcessed(counter, logger);
+ progressOFlags.setProcessed(counter, LOG);
}
}
else {
@@ -149,18 +149,18 @@ public class DBOutlierDetection<O, D extends Distance<D>> extends AbstractDBOutl }
if(progressOFlags != null) {
- progressOFlags.setProcessed(counter, logger);
+ progressOFlags.setProcessed(counter, LOG);
}
}
if(progressOFlags != null) {
- progressOFlags.ensureCompleted(logger);
+ progressOFlags.ensureCompleted(LOG);
}
return scores;
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java index 419b9a0e..a2d39130 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/DBOutlierScore.java @@ -24,9 +24,9 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; */
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStore;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -60,7 +60,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier< /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DBOutlierScore.class);
+ private static final Logging LOG = Logging.getLogger(DBOutlierScore.class);
/**
* Constructor with parameters.
@@ -73,7 +73,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier< }
@Override
- protected DataStore<Double> computeOutlierScores(Database database, Relation<O> relation, D d) {
+ protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, D d) {
DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc);
final double size = distFunc.getRelation().size();
@@ -90,7 +90,7 @@ public class DBOutlierScore<O, D extends Distance<D>> extends AbstractDBOutlier< @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java index db4b7782..2d2a4466 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/EMOutlier.java @@ -62,11 +62,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz // TODO: re-use an existing EM when present?
@Title("EM Outlier: Outlier Detection based on the generic EM clustering")
@Description("The outlier score assigned is based on the highest cluster probability obtained from EM clustering.")
-public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class EMOutlier<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(EMOutlier.class);
+ private static final Logging LOG = Logging.getLogger(EMOutlier.class);
/**
* Inner algorithm.
@@ -120,7 +120,7 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -130,7 +130,7 @@ public class EMOutlier<V extends NumberVector<V, ?>> extends AbstractAlgorithm<O *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected EM<V> em = null;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java index 51833c8b..6aed60fe 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianModel.java @@ -29,10 +29,10 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -43,7 +43,6 @@ import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -61,16 +60,16 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; */
@Title("Gaussian Model Outlier Detection")
@Description("Fit a multivariate gaussian model onto the data, and use the PDF to compute an outlier score.")
-public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianModel<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(GaussianModel.class);
+ private static final Logging LOG = Logging.getLogger(GaussianModel.class);
/**
* OptionID for inversion flag.
*/
- public static final OptionID INVERT_ID = OptionID.getOrCreateOptionID("gaussod.invert", "Invert the value range to [0:1], with 1 being outliers instead of 0.");
+ public static final OptionID INVERT_ID = new OptionID("gaussod.invert", "Invert the value range to [0:1], with 1 being outliers instead of 0.");
/**
* Small value to increment diagonally of a matrix in order to avoid
@@ -113,7 +112,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit Matrix covarianceTransposed = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
// Normalization factors for Gaussian PDF
- final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(relation)) * covarianceMatrix.det())));
+ final double fakt = (1.0 / (Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * covarianceMatrix.det())));
// for each object compute Mahalanobis distance
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { @@ -130,8 +129,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit if(invert) {
double max = mm.getMax() != 0 ? mm.getMax() : 1.;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- oscores.putDouble(id, (max - oscores.doubleValue(id)) / max);
+ oscores.putDouble(iditer, (max - oscores.doubleValue(iditer)) / max);
}
meta = new BasicOutlierScoreMeta(0.0, 1.0);
}
@@ -149,7 +147,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -159,7 +157,7 @@ public class GaussianModel<V extends NumberVector<V, ?>> extends AbstractAlgorit *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected boolean invert = false;
@Override
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java index 1cd31442..db53a3ef 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/GaussianUniformMixture.java @@ -32,13 +32,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.generic.MaskedDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.math.MathUtil;
@@ -48,7 +48,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -79,21 +78,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; @Title("Gaussian-Uniform Mixture Model Outlier Detection")
@Description("Fits a mixture model consisting of a Gaussian and a uniform distribution to the data.")
@Reference(prefix = "Generalization using the likelihood gain as outlier score of", authors = "Eskin, Eleazar", title = "Anomaly detection over noisy data using learned probability distributions", booktitle = "Proc. of the Seventeenth International Conference on Machine Learning (ICML-2000)")
-public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class GaussianUniformMixture<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(GaussianUniformMixture.class);
+ private static final Logging LOG = Logging.getLogger(GaussianUniformMixture.class);
/**
* Parameter to specify the fraction of expected outliers.
*/
- public static final OptionID L_ID = OptionID.getOrCreateOptionID("mmo.l", "expected fraction of outliers");
+ public static final OptionID L_ID = new OptionID("mmo.l", "expected fraction of outliers");
/**
* Parameter to specify the cutoff.
*/
- public static final OptionID C_ID = OptionID.getOrCreateOptionID("mmo.c", "cutoff");
+ public static final OptionID C_ID = new OptionID("mmo.c", "cutoff");
/**
* Small value to increment diagonally of a matrix in order to avoid
@@ -154,20 +153,19 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra // logger.debugFine(logLike + " loglike beginning" +
// loglikelihoodNormal(normalObjs, database));
DoubleMinMax minmax = new DoubleMinMax();
- for(int i = 0; i < objids.size(); i++) {
+
+ DBIDIter iter = objids.iter();
+ for(int i = 0; i < objids.size(); i++, iter.advance()) {
// logger.debugFine("i " + i);
// Change mask to make the current object anomalous
bits.set(i);
// Compute new likelihoods
double currentLogLike = normalObjs.size() * logml + loglikelihoodNormal(normalObjs, relation) + anomalousObjs.size() * logl + loglikelihoodAnomalous(anomalousObjs);
- // Get the actual object id
- DBID curid = objids.get(i);
-
// if the loglike increases more than a threshold, object stays in
// anomalous set and is flagged as outlier
final double loglikeGain = currentLogLike - logLike;
- oscores.putDouble(curid, loglikeGain);
+ oscores.putDouble(iter, loglikeGain);
minmax.put(loglikeGain);
if(loglikeGain > c) {
@@ -221,7 +219,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra Matrix covInv = covarianceMatrix.cheatToAvoidSingularity(SINGULARITY_CHEAT).inverse();
double covarianceDet = covarianceMatrix.det();
- double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, DatabaseUtil.dimensionality(database)) * covarianceDet);
+ double fakt = 1.0 / Math.sqrt(Math.pow(MathUtil.TWOPI, RelationUtil.dimensionality(database)) * covarianceDet);
// for each object compute probability and sum
double prob = 0;
for (DBIDIter iter = objids.iter(); iter.valid(); iter.advance()) {
@@ -239,7 +237,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -249,7 +247,7 @@ public class GaussianUniformMixture<V extends NumberVector<V, ?>> extends Abstra *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected double l = 1E-7;
protected double c = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java index 4ed56e1a..15f6cbf3 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/HilOut.java @@ -36,13 +36,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDFactory;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
@@ -91,11 +93,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; @Title("Fast Outlier Detection in High Dimensional Spaces")
@Description("Algorithm to compute outliers using Hilbert space filling curves")
@Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", url = "http://dx.doi.org/10.1145/375663.375668")
-public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
+public class HilOut<O extends NumberVector<?>> extends AbstractDistanceBasedAlgorithm<O, DoubleDistance, OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HilOut.class);
+ private static final Logging LOG = Logging.getLogger(HilOut.class);
/**
* Number of nearest neighbors
@@ -170,7 +172,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA public OutlierResult run(Database database, Relation<O> relation) {
distq = database.getDistanceQuery(relation, getDistanceFunction());
- d = DatabaseUtil.dimensionality(relation);
+ d = RelationUtil.dimensionality(relation);
WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// Compute extend of dataset.
@@ -181,18 +183,18 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA min = new double[d];
double[] max = new double[d];
for(int i = 0; i < d; i++) {
- min[i] = hbbs.first.doubleValue(i + 1);
- max[i] = hbbs.second.doubleValue(i + 1);
+ min[i] = hbbs.first.doubleValue(i);
+ max[i] = hbbs.second.doubleValue(i);
diameter = Math.max(diameter, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
for(int i = 0; i < d; i++) {
- double diff = (diameter - (max[i] - min[i])) / 2;
+ double diff = (diameter - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
- if(logger.isVerbose()) {
- logger.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube.");
}
}
@@ -200,8 +202,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA capital_n_star = capital_n = relation.size();
HilbertFeatures h = new HilbertFeatures(relation, min, diameter);
- FiniteProgress progressHilOut = logger.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, logger) : null;
- FiniteProgress progressTrueOut = logger.isVerbose() ? new FiniteProgress("True outliers found", n, logger) : null;
+ FiniteProgress progressHilOut = LOG.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, LOG) : null;
+ FiniteProgress progressTrueOut = LOG.isVerbose() ? new FiniteProgress("True outliers found", n, LOG) : null;
// Main part: 1. Phase max. d+1 loops
for(int j = 0; j <= d && n_star < n; j++) {
// initialize (clear) out and wlb - not 100% clear in the paper
@@ -214,7 +216,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA // determine the true outliers (n_star)
trueOutliers(h);
if(progressTrueOut != null) {
- progressTrueOut.setProcessed(n_star, logger);
+ progressTrueOut.setProcessed(n_star, LOG);
}
// Build the top Set as out + wlb
h.top.clear();
@@ -230,7 +232,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA }
}
if(progressHilOut != null) {
- progressHilOut.incrementProcessed(logger);
+ progressHilOut.incrementProcessed(LOG);
}
}
// 2. Phase: Additional Scan if less than n true outliers determined
@@ -241,12 +243,12 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA scan(h, capital_n);
}
if(progressHilOut != null) {
- progressHilOut.setProcessed(d, logger);
- progressHilOut.ensureCompleted(logger);
+ progressHilOut.setProcessed(d, LOG);
+ progressHilOut.ensureCompleted(LOG);
}
if(progressTrueOut != null) {
- progressTrueOut.setProcessed(n, logger);
- progressTrueOut.ensureCompleted(logger);
+ progressTrueOut.setProcessed(n, LOG);
+ progressTrueOut.ensureCompleted(LOG);
}
DoubleMinMax minmax = new DoubleMinMax();
// Return weights in out
@@ -281,8 +283,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA */
private void scan(HilbertFeatures hf, int k0) {
final int mink0 = Math.min(2 * k0, capital_n - 1);
- if(logger.isDebuggingFine()) {
- logger.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star);
}
for(int i = 0; i < hf.pf.length; i++) {
if(hf.pf[i].ubound < omega_star) {
@@ -366,7 +368,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA if(mlevel < level) {
level = mlevel;
final double delta = hf.minDistLevel(hf.pf[i].id, level);
- if(delta >= hf.pf[i].nn.peek().getDoubleDistance()) {
+ if(delta >= hf.pf[i].nn.peek().doubleDistance()) {
break; // stop = true
}
}
@@ -376,10 +378,10 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA double br = hf.boxRadius(i, a - 1, b + 1);
double newlb = 0.0;
double newub = 0.0;
- for(DoubleDistanceResultPair entry : hf.pf[i].nn) {
- newub += entry.getDoubleDistance();
- if(entry.getDoubleDistance() <= br) {
- newlb += entry.getDoubleDistance();
+ for(DoubleDistanceDBIDPair entry : hf.pf[i].nn) {
+ newub += entry.doubleDistance();
+ if(entry.doubleDistance() <= br) {
+ newlb += entry.doubleDistance();
}
}
if(newlb > hf.pf[i].lbound) {
@@ -408,7 +410,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
@@ -482,7 +484,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA int pos = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - pf[pos++] = new HilFeature(iditer.getDBID(), new Heap<DoubleDistanceResultPair>(k, Collections.reverseOrder()));
+ pf[pos++] = new HilFeature(DBIDUtil.deref(iditer), new Heap<DoubleDistanceDBIDPair>(k, Collections.reverseOrder()));
}
this.out = new Heap<HilFeature>(n, new Comparator<HilFeature>() {
@Override
@@ -513,7 +515,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA if(h >= 32) { // 32 to 63 bit
final long scale = Long.MAX_VALUE; // = 63 bits
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
long[] coord = new long[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale);
@@ -524,7 +526,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA else if(h >= 16) { // 16-31 bit
final int scale = ~1 >>> 1;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
int[] coord = new int[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale);
@@ -535,7 +537,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA else if(h >= 8) { // 8-15 bit
final int scale = ~1 >>> 16;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
short[] coord = new short[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale);
@@ -546,7 +548,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA else { // 1-7 bit
final int scale = ~1 >>> 8;
for(int i = 0; i < pf.length; i++) {
- NumberVector<?, ?> obj = relation.get(pf[i].id);
+ NumberVector<?> obj = relation.get(pf[i].id);
byte[] coord = new byte[d];
for(int dim = 0; dim < d; dim++) {
coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale);
@@ -575,15 +577,13 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA */
private void updateOUT(int i) {
if(out.size() < n) {
- out.offer(pf[i]);
+ out.add(pf[i]);
}
else {
HilFeature head = out.peek();
if(pf[i].ubound > head.ubound) {
// replace smallest
- out.poll();
- // assert (out.peek().ubound >= head.ubound);
- out.offer(pf[i]);
+ out.replaceTopElement(pf[i]);
}
}
}
@@ -595,15 +595,13 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA */
private void updateWLB(int i) {
if(wlb.size() < n) {
- wlb.offer(pf[i]);
+ wlb.add(pf[i]);
}
else {
HilFeature head = wlb.peek();
if(pf[i].lbound > head.lbound) {
// replace smallest
- wlb.poll();
- // assert (wlb.peek().lbound >= head.lbound);
- wlb.offer(pf[i]);
+ wlb.replaceTopElement(pf[i]);
}
}
}
@@ -639,7 +637,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA * @param level Level of the corresponding r-region
*/
private double minDistLevel(DBID id, int level) {
- final NumberVector<?, ?> obj = relation.get(id);
+ final NumberVector<?> obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
// 2 ^ - (level - 1)
final double r = 1.0 / (1 << (level - 1));
@@ -659,7 +657,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA * @param level Level of the corresponding r-region
*/
private double maxDistLevel(DBID id, int level) {
- final NumberVector<?, ?> obj = relation.get(id);
+ final NumberVector<?> obj = relation.get(id);
// level 1 is supposed to have r=1 as in the original publication
final double r = 1.0 / (1 << (level - 1));
double dist;
@@ -780,8 +778,8 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA * @param dim Dimension
* @return Projected and shifted position
*/
- private double getDimForObject(NumberVector<?, ?> obj, int dim) {
- return (obj.doubleValue(dim + 1) - min[dim]) / diameter + shift;
+ private double getDimForObject(NumberVector<?> obj, int dim) {
+ return (obj.doubleValue(dim) - min[dim]) / diameter + shift;
}
}
@@ -824,7 +822,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA /**
* Heap with the nearest known neighbors
*/
- public Heap<DoubleDistanceResultPair> nn;
+ public Heap<DoubleDistanceDBIDPair> nn;
/**
* Set representation of the nearest neighbors for faster lookups
@@ -842,7 +840,7 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA * @param id Object ID
* @param nn Heap for neighbors
*/
- public HilFeature(DBID id, Heap<DoubleDistanceResultPair> nn) {
+ public HilFeature(DBID id, Heap<DoubleDistanceDBIDPair> nn) {
super();
this.id = id;
this.nn = nn;
@@ -864,27 +862,26 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA protected void insert(DBID id, double dt, int k) {
// assert (!nn_keys.contains(id));
if(nn.size() < k) {
- DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
- nn.offer(entry);
+ DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
}
else {
- DoubleDistanceResultPair head = nn.peek();
- if(dt < head.getDoubleDistance()) {
+ DoubleDistanceDBIDPair head = nn.peek();
+ if(dt < head.doubleDistance()) {
head = nn.poll(); // Remove worst
- sum_nn -= head.getDoubleDistance();
- nn_keys.remove(head.getDBID());
+ sum_nn -= head.doubleDistance();
+ nn_keys.remove(head);
- // assert (nn.peek().getDoubleDistance() <= head.getDoubleDistance());
+ // assert (nn.peek().doubleDistance() <= head.doubleDistance());
- DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id);
- nn.offer(entry);
+ DoubleDistanceDBIDPair entry = DBIDFactory.FACTORY.newDistancePair(dt, id);
+ nn.add(entry);
nn_keys.add(id);
sum_nn += dt;
}
}
-
}
}
@@ -897,33 +894,33 @@ public class HilOut<O extends NumberVector<O, ?>> extends AbstractDistanceBasedA *
* @param <O> Vector type
*/
- public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer {
/**
* Parameter to specify how many next neighbors should be used in the
* computation
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("HilOut.k", "Compute up to k next neighbors");
+ public static final OptionID K_ID = new OptionID("HilOut.k", "Compute up to k next neighbors");
/**
* Parameter to specify how many outliers should be computed
*/
- public static final OptionID N_ID = OptionID.getOrCreateOptionID("HilOut.n", "Compute n outliers");
+ public static final OptionID N_ID = new OptionID("HilOut.n", "Compute n outliers");
/**
* Parameter to specify the maximum Hilbert-Level
*/
- public static final OptionID H_ID = OptionID.getOrCreateOptionID("HilOut.h", "Max. Hilbert-Level");
+ public static final OptionID H_ID = new OptionID("HilOut.h", "Max. Hilbert-Level");
/**
* Parameter to specify p of LP-NormDistance
*/
- public static final OptionID T_ID = OptionID.getOrCreateOptionID("HilOut.t", "t of Lt Metric");
+ public static final OptionID T_ID = new OptionID("HilOut.t", "t of Lt Metric");
/**
* Parameter to specify if only the Top n, or also approximations for the
* other elements, should be returned
*/
- public static final OptionID TN_ID = OptionID.getOrCreateOptionID("HilOut.tn", "output of Top n or all elements");
+ public static final OptionID TN_ID = new OptionID("HilOut.tn", "output of Top n or all elements");
/**
* Neighborhood size
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java index 1fe5fe71..655a0910 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/INFLO.java @@ -1,26 +1,27 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -36,10 +37,10 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -81,7 +82,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(INFLO.class);
+ private static final Logging LOG = Logging.getLogger(INFLO.class);
/**
* Parameter to specify if any object is a Core Object must be a double
@@ -89,7 +90,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa * <p>
* see paper "Two-way search method" 3.2
*/
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("inflo.m", "The threshold");
+ public static final OptionID M_ID = new OptionID("inflo.m", "The threshold");
/**
* Holds the value of {@link #M_ID}.
@@ -101,7 +102,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa * considered for computing its INFLO_SCORE. must be an integer greater than
* 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
+ public static final OptionID K_ID = new OptionID("inflo.k", "The number of nearest neighbors of an object to be considered for computing its INFLO_SCORE.");
/**
* Holds the value of {@link #K_ID}.
@@ -140,7 +141,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa // density
WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// init knns and rnns
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
knns.put(iditer, DBIDUtil.newArray());
rnns.put(iditer, DBIDUtil.newArray());
}
@@ -148,38 +149,34 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa // TODO: use kNN preprocessor?
KNNQuery<O, D> knnQuery = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) { + for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
// if not visited count=0
int count = rnns.get(id).size();
- ModifiableDBIDs s;
- if(!processedIDs.contains(id)) {
+ if (!processedIDs.contains(id)) {
// TODO: use exactly k neighbors?
KNNResult<D> list = knnQuery.getKNNForDBID(id, k);
- knns.get(id).addDBIDs(list.asDBIDs());
+ knns.get(id).addDBIDs(list);
processedIDs.add(id);
- s = knns.get(id);
- density.putDouble(id, 1 / list.get(k - 1).getDistance().doubleValue());
+ density.putDouble(id, 1 / list.getKNNDistance().doubleValue());
}
- else {
- s = knns.get(id);
- }
- for (DBIDIter q = s.iter(); q.valid(); q.advance()) {
- if(!processedIDs.contains(q)) {
+ ModifiableDBIDs s = knns.get(id);
+ for (DBIDIter q = knns.get(id).iter(); q.valid(); q.advance()) {
+ if (!processedIDs.contains(q)) {
// TODO: use exactly k neighbors?
KNNResult<D> listQ = knnQuery.getKNNForDBID(q, k);
- knns.get(q).addDBIDs(listQ.asDBIDs());
+ knns.get(q).addDBIDs(listQ);
density.putDouble(q, 1 / listQ.getKNNDistance().doubleValue());
processedIDs.add(q);
}
- if(knns.get(q).contains(id)) {
+ if (knns.get(q).contains(id)) {
rnns.get(q).add(id);
rnns.get(id).add(q);
count++;
}
}
- if(count >= s.size() * m) {
+ if (count >= s.size() * m) {
pruned.add(id);
}
}
@@ -188,8 +185,8 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa // IF Object is pruned INFLO=1.0
DoubleMinMax inflominmax = new DoubleMinMax();
WritableDoubleDataStore inflos = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) { - if(!pruned.contains(id)) {
+ for (DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
+ if (!pruned.contains(id)) {
ModifiableDBIDs knn = knns.get(id);
ModifiableDBIDs rnn = rnns.get(id);
@@ -205,7 +202,7 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa inflominmax.put(den);
}
- if(pruned.contains(id)) {
+ if (pruned.contains(id)) {
inflos.putDouble(id, 1.0);
inflominmax.put(1.0);
}
@@ -224,15 +221,15 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class. - * - * @author Erich Schubert - * - * @apiviz.exclude + * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected double m = 1.0;
@@ -242,14 +239,16 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa @Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final DoubleParameter mP = new DoubleParameter(M_ID, new GreaterConstraint(0.0), 1.0);
- if(config.grab(mP)) {
- m = mP.getValue();
+ final DoubleParameter mP = new DoubleParameter(M_ID, 1.0);
+ mP.addConstraint(new GreaterConstraint(0.0));
+ if (config.grab(mP)) {
+ m = mP.doubleValue();
}
- final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(1));
- if(config.grab(kP)) {
- k = kP.getValue();
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(kP)) {
+ k = kP.intValue();
}
}
@@ -258,4 +257,4 @@ public class INFLO<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBa return new INFLO<O, D>(distanceFunction, m, k);
}
}
-}
\ No newline at end of file +}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java index 08be944a..4c4873dd 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNOutlier.java @@ -1,26 +1,27 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
@@ -32,10 +33,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -77,12 +79,12 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNOutlier.class);
+ private static final Logging LOG = Logging.getLogger(KNNOutlier.class);
/**
* Parameter to specify the k nearest neighbor
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knno.k", "k nearest neighbor");
+ public static final OptionID K_ID = new OptionID("knno.k", "k nearest neighbor");
/**
* The parameter k
@@ -107,28 +109,34 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
- if(logger.isVerbose()) {
- logger.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing the kNN outlier degree (distance to the k nearest neighbor)");
}
- FiniteProgress progressKNNDistance = logger.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), logger) : null;
+ FiniteProgress progressKNNDistance = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// distance to the kth nearest neighbor
final KNNResult<D> knns = knnQuery.getKNNForDBID(iditer, k);
- double dkn = knns.getKNNDistance().doubleValue();
- knno_score.putDouble(iditer, dkn);
+ final double dkn;
+ if(knns instanceof DoubleDistanceKNNList) {
+ dkn = ((DoubleDistanceKNNList) knns).doubleKNNDistance();
+ }
+ else {
+ dkn = knns.getKNNDistance().doubleValue();
+ }
+ knno_score.putDouble(iditer, dkn);
minmax.put(dkn);
if(progressKNNDistance != null) {
- progressKNNDistance.incrementProcessed(logger);
+ progressKNNDistance.incrementProcessed(LOG);
}
}
if(progressKNNDistance != null) {
- progressKNNDistance.ensureCompleted(logger);
+ progressKNNDistance.ensureCompleted(LOG);
}
Relation<Double> scoreres = new MaterializedRelation<Double>("kNN Outlier Score", "knn-outlier", TypeUtil.DOUBLE, knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
@@ -142,15 +150,15 @@ public class KNNOutlier<O, D extends NumberDistance<D, ?>> extends AbstractDista @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
- * Parameterization class. - * - * @author Erich Schubert - * - * @apiviz.exclude + * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
protected int k = 0;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java index cb3ca2f1..e7eeeb9c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/KNNWeightOutlier.java @@ -31,13 +31,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -74,17 +76,17 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(KNNWeightOutlier.class);
+ private static final Logging LOG = Logging.getLogger(KNNWeightOutlier.class);
/**
* Parameter to specify the k nearest neighbor
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("knnwod.k", "k nearest neighbor");
+ public static final OptionID K_ID = new OptionID("knnwod.k", "k nearest neighbor");
/**
* The kNN query used.
*/
- public static final OptionID KNNQUERY_ID = OptionID.getOrCreateOptionID("knnwod.knnquery", "kNN query to use");
+ public static final OptionID KNNQUERY_ID = new OptionID("knnwod.knnquery", "kNN query to use");
/**
* Holds the value of {@link #K_ID}.
@@ -109,33 +111,40 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac final DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O, D> knnQuery = database.getKNNQuery(distanceQuery, k);
- if(logger.isVerbose()) {
- logger.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
+ if(LOG.isVerbose()) {
+ LOG.verbose("computing outlier degree(sum of the distances to the k nearest neighbors");
}
- FiniteProgress progressKNNWeight = logger.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), logger) : null;
+ FiniteProgress progressKNNWeight = LOG.isVerbose() ? new FiniteProgress("KNNWOD_KNNWEIGHT for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
// compute distance to the k nearest neighbor. n objects with the highest
// distance are flagged as outliers
WritableDoubleDataStore knnw_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// compute sum of the distances to the k nearest neighbors
final KNNResult<D> knn = knnQuery.getKNNForDBID(iditer, k);
double skn = 0;
- for(DistanceResultPair<D> r : knn) {
- skn += r.getDistance().doubleValue();
+ if(knn instanceof DoubleDistanceKNNList) {
+ for(DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) knn).iter(); neighbor.valid(); neighbor.advance()) {
+ skn += neighbor.doubleDistance();
+ }
+ }
+ else {
+ for(DistanceDBIDResultIter<D> neighbor = knn.iter(); neighbor.valid(); neighbor.advance()) {
+ skn += neighbor.getDistance().doubleValue();
+ }
}
knnw_score.putDouble(iditer, skn);
minmax.put(skn);
if(progressKNNWeight != null) {
- progressKNNWeight.incrementProcessed(logger);
+ progressKNNWeight.incrementProcessed(LOG);
}
}
if(progressKNNWeight != null) {
- progressKNNWeight.ensureCompleted(logger);
+ progressKNNWeight.ensureCompleted(LOG);
}
Relation<Double> res = new MaterializedRelation<Double>("Weighted kNN Outlier Score", "knnw-outlier", TypeUtil.DOUBLE, knnw_score, relation.getDBIDs());
@@ -150,7 +159,7 @@ public class KNNWeightOutlier<O, D extends NumberDistance<D, ?>> extends Abstrac @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java new file mode 100644 index 00000000..4ce0313e --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDF.java @@ -0,0 +1,342 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.statistics.GaussianKernelDensityFunction; +import 
de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Outlier Detection with Kernel Density Functions. + * + * A variation of LOF which uses kernel density estimation, but in contrast to + * {@link SimpleKernelDensityLOF} also uses the reachability concept of LOF. + * + * Reference: + * <p> + * Outlier Detection with Kernel Density Functions.<br/> + * L. J. Latecki, A. Lazarevic, D. Pokrajac<br /> + * Machine Learning and Data Mining in Pattern Recognition 2007 + * </p> + * + * @author Erich Schubert + * + * @apiviz.has KNNQuery + * @apiviz.has KernelDensityFunction + * + * @param <O> the type of objects handled by this Algorithm + * @param <D> Distance type + */ +@Reference(authors = "L. J. Latecki, A. Lazarevic, D. Pokrajac", title = "Outlier Detection with Kernel Density Functions", booktitle = "Machine Learning and Data Mining in Pattern Recognition", url = "http://dx.doi.org/10.1007/978-3-540-73499-4_6") +public class LDF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(LDF.class); + + /** + * Parameter k. 
+ */ + protected int k; + + /** + * Bandwidth scaling factor. + */ + protected double h = 1; + + /** + * Scaling constant, to limit value range to 1/c + */ + protected double c = 0.1; + + /** + * Kernel density function + */ + private KernelDensityFunction kernel; + + /** + * Constructor. + * + * @param k the value of k + * @param kernel Kernel function + * @param h Kernel bandwidth scaling + * @param c Score scaling parameter + */ + public LDF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel, double h, double c) { + super(distance); + this.k = k + 1; + this.kernel = kernel; + this.h = h; + this.c = c; + } + + /** + * Run the LDF (local density factor) outlier detection algorithm. + * + * @param relation Data to process + * @return LDF outlier result + */ + public OutlierResult run(Relation<O> relation) { + StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null; + + final int dim = RelationUtil.dimensionality(relation); + + DBIDs ids = relation.getDBIDs(); + + // "HEAVY" flag for KNN Query since it is used more than once + KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); + // No optimized kNN query - use a preprocessor! + if (!(knnq instanceof PreprocessorKNNQuery)) { + if (stepprog != null) { + stepprog.beginStep(1, "Materializing neighborhoods w.r.t. 
distance function.", LOG); + } + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k); + relation.getDatabase().addIndex(preproc); + DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction()); + knnq = preproc.getKNNQuery(rdq, k); + } + + // Compute LDEs + if (stepprog != null) { + stepprog.beginStep(2, "Computing LDEs.", LOG); + } + WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; + for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + double sum = 0.0; + int count = 0; + if (neighbors instanceof DoubleDistanceKNNList) { + // Fast version for double distances + for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + double nkdist = ((DoubleDistanceKNNList) knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance(); + + final double v = Math.max(nkdist, neighbor.doubleDistance()) / (h * nkdist); + sum += kernel.density(v) / Math.pow(h * nkdist, dim); + count++; + } + } else { + for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + double nkdist = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue(); + final double v = Math.max(nkdist, neighbor.getDistance().doubleValue()) / (h * nkdist); + sum += kernel.density(v) / Math.pow(h * nkdist, dim); + count++; + } + } + ldes.putDouble(it, sum / count); + if (densProgress != null) { + densProgress.incrementProcessed(LOG); + } + } + if (densProgress != null) { + densProgress.ensureCompleted(LOG); + } + + // Compute local density factors. 
+ if (stepprog != null) { + stepprog.beginStep(3, "Computing LDFs.", LOG); + } + WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + // track the maximum value for normalization. + DoubleMinMax lofminmax = new DoubleMinMax(); + + FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null; + for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final double lrdp = ldes.doubleValue(it); + final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + double sum = 0.0; + int count = 0; + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + // skip the point itself + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + sum += ldes.doubleValue(neighbor); + count++; + } + sum /= count; + final double div = lrdp + c * sum; + double ldf = (div > 0) ? sum / div : 0; + ldfs.putDouble(it, ldf); + // update minimum and maximum + lofminmax.put(ldf); + + if (progressLOFs != null) { + progressLOFs.incrementProcessed(LOG); + } + } + if (progressLOFs != null) { + progressLOFs.ensureCompleted(LOG); + } + + if (stepprog != null) { + stepprog.setCompleted(LOG); + } + + // Build result representation. + Relation<Double> scoreResult = new MaterializedRelation<Double>("Local Density Factor", "ldf-outlier", TypeUtil.DOUBLE, ldfs, ids); + OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c)); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + + return result; + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD)); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> vector type + * @param <D> distance type + */ + public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + /** + * Option ID for kernel. + */ + public static final OptionID KERNEL_ID = new OptionID("ldf.kernel", "Kernel to use for LDF."); + + /** + * Option ID for k + */ + public static final OptionID K_ID = new OptionID("ldf.k", "Number of neighbors to use for LDF."); + + /** + * Option ID for h - kernel bandwidth scaling + */ + public static final OptionID H_ID = new OptionID("ldf.h", "Kernel bandwidth multiplier for LDF."); + + /** + * Option ID for c + */ + public static final OptionID C_ID = new OptionID("ldf.c", "Score scaling parameter for LDF."); + + /** + * The neighborhood size to use. + */ + protected int k = 2; + + /** + * Kernel density function parameter + */ + KernelDensityFunction kernel; + + /** + * Bandwidth scaling factor. 
+ */ + protected double h = 1; + + /** + * Scaling constant, to limit value range to 1/c + */ + protected double c = 0.1; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + final IntParameter pK = new IntParameter(K_ID); + pK.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK)) { + k = pK.getValue(); + } + + ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, GaussianKernelDensityFunction.class); + if (config.grab(kernelP)) { + kernel = kernelP.instantiateClass(config); + } + + DoubleParameter hP = new DoubleParameter(H_ID); + if (config.grab(hP)) { + h = hP.doubleValue(); + } + + DoubleParameter cP = new DoubleParameter(C_ID, 0.1); + if (config.grab(cP)) { + c = cP.doubleValue(); + } + } + + @Override + protected LDF<O, D> makeInstance() { + return new LDF<O, D>(k, distanceFunction, kernel, h, c); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java index 84f5dcc6..fbbfe484 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LDOF.java @@ -31,13 +31,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
@@ -81,13 +82,13 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(LDOF.class);
+ private static final Logging LOG = Logging.getLogger(LDOF.class);
/**
* Parameter to specify the number of nearest neighbors of an object to be
* considered for computing its LDOF_SCORE, must be an integer greater than 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("ldof.k", "The number of nearest neighbors of an object to be considered for computing its LDOF_SCORE.");
+ public static final OptionID K_ID = new OptionID("ldof.k", "The number of nearest neighbors of an object to be considered for computing its LDOF_SCORE.");
/**
* The baseline for LDOF values. The paper gives 0.5 for uniform
@@ -128,21 +129,22 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// compute LOF_SCORE of each db object
- if(logger.isVerbose()) {
- logger.verbose("Computing LDOFs");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing LDOFs");
}
- FiniteProgress progressLDOFs = logger.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), logger) : null;
+ FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF_SCORE for objects", relation.size(), LOG) : null;
Mean dxp = new Mean(), Dxp = new Mean();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
// skip the point itself
dxp.reset(); Dxp.reset();
- for(DistanceResultPair<D> neighbor1 : neighbors) {
- if(!neighbor1.sameDBID(iditer)) {
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor1 = neighbors.iter(); neighbor1.valid(); neighbor1.advance()) {
+ if(!DBIDUtil.equal(neighbor1, iditer)) {
dxp.put(neighbor1.getDistance().doubleValue());
- for(DistanceResultPair<D> neighbor2 : neighbors) {
- if(!neighbor1.sameDBID(neighbor2) && !neighbor2.sameDBID(iditer)) {
+ for (DistanceDBIDResultIter<D> neighbor2 = neighbors.iter(); neighbor2.valid(); neighbor2.advance()) {
+ if(!DBIDUtil.equal(neighbor1, neighbor2) && !DBIDUtil.equal(neighbor2, iditer)) {
Dxp.put(distFunc.distance(neighbor1, neighbor2).doubleValue());
}
}
@@ -157,11 +159,11 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas ldofminmax.put(ldof);
if(progressLDOFs != null) {
- progressLDOFs.incrementProcessed(logger);
+ progressLDOFs.incrementProcessed(LOG);
}
}
if(progressLDOFs != null) {
- progressLDOFs.ensureCompleted(logger);
+ progressLDOFs.ensureCompleted(LOG);
}
// Build result representation.
@@ -177,7 +179,7 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -193,7 +195,8 @@ public class LDOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas @Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter kP = new IntParameter(K_ID);
+ kP.addConstraint(new GreaterConstraint(1));
if(config.grab(kP)) {
k = kP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java index a04aa041..ba9ad20e 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOCI.java @@ -36,13 +36,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; -import de.lmu.ifi.dbs.elki.database.query.DistanceDBIDResult; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; +import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -64,9 +65,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; /** * Fast Outlier Detection Using the "Local Correlation Integral". * - * Exact implementation only, not aLOCI. - * - * TODO: add aLOCI + * Exact implementation only, not aLOCI. See {@link ALOCI} * * Outlier detection using multiple epsilon neighborhoods. * @@ -88,23 +87,23 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas /** * The logger for this class. 
*/ - private static final Logging logger = Logging.getLogger(LOCI.class); + private static final Logging LOG = Logging.getLogger(LOCI.class); /** * Parameter to specify the maximum radius of the neighborhood to be * considered, must be suitable to the distance function specified. */ - public static final OptionID RMAX_ID = OptionID.getOrCreateOptionID("loci.rmax", "The maximum radius of the neighborhood to be considered."); + public static final OptionID RMAX_ID = new OptionID("loci.rmax", "The maximum radius of the neighborhood to be considered."); /** * Parameter to specify the minimum neighborhood size */ - public static final OptionID NMIN_ID = OptionID.getOrCreateOptionID("loci.nmin", "Minimum neighborhood size to be considered."); + public static final OptionID NMIN_ID = new OptionID("loci.nmin", "Minimum neighborhood size to be considered."); /** * Parameter to specify the averaging neighborhood scaling. */ - public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("loci.alpha", "Scaling factor for averaging neighborhood"); + public static final OptionID ALPHA_ID = new OptionID("loci.alpha", "Scaling factor for averaging neighborhood"); /** * Holds the value of {@link #RMAX_ID}. @@ -147,16 +146,16 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas DistanceQuery<O, D> distFunc = database.getDistanceQuery(relation, getDistanceFunction()); RangeQuery<O, D> rangeQuery = database.getRangeQuery(distFunc); - FiniteProgress progressPreproc = logger.isVerbose() ? new FiniteProgress("LOCI preprocessing", relation.size(), logger) : null; + FiniteProgress progressPreproc = LOG.isVerbose() ? 
new FiniteProgress("LOCI preprocessing", relation.size(), LOG) : null; // LOCI preprocessing step WritableDataStore<ArrayList<DoubleIntPair>> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, ArrayList.class); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { DistanceDBIDResult<D> neighbors = rangeQuery.getRangeForDBID(iditer, rmax); // build list of critical distances - ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() * 2); + ArrayList<DoubleIntPair> cdist = new ArrayList<DoubleIntPair>(neighbors.size() << 1); { for(int i = 0; i < neighbors.size(); i++) { - DistanceResultPair<D> r = neighbors.get(i); + DistanceDBIDPair<D> r = neighbors.get(i); if(i + 1 < neighbors.size() && r.getDistance().compareTo(neighbors.get(i + 1).getDistance()) == 0) { continue; } @@ -182,14 +181,14 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas interestingDistances.put(iditer, cdist); if(progressPreproc != null) { - progressPreproc.incrementProcessed(logger); + progressPreproc.incrementProcessed(LOG); } } if(progressPreproc != null) { - progressPreproc.ensureCompleted(logger); + progressPreproc.ensureCompleted(LOG); } // LOCI main step - FiniteProgress progressLOCI = logger.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), logger) : null; + FiniteProgress progressLOCI = LOG.isVerbose() ? 
new FiniteProgress("LOCI scores", relation.size(), LOG) : null; WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); @@ -204,9 +203,8 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas if(maxneig >= nmin) { D range = distFunc.getDistanceFactory().fromDouble(maxdist); // Compute the largest neighborhood we will need. - List<DistanceResultPair<D>> maxneighbors = rangeQuery.getRangeForDBID(iditer, range); - // Ensure the set is sorted. Should be a no-op with most indexes. - Collections.sort(maxneighbors); + DistanceDBIDResult<D> maxneighbors = rangeQuery.getRangeForDBID(iditer, range); + // TODO: Ensure the set is sorted. Should be a no-op with most indexes. // For any critical distance, compute the normalized MDEF score. for(DoubleIntPair c : cdist) { // Only start when minimum size is fulfilled @@ -219,12 +217,13 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas final int n_alphar = elementsAtRadius(cdist, alpha_r); // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF} MeanVariance mv_n_r_alpha = new MeanVariance(); - for(DistanceResultPair<D> ne : maxneighbors) { + // TODO: optimize for double distances + for (DistanceDBIDResultIter<D> neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) { // Stop at radius r - if(ne.getDistance().doubleValue() > r) { + if(neighbor.getDistance().doubleValue() > r) { break; } - int rn_alphar = elementsAtRadius(interestingDistances.get(ne), alpha_r); + int rn_alphar = elementsAtRadius(interestingDistances.get(neighbor), alpha_r); mv_n_r_alpha.put(rn_alphar); } // We only use the average and standard deviation @@ -251,11 +250,11 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends 
AbstractDistanceBas mdef_radius.putDouble(iditer, maxnormr); minmax.put(maxmdefnorm); if(progressLOCI != null) { - progressLOCI.incrementProcessed(logger); + progressLOCI.incrementProcessed(LOG); } } if(progressLOCI != null) { - progressLOCI.ensureCompleted(logger); + progressLOCI.ensureCompleted(LOG); } Relation<Double> scoreResult = new MaterializedRelation<Double>("LOCI normalized MDEF", "loci-mdef-outlier", TypeUtil.DOUBLE, mdef_norm, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); @@ -293,7 +292,7 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -335,4 +334,4 @@ public class LOCI<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBas return new LOCI<O, D>(distanceFunction, rmax, nmin, alpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java index 5aba41ec..66bed47a 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LOF.java @@ -29,29 +29,31 @@ import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.QueryUtil; -import de.lmu.ifi.dbs.elki.database.datastore.DataStore; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import 
de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; -import de.lmu.ifi.dbs.elki.math.Mean; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; @@ -118,19 +120,19 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(LOF.class); + private static final Logging LOG = Logging.getLogger(LOF.class); /** * The distance function to determine the reachability distance between * database objects. */ - public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects."); + public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("lof.reachdistfunction", "Distance function to determine the reachability distance between database objects."); /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its LOF_SCORE, must be an integer greater than 1. */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); + public static final OptionID K_ID = new OptionID("lof.k", "The number of nearest neighbors of an object to be considered for computing its LOF_SCORE."); /** * Holds the value of {@link #K_ID}. @@ -189,9 +191,10 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou * calling {@link #doRunInTime}. 
* * @param relation Data to process + * @return LOF outlier result */ public OutlierResult run(Relation<O> relation) { - StepProgress stepprog = logger.isVerbose() ? new StepProgress("LOF", 3) : null; + StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LOF", 3) : null; Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(relation, stepprog); KNNQuery<O, D> kNNRefer = pair.getFirst(); KNNQuery<O, D> kNNReach = pair.getSecond(); @@ -209,13 +212,12 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou // "HEAVY" flag for knnReach since it is used more than once KNNQuery<O, D> knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); // No optimized kNN query - use a preprocessor! - if(!(knnReach instanceof PreprocessorKNNQuery)) { - if(stepprog != null) { - if(neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { - stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", logger); - } - else { - stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. reachability distance function.", logger); + if (!(knnReach instanceof PreprocessorKNNQuery)) { + if (stepprog != null) { + if (neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { + stepprog.beginStep(1, "Materializing neighborhoods w.r.t. reference neighborhood distance function.", LOG); + } else { + stepprog.beginStep(1, "Not materializing neighborhoods w.r.t. reference neighborhood distance function, but materializing neighborhoods w.r.t. 
reachability distance function.", LOG); } } MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, reachabilityDistanceFunction, k); @@ -226,10 +228,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou // knnReach is only used once KNNQuery<O, D> knnRefer; - if(neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { + if (neighborhoodDistanceFunction == reachabilityDistanceFunction || neighborhoodDistanceFunction.equals(reachabilityDistanceFunction)) { knnRefer = knnReach; - } - else { + } else { // do not materialize the first neighborhood, since it is used only once knnRefer = QueryUtil.getKNNQuery(relation, neighborhoodDistanceFunction, k); } @@ -251,30 +252,30 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou */ protected LOFResult<O, D> doRunInTime(DBIDs ids, KNNQuery<O, D> kNNRefer, KNNQuery<O, D> kNNReach, StepProgress stepprog) { // Assert we got something - if(kNNRefer == null) { + if (kNNRefer == null) { throw new AbortException("No kNN queries supported by database for reference neighborhood distance function."); } - if(kNNReach == null) { + if (kNNReach == null) { throw new AbortException("No kNN queries supported by database for reachability distance function."); } // Compute LRDs - if(stepprog != null) { - stepprog.beginStep(2, "Computing LRDs.", logger); + if (stepprog != null) { + stepprog.beginStep(2, "Computing LRDs.", LOG); } WritableDoubleDataStore lrds = computeLRDs(ids, kNNReach); // compute LOF_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(3, "Computing LOFs.", logger); + if (stepprog != null) { + stepprog.beginStep(3, "Computing LOFs.", LOG); } Pair<WritableDoubleDataStore, DoubleMinMax> lofsAndMax = computeLOFs(ids, lrds, kNNRefer); WritableDoubleDataStore lofs = lofsAndMax.getFirst(); // track the maximum value for normalization. 
DoubleMinMax lofminmax = lofsAndMax.getSecond(); - if(stepprog != null) { - stepprog.setCompleted(logger); + if (stepprog != null) { + stepprog.setCompleted(LOG); } // Build result representation. @@ -295,26 +296,44 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou */ protected WritableDoubleDataStore computeLRDs(DBIDs ids, KNNQuery<O, D> knnReach) { WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); - FiniteProgress lrdsProgress = logger.isVerbose() ? new FiniteProgress("LRD", ids.size(), logger) : null; - Mean mean = new Mean(); - for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - mean.reset(); - KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k); - for(DistanceResultPair<D> neighbor : neighbors) { - if(objectIsInKNN || !neighbor.sameDBID(iter)) { - KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); - mean.put(Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue())); + FiniteProgress lrdsProgress = LOG.isVerbose() ? 
new FiniteProgress("LRD", ids.size(), LOG) : null; + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final KNNResult<D> neighbors = knnReach.getKNNForDBID(iter, k); + double sum = 0.0; + int count = 0; + if (neighbors instanceof DoubleDistanceKNNList) { + // Fast version for double distances + for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { + KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); + final double nkdist; + if (neighborsNeighbors instanceof DoubleDistanceKNNList) { + nkdist = ((DoubleDistanceKNNList) neighborsNeighbors).doubleKNNDistance(); + } else { + nkdist = neighborsNeighbors.getKNNDistance().doubleValue(); + } + sum += Math.max(neighbor.doubleDistance(), nkdist); + count++; + } + } + } else { + for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { + KNNResult<D> neighborsNeighbors = knnReach.getKNNForDBID(neighbor, k); + sum += Math.max(neighbor.getDistance().doubleValue(), neighborsNeighbors.getKNNDistance().doubleValue()); + count++; + } } } // Avoid division by 0 - final double lrd = (mean.getCount() > 0) ? 1 / mean.getMean() : 0.0; + final double lrd = (sum > 0) ? 
(count / sum) : 0; lrds.putDouble(iter, lrd); - if(lrdsProgress != null) { - lrdsProgress.incrementProcessed(logger); + if (lrdsProgress != null) { + lrdsProgress.incrementProcessed(LOG); } } - if(lrdsProgress != null) { - lrdsProgress.ensureCompleted(logger); + if (lrdsProgress != null) { + lrdsProgress.ensureCompleted(LOG); } return lrds; } @@ -328,40 +347,40 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou * reference distance * @return the LOFs of the objects and the maximum LOF */ - protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DataStore<Double> lrds, KNNQuery<O, D> knnRefer) { + protected Pair<WritableDoubleDataStore, DoubleMinMax> computeLOFs(DBIDs ids, DoubleDataStore lrds, KNNQuery<O, D> knnRefer) { WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); // track the maximum value for normalization. DoubleMinMax lofminmax = new DoubleMinMax(); - FiniteProgress progressLOFs = logger.isVerbose() ? new FiniteProgress("LOF_SCORE for objects", ids.size(), logger) : null; - Mean mean = new Mean(); - for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { - double lrdp = lrds.get(iter); + FiniteProgress progressLOFs = LOG.isVerbose() ? 
new FiniteProgress("LOF_SCORE for objects", ids.size(), LOG) : null; + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + final double lrdp = lrds.doubleValue(iter); final double lof; - if(lrdp > 0) { + if (lrdp > 0) { final KNNResult<D> neighbors = knnRefer.getKNNForDBID(iter, k); - mean.reset(); - for(DistanceResultPair<D> neighbor : neighbors) { + double sum = 0.0; + int count = 0; + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { // skip the point itself - if(objectIsInKNN || !neighbor.sameDBID(iter)) { - mean.put(lrds.get(neighbor)); + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iter)) { + sum += lrds.doubleValue(neighbor); + count++; } } - lof = mean.getMean() / lrdp; - } - else { + lof = sum / (count * lrdp); + } else { lof = 1.0; } lofs.putDouble(iter, lof); // update minimum and maximum lofminmax.put(lof); - if(progressLOFs != null) { - progressLOFs.incrementProcessed(logger); + if (progressLOFs != null) { + progressLOFs.incrementProcessed(LOG); } } - if(progressLOFs != null) { - progressLOFs.ensureCompleted(logger); + if (progressLOFs != null) { + progressLOFs.ensureCompleted(LOG); } return new Pair<WritableDoubleDataStore, DoubleMinMax>(lofs, lofminmax); } @@ -369,10 +388,9 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou @Override public TypeInformation[] getInputTypeRestriction() { final TypeInformation type; - if(reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) { + if (reachabilityDistanceFunction.equals(neighborhoodDistanceFunction)) { type = reachabilityDistanceFunction.getInputTypeRestriction(); - } - else { + } else { type = new CombinedTypeInformation(neighborhoodDistanceFunction.getInputTypeRestriction(), reachabilityDistanceFunction.getInputTypeRestriction()); } return TypeUtil.array(type); @@ -380,7 +398,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou @Override protected Logging getLogger() { - return 
logger; + return LOG; } /** @@ -442,6 +460,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the knn query for the reference set. + * * @return the kNN query w.r.t. the reference neighborhood distance */ public KNNQuery<O, D> getKNNRefer() { @@ -449,6 +469,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the knn query for the reachability set. + * * @return the kNN query w.r.t. the reachability distance */ public KNNQuery<O, D> getKNNReach() { @@ -456,6 +478,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the LRD data store. + * * @return the LRD values of the objects */ public WritableDoubleDataStore getLrds() { @@ -463,6 +487,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the LOF data store. + * * @return the LOF values of the objects */ public WritableDoubleDataStore getLofs() { @@ -470,6 +496,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the outlier result. + * * @return the result of the run of the {@link LOF} algorithm */ public OutlierResult getResult() { @@ -486,6 +514,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the RkNN query for the reference set. + * * @return the RkNN query w.r.t. the reference neighborhood distance */ public RKNNQuery<O, D> getRkNNRefer() { @@ -493,6 +523,8 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou } /** + * Get the RkNN query for the reachability set. + * * @return the RkNN query w.r.t. 
the reachability distance */ public RKNNQuery<O, D> getRkNNReach() { @@ -518,7 +550,7 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou */ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { /** - * The neighborhood size to use + * The neighborhood size to use. */ protected int k = 2; @@ -536,13 +568,14 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1)); - if(config.grab(pK)) { + final IntParameter pK = new IntParameter(K_ID); + pK.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK)) { k = pK.getValue(); } final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); - if(config.grab(reachDistP)) { + if (config.grab(reachDistP)) { reachabilityDistanceFunction = reachDistP.instantiateClass(config); } } @@ -554,4 +587,4 @@ public class LOF<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<Ou return new LOF<O, D>(k, distanceFunction, rdist); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java index dc0d26a4..5da06983 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/LoOP.java @@ -33,15 +33,18 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -76,7 +79,8 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; * * @apiviz.has KNNQuery * - * @param <O> the type of DatabaseObjects handled by this Algorithm + * @param <O> type of objects handled by this algorithm + * @param <D> type of distances used */ @Title("LoOP: Local Outlier Probabilities") 
@Description("Variant of the LOF algorithm normalized using statistical values.") @@ -85,37 +89,37 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(LoOP.class); + private static final Logging LOG = Logging.getLogger(LoOP.class); /** * The distance function to determine the reachability distance between * database objects. */ - public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.referencedistfunction", "Distance function to determine the density of an object."); + public static final OptionID REACHABILITY_DISTANCE_FUNCTION_ID = new OptionID("loop.referencedistfunction", "Distance function to determine the density of an object."); /** * The distance function to determine the reachability distance between * database objects. */ - public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object."); + public static final OptionID COMPARISON_DISTANCE_FUNCTION_ID = new OptionID("loop.comparedistfunction", "Distance function to determine the reference set of an object."); /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its LOOP_SCORE, must be an integer greater than 1. */ - public static final OptionID KREACH_ID = OptionID.getOrCreateOptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value."); + public static final OptionID KREACH_ID = new OptionID("loop.kref", "The number of nearest neighbors of an object to be used for the PRD value."); /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its LOOP_SCORE, must be an integer greater than 1. 
*/ - public static final OptionID KCOMP_ID = OptionID.getOrCreateOptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE."); + public static final OptionID KCOMP_ID = new OptionID("loop.kcomp", "The number of nearest neighbors of an object to be considered for computing its LOOP_SCORE."); /** * Parameter to specify the number of nearest neighbors of an object to be * considered for computing its LOOP_SCORE, must be an integer greater than 1. */ - public static final OptionID LAMBDA_ID = OptionID.getOrCreateOptionID("loop.lambda", "The number of standard deviations to consider for density computation."); + public static final OptionID LAMBDA_ID = new OptionID("loop.lambda", "The number of standard deviations to consider for density computation."); /** * Holds the value of {@link #KREACH_ID}. @@ -133,12 +137,12 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O double lambda; /** - * Preprocessor Step 1 + * Preprocessor Step 1. */ protected DistanceFunction<? super O, D> reachabilityDistanceFunction; /** - * Preprocessor Step 2 + * Preprocessor Step 2. */ protected DistanceFunction<? super O, D> comparisonDistanceFunction; @@ -150,11 +154,11 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O /** * Constructor with parameters. * - * @param kreach - * @param kcomp - * @param reachabilityDistanceFunction - * @param comparisonDistanceFunction - * @param lambda + * @param kreach k for reachability + * @param kcomp k for comparison + * @param reachabilityDistanceFunction distance function for reachability + * @param comparisonDistanceFunction distance function for comparison + * @param lambda Lambda parameter */ public LoOP(int kreach, int kcomp, DistanceFunction<? super O, D> reachabilityDistanceFunction, DistanceFunction<? 
super O, D> comparisonDistanceFunction, double lambda) { super(); @@ -168,36 +172,35 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O /** * Get the kNN queries for the algorithm. * - * @param database Database - * @param stepprog Progress logger + * @param database Database to analyze + * @param relation Relation to analyze + * @param stepprog Progress logger, may be {@code null} * @return result */ protected Pair<KNNQuery<O, D>, KNNQuery<O, D>> getKNNQueries(Database database, Relation<O> relation, StepProgress stepprog) { KNNQuery<O, D> knnComp; KNNQuery<O, D> knnReach; - if(comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) { + if (comparisonDistanceFunction == reachabilityDistanceFunction || comparisonDistanceFunction.equals(reachabilityDistanceFunction)) { // We need each neighborhood twice - use "HEAVY" flag. knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, Math.max(kreach, kcomp), DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); // No optimized kNN query - use a preprocessor! 
- if(knnComp == null) { - if(stepprog != null) { - stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", logger); + if (knnComp == null) { + if (stepprog != null) { + stepprog.beginStep(1, "Materializing neighborhoods with respect to reference neighborhood distance function.", LOG); } MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, comparisonDistanceFunction, kcomp); database.addIndex(preproc); DistanceQuery<O, D> cdq = database.getDistanceQuery(relation, comparisonDistanceFunction); knnComp = preproc.getKNNQuery(cdq, kreach, DatabaseQuery.HINT_HEAVY_USE); - } - else { - if(stepprog != null) { - stepprog.beginStep(1, "Optimized neighborhoods provided by database.", logger); + } else { + if (stepprog != null) { + stepprog.beginStep(1, "Optimized neighborhoods provided by database.", LOG); } } knnReach = knnComp; - } - else { - if(stepprog != null) { - stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", logger); + } else { + if (stepprog != null) { + stepprog.beginStep(1, "Not materializing distance functions, since we request each DBID once only.", LOG); } knnComp = QueryUtil.getKNNQuery(relation, comparisonDistanceFunction, kreach); knnReach = QueryUtil.getKNNQuery(relation, reachabilityDistanceFunction, kcomp); @@ -215,17 +218,17 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O public OutlierResult run(Database database, Relation<O> relation) { final double sqrt2 = Math.sqrt(2.0); - StepProgress stepprog = logger.isVerbose() ? new StepProgress(5) : null; + StepProgress stepprog = LOG.isVerbose() ? 
new StepProgress(5) : null; Pair<KNNQuery<O, D>, KNNQuery<O, D>> pair = getKNNQueries(database, relation, stepprog); KNNQuery<O, D> knnComp = pair.getFirst(); KNNQuery<O, D> knnReach = pair.getSecond(); // Assert we got something - if(knnComp == null) { + if (knnComp == null) { throw new AbortException("No kNN queries supported by database for comparison distance function."); } - if(knnReach == null) { + if (knnReach == null) { throw new AbortException("No kNN queries supported by database for density estimation distance function."); } @@ -233,29 +236,43 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); Mean mean = new Mean(); {// computing PRDs - if(stepprog != null) { - stepprog.beginStep(3, "Computing pdists", logger); + if (stepprog != null) { + stepprog.beginStep(3, "Computing pdists", LOG); } - FiniteProgress prdsProgress = logger.isVerbose() ? new FiniteProgress("pdists", relation.size(), logger) : null; - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + FiniteProgress prdsProgress = LOG.isVerbose() ? 
new FiniteProgress("pdists", relation.size(), LOG) : null; + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { final KNNResult<D> neighbors = knnReach.getKNNForDBID(iditer, kreach); mean.reset(); // use first kref neighbors as reference set int ks = 0; - for(DistanceResultPair<D> neighbor : neighbors) { - if(objectIsInKNN || !neighbor.sameDBID(iditer)) { - double d = neighbor.getDistance().doubleValue(); - mean.put(d * d); - ks++; - if(ks >= kreach) { - break; + // TODO: optimize for double distances + if (neighbors instanceof DoubleDistanceKNNList) { + for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { + final double d = neighbor.doubleDistance(); + mean.put(d * d); + ks++; + if (ks >= kreach) { + break; + } + } + } + } else { + for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { + double d = neighbor.getDistance().doubleValue(); + mean.put(d * d); + ks++; + if (ks >= kreach) { + break; + } } } } double pdist = lambda * Math.sqrt(mean.getMean()); pdists.putDouble(iditer, pdist); - if(prdsProgress != null) { - prdsProgress.incrementProcessed(logger); + if (prdsProgress != null) { + prdsProgress.incrementProcessed(LOG); } } } @@ -263,63 +280,63 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); MeanVariance mvplof = new MeanVariance(); {// compute LOOP_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(4, "Computing PLOF", logger); + if (stepprog != null) { + stepprog.beginStep(4, "Computing PLOF", LOG); } - FiniteProgress progressPLOFs = logger.isVerbose() ? 
new FiniteProgress("PLOFs for objects", relation.size(), logger) : null; + FiniteProgress progressPLOFs = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null; MeanVariance mv = new MeanVariance(); - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { final KNNResult<D> neighbors = knnComp.getKNNForDBID(iditer, kcomp); mv.reset(); // use first kref neighbors as comparison set. int ks = 0; - for(DistanceResultPair<D> neighbor1 : neighbors) { - if(objectIsInKNN || !neighbor1.sameDBID(iditer)) { - mv.put(pdists.doubleValue(neighbor1)); + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (objectIsInKNN || !DBIDUtil.equal(neighbor, iditer)) { + mv.put(pdists.doubleValue(neighbor)); ks++; - if(ks >= kcomp) { + if (ks >= kcomp) { break; } } } double plof = Math.max(pdists.doubleValue(iditer) / mv.getMean(), 1.0); - if(Double.isNaN(plof) || Double.isInfinite(plof)) { + if (Double.isNaN(plof) || Double.isInfinite(plof)) { plof = 1.0; } plofs.putDouble(iditer, plof); mvplof.put((plof - 1.0) * (plof - 1.0)); - if(progressPLOFs != null) { - progressPLOFs.incrementProcessed(logger); + if (progressPLOFs != null) { + progressPLOFs.incrementProcessed(LOG); } } } double nplof = lambda * Math.sqrt(mvplof.getMean()); - if(logger.isDebugging()) { - logger.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev()); + if (LOG.isDebugging()) { + LOG.verbose("nplof normalization factor is " + nplof + " " + mvplof.getMean() + " " + mvplof.getSampleStddev()); } // Compute final LoOP values. 
WritableDoubleDataStore loops = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); {// compute LOOP_SCORE of each db object - if(stepprog != null) { - stepprog.beginStep(5, "Computing LoOP scores", logger); + if (stepprog != null) { + stepprog.beginStep(5, "Computing LoOP scores", LOG); } - FiniteProgress progressLOOPs = logger.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), logger) : null; - for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null; + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { loops.putDouble(iditer, NormalDistribution.erf((plofs.doubleValue(iditer) - 1) / (nplof * sqrt2))); - if(progressLOOPs != null) { - progressLOOPs.incrementProcessed(logger); + if (progressLOOPs != null) { + progressLOOPs.incrementProcessed(LOG); } } } - if(stepprog != null) { - stepprog.setCompleted(logger); + if (stepprog != null) { + stepprog.setCompleted(LOG); } // Build result representation. 
@@ -331,10 +348,9 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O @Override public TypeInformation[] getInputTypeRestriction() { final TypeInformation type; - if(reachabilityDistanceFunction.equals(comparisonDistanceFunction)) { + if (reachabilityDistanceFunction.equals(comparisonDistanceFunction)) { type = reachabilityDistanceFunction.getInputTypeRestriction(); - } - else { + } else { type = new CombinedTypeInformation(reachabilityDistanceFunction.getInputTypeRestriction(), comparisonDistanceFunction.getInputTypeRestriction()); } return TypeUtil.array(type); @@ -342,7 +358,7 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -369,45 +385,48 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O double lambda = 2.0; /** - * Preprocessor Step 1 + * Preprocessor Step 1. */ protected DistanceFunction<O, D> reachabilityDistanceFunction = null; /** - * Preprocessor Step 2 + * Preprocessor Step 2. 
*/ protected DistanceFunction<O, D> comparisonDistanceFunction = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter kcompP = new IntParameter(KCOMP_ID, new GreaterConstraint(1)); - if(config.grab(kcompP)) { - kcomp = kcompP.getValue(); + final IntParameter kcompP = new IntParameter(KCOMP_ID); + kcompP.addConstraint(new GreaterConstraint(1)); + if (config.grab(kcompP)) { + kcomp = kcompP.intValue(); } final ObjectParameter<DistanceFunction<O, D>> compDistP = new ObjectParameter<DistanceFunction<O, D>>(COMPARISON_DISTANCE_FUNCTION_ID, DistanceFunction.class, EuclideanDistanceFunction.class); - if(config.grab(compDistP)) { + if (config.grab(compDistP)) { comparisonDistanceFunction = compDistP.instantiateClass(config); } - final IntParameter kreachP = new IntParameter(KREACH_ID, new GreaterConstraint(1), true); - if(config.grab(kreachP)) { - kreach = kreachP.getValue(); - } - else { + final IntParameter kreachP = new IntParameter(KREACH_ID); + kreachP.addConstraint(new GreaterConstraint(1)); + kreachP.setOptional(true); + if (config.grab(kreachP)) { + kreach = kreachP.intValue(); + } else { kreach = kcomp; } final ObjectParameter<DistanceFunction<O, D>> reachDistP = new ObjectParameter<DistanceFunction<O, D>>(REACHABILITY_DISTANCE_FUNCTION_ID, DistanceFunction.class, true); - if(config.grab(reachDistP)) { + if (config.grab(reachDistP)) { reachabilityDistanceFunction = reachDistP.instantiateClass(config); } // TODO: make default 1.0? 
- final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, new GreaterConstraint(0.0), 2.0); - if(config.grab(lambdaP)) { - lambda = lambdaP.getValue(); + final DoubleParameter lambdaP = new DoubleParameter(LAMBDA_ID, 2.0); + lambdaP.addConstraint(new GreaterConstraint(0.0)); + if (config.grab(lambdaP)) { + lambda = lambdaP.doubleValue(); } } @@ -417,4 +436,4 @@ public class LoOP<O, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<O return new LoOP<O, D>(kreach, kcomp, realreach, comparisonDistanceFunction, lambda); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java index b3d24463..bed27a33 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OPTICSOF.java @@ -37,14 +37,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -83,7 +83,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(OPTICSOF.class);
+ private static final Logging LOG = Logging.getLogger(OPTICSOF.class);
/**
* Parameter to specify the threshold MinPts.
@@ -136,9 +136,10 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { List<Double> core = new ArrayList<Double>();
double lrd = 0;
- for(DistanceResultPair<D> neighPair : nMinPts.get(iditer)) {
- double coreDist = coreDistance.doubleValue(neighPair);
- double dist = distQuery.distance(iditer, neighPair).doubleValue();
+ // TODO: optimize for double distances
+ for (DistanceDBIDResultIter<D> neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
+ double coreDist = coreDistance.doubleValue(neighbor);
+ double dist = distQuery.distance(iditer, neighbor).doubleValue();
double rd = Math.max(coreDist, dist);
lrd = rd + lrd;
core.add(rd);
@@ -153,9 +154,9 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double of = 0;
- for(DistanceResultPair<D> pair : nMinPts.get(iditer)) {
+ for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
double lrd = lrds.doubleValue(iditer);
- double lrdN = lrds.doubleValue(pair);
+ double lrdN = lrds.doubleValue(neighbor);
of = of + lrdN / lrd;
}
of = of / minPtsNeighborhoodSize.intValue(iditer);
@@ -176,7 +177,7 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -192,7 +193,8 @@ public class OPTICSOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanc @Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter param = new IntParameter(OPTICS.MINPTS_ID, new GreaterConstraint(1));
+ final IntParameter param = new IntParameter(OPTICS.MINPTS_ID);
+ param.addConstraint(new GreaterConstraint(1));
if(config.grab(param)) {
minpts = param.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java index 9b974ad9..bac5db36 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OnlineLOF.java @@ -34,14 +34,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
import de.lmu.ifi.dbs.elki.database.query.rknn.RKNNQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.AbstractMaterializeKNNPreprocessor;
import de.lmu.ifi.dbs.elki.index.preprocessed.knn.KNNChangeEvent;
@@ -73,7 +73,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { /**
* The logger for this class.
*/
- static final Logging logger = Logging.getLogger(OnlineLOF.class);
+ private static final Logging LOG = Logging.getLogger(OnlineLOF.class);
/**
* Constructor.
@@ -93,7 +93,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { */
@Override
public OutlierResult run(Relation<O> relation) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
Pair<Pair<KNNQuery<O, D>, KNNQuery<O, D>>, Pair<RKNNQuery<O, D>, RKNNQuery<O, D>>> queries = getKNNAndRkNNQueries(relation, stepprog);
KNNQuery<O, D> kNNRefer = queries.getFirst().getFirst();
@@ -128,7 +128,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { // No optimized kNN query or RkNN query - use a preprocessor!
if(kNNRefer == null || rkNNRefer == null) {
if(stepprog != null) {
- stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", logger);
+ stepprog.beginStep(1, "Materializing neighborhood w.r.t. reference neighborhood distance function.", LOG);
}
MaterializeKNNAndRKNNPreprocessor<O, D> preproc = new MaterializeKNNAndRKNNPreprocessor<O, D>(relation, neighborhoodDistanceFunction, k);
DistanceQuery<O, D> ndq = relation.getDatabase().getDistanceQuery(relation, neighborhoodDistanceFunction);
@@ -139,7 +139,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { }
else {
if(stepprog != null) {
- stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", logger);
+ stepprog.beginStep(1, "Optimized neighborhood w.r.t. reference neighborhood distance function provided by database.", LOG);
}
}
@@ -147,7 +147,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { RKNNQuery<O, D> rkNNReach = QueryUtil.getRKNNQuery(relation, reachabilityDistanceFunction, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE);
if(kNNReach == null || rkNNReach == null) {
if(stepprog != null) {
- stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", logger);
+ stepprog.beginStep(2, "Materializing neighborhood w.r.t. reachability distance function.", LOG);
}
ListParameterization config = new ListParameterization();
config.addParameter(AbstractMaterializeKNNPreprocessor.Factory.DISTANCE_FUNCTION_ID, reachabilityDistanceFunction);
@@ -261,14 +261,14 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { * @param lofResult the result of the former LOF run
*/
private void kNNsInserted(DBIDs insertions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(3) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(3) : null;
// recompute lrds
if(stepprog != null) {
- stepprog.beginStep(1, "Recompute LRDs.", logger);
+ stepprog.beginStep(1, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(DBIDUtil.union(insertions, updates2));
- List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
@@ -283,20 +283,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { // recompute lofs
if(stepprog != null) {
- stepprog.beginStep(2, "Recompute LOFS.", logger);
+ stepprog.beginStep(2, "Recompute LOFS.", LOG);
}
- List<List<DistanceResultPair<D>>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, insertions, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
if(stepprog != null) {
- stepprog.beginStep(3, "Inform listeners.", logger);
+ stepprog.beginStep(3, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
}
@@ -311,11 +311,11 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { * @param lofResult the result of the former LOF run
*/
private void kNNsRemoved(DBIDs deletions, DBIDs updates1, DBIDs updates2, LOFResult<O, D> lofResult) {
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(4) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(4) : null;
// delete lrds and lofs
if(stepprog != null) {
- stepprog.beginStep(1, "Delete old LRDs and LOFs.", logger);
+ stepprog.beginStep(1, "Delete old LRDs and LOFs.", LOG);
}
for (DBIDIter iter = deletions.iter(); iter.valid(); iter.advance()) {
lofResult.getLrds().delete(iter);
@@ -324,10 +324,10 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { // recompute lrds
if(stepprog != null) {
- stepprog.beginStep(2, "Recompute LRDs.", logger);
+ stepprog.beginStep(2, "Recompute LRDs.", LOG);
}
ArrayDBIDs lrd_ids = DBIDUtil.ensureArray(updates2);
- List<List<DistanceResultPair<D>>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> reachDistRKNNs = lofResult.getRkNNReach().getRKNNForBulkDBIDs(lrd_ids, k);
ArrayDBIDs affected_lrd_id_candidates = mergeIDs(reachDistRKNNs, lrd_ids);
ArrayModifiableDBIDs affected_lrd_ids = DBIDUtil.newArray(affected_lrd_id_candidates.size());
WritableDoubleDataStore new_lrds = computeLRDs(affected_lrd_id_candidates, lofResult.getKNNReach());
@@ -342,20 +342,20 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { // recompute lofs
if(stepprog != null) {
- stepprog.beginStep(3, "Recompute LOFS.", logger);
+ stepprog.beginStep(3, "Recompute LOFS.", LOG);
}
- List<List<DistanceResultPair<D>>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
+ List<? extends DistanceDBIDResult<D>> primDistRKNNs = lofResult.getRkNNRefer().getRKNNForBulkDBIDs(affected_lrd_ids, k);
ArrayDBIDs affected_lof_ids = mergeIDs(primDistRKNNs, affected_lrd_ids, updates1);
recomputeLOFs(affected_lof_ids, lofResult);
// fire result changed
if(stepprog != null) {
- stepprog.beginStep(4, "Inform listeners.", logger);
+ stepprog.beginStep(4, "Inform listeners.", LOG);
}
lofResult.getResult().getHierarchy().resultChanged(lofResult.getResult());
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
}
@@ -367,15 +367,13 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { * @return a set containing the ids of the query result and the specified
* ids
*/
- private ArrayModifiableDBIDs mergeIDs(List<List<DistanceResultPair<D>>> queryResults, DBIDs... ids) {
+ private ArrayModifiableDBIDs mergeIDs(List<? extends DistanceDBIDResult<D>> queryResults, DBIDs... ids) {
ModifiableDBIDs result = DBIDUtil.newHashSet();
for(DBIDs dbids : ids) {
result.addDBIDs(dbids);
}
- for(List<DistanceResultPair<D>> queryResult : queryResults) {
- for(DistanceResultPair<D> qr : queryResult) {
- result.add(qr);
- }
+ for(DistanceDBIDResult<D> queryResult : queryResults) {
+ result.addDBIDs(queryResult);
}
return DBIDUtil.newArray(result);
}
@@ -410,7 +408,7 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -440,7 +438,8 @@ public class OnlineLOF<O, D extends NumberDistance<D, ?>> extends LOF<O, D> { protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
if(config.grab(pK)) {
k = pK.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java index d8322d8b..00c4a8ec 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/OutlierAlgorithm.java @@ -31,6 +31,8 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; * Generic super interface for outlier detection algorithms. * * @author Erich Schubert + * + * @apiviz.landmark * * @apiviz.has OutlierResult */ @@ -39,4 +41,4 @@ public interface OutlierAlgorithm extends Algorithm { // Use the magic in AbstractAlgorithm and just implement a run method for your input data @Override OutlierResult run(Database database); -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java index dd1d37a3..93eca7db 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/ReferenceBasedOutlierDetection.java @@ -23,11 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier; along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Iterator;
-import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
@@ -39,12 +36,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.GenericDistanceDBIDList;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.Mean;
@@ -88,23 +86,23 @@ import de.lmu.ifi.dbs.elki.utilities.referencepoints.ReferencePointsHeuristic; @Title("An Efficient Reference-based Approach to Outlier Detection in Large Datasets")
@Description("Computes kNN distances approximately, using reference points with various reference point strategies.")
@Reference(authors = "Y. Pei, O.R. Zaiane, Y. Gao", title = "An Efficient Reference-based Approach to Outlier Detection in Large Datasets", booktitle = "Proc. 6th IEEE Int. Conf. on Data Mining (ICDM '06), Hong Kong, China, 2006", url = "http://dx.doi.org/10.1109/ICDM.2006.17")
-public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+public class ReferenceBasedOutlierDetection<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(ReferenceBasedOutlierDetection.class);
+ private static final Logging LOG = Logging.getLogger(ReferenceBasedOutlierDetection.class);
/**
* Parameter for the reference points heuristic.
*/
- public static final OptionID REFP_ID = OptionID.getOrCreateOptionID("refod.refp", "The heuristic for finding reference points.");
+ public static final OptionID REFP_ID = new OptionID("refod.refp", "The heuristic for finding reference points.");
/**
* Parameter to specify the number of nearest neighbors of an object, to be
* considered for computing its REFOD_SCORE, must be an integer greater than
* 1.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("refod.k", "The number of nearest neighbors");
+ public static final OptionID K_ID = new OptionID("refod.k", "The number of nearest neighbors");
/**
* Holds the value of {@link #K_ID}.
@@ -160,7 +158,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte }
V firstRef = iter.next();
// compute distance vector for the first reference point
- List<DistanceResultPair<D>> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
+ DistanceDBIDResult<D> firstReferenceDists = computeDistanceVector(firstRef, relation, distFunc);
for(int l = 0; l < firstReferenceDists.size(); l++) {
double density = computeDensity(firstReferenceDists, l);
// Initial value
@@ -169,7 +167,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte // compute density values for all remaining reference points
while(iter.hasNext()) {
V refPoint = iter.next();
- List<DistanceResultPair<D>> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
+ DistanceDBIDResult<D> referenceDists = computeDistanceVector(refPoint, relation, distFunc);
// compute density value for each object
for(int l = 0; l < referenceDists.size(); l++) {
double density = computeDensity(referenceDists, l);
@@ -215,14 +213,13 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte * @return array containing the distance to one reference point for each
* database object and the object id
*/
- protected List<DistanceResultPair<D>> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
+ protected DistanceDBIDResult<D> computeDistanceVector(V refPoint, Relation<V> database, DistanceQuery<V, D> distFunc) {
// TODO: optimize for double distances?
- List<DistanceResultPair<D>> referenceDists = new ArrayList<DistanceResultPair<D>>(database.size());
+ GenericDistanceDBIDList<D> referenceDists = new GenericDistanceDBIDList<D>(database.size());
for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) { - final D distance = distFunc.distance(iditer, refPoint);
- referenceDists.add(new GenericDistanceResultPair<D>(distance, iditer.getDBID()));
+ referenceDists.add(distFunc.distance(iditer, refPoint), iditer);
}
- Collections.sort(referenceDists);
+ referenceDists.sort();
return referenceDists;
}
@@ -238,8 +235,8 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte * @param index index of the current object
* @return density for one object and reference point
*/
- protected double computeDensity(List<DistanceResultPair<D>> referenceDists, int index) {
- final DistanceResultPair<D> x = referenceDists.get(index);
+ protected double computeDensity(DistanceDBIDResult<D> referenceDists, int index) {
+ final DistanceDBIDPair<D> x = referenceDists.get(index);
final double xDist = x.getDistance().doubleValue();
int lef = index - 1;
@@ -295,7 +292,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -305,7 +302,7 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte *
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
+ public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> {
/**
* Holds the value of {@link #K_ID}.
*/
@@ -319,7 +316,8 @@ public class ReferenceBasedOutlierDetection<V extends NumberVector<?, ?>, D exte @Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter pK = new IntParameter(K_ID, new GreaterConstraint(1));
+ final IntParameter pK = new IntParameter(K_ID);
+ pK.addConstraint(new GreaterConstraint(1));
if(config.grab(pK)) {
k = pK.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java new file mode 100644 index 00000000..e8077819 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleCOP.java @@ -0,0 +1,236 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.DependencyDerivator; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.model.CorrelationAnalysisSolution; +import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAFilteredRunner; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; 
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.documentation.Title; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Algorithm to compute local correlation outlier probability. + * + * This is the simpler, original version of COP, as published in + * <p> + * Arthur Zimek<br /> + * Correlation Clustering.<br /> + * PhD thesis, Chapter 18 + * </p> + * which has then been refined to the method published as {@link COP} + * + * @author Erich Schubert + * @param <V> the type of NumberVector handled by this Algorithm + */ +@Title("Simple COP: Correlation Outlier Probability") +@Reference(authors = "Arthur Zimek", title = "Correlation Clustering. PhD thesis, Chapter 18", booktitle = "") +public class SimpleCOP<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(SimpleCOP.class); + + /** + * Number of neighbors to be considered. + */ + int k; + + /** + * Holds the object performing the dependency derivation + */ + private DependencyDerivator<V, D> dependencyDerivator; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k k Parameter + * @param pca PCA runner + */ + public SimpleCOP(DistanceFunction<? 
super V, D> distanceFunction, int k, PCAFilteredRunner<V> pca) { + super(distanceFunction); + this.k = k; + this.dependencyDerivator = new DependencyDerivator<V, D>(null, FormatUtil.NF8, pca, 0, false); + } + + public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException { + KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1); + + DBIDs ids = data.getDBIDs(); + + WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); + WritableDataStore<Vector> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Vector.class); + WritableDataStore<Matrix> cop_datav = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Matrix.class); + WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1); + WritableDataStore<CorrelationAnalysisSolution<?>> cop_sol = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class); + {// compute neighbors of each db object + FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null; + double sqrt2 = Math.sqrt(2.0); + for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) { + KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + ModifiableDBIDs nids = DBIDUtil.newArray(neighbors); + nids.remove(id); + + // TODO: do we want to use the query point as centroid? 
+ CorrelationAnalysisSolution<V> depsol = dependencyDerivator.generateModel(data, nids); + + double stddev = depsol.getStandardDeviation(); + double distance = depsol.distance(data.get(id)); + double prob = NormalDistribution.erf(distance / (stddev * sqrt2)); + + cop_score.putDouble(id, prob); + + Vector errv = depsol.errorVector(data.get(id)).timesEquals(-1); + cop_err_v.put(id, errv); + + Matrix datav = depsol.dataProjections(data.get(id)); + cop_datav.put(id, datav); + + cop_dim.putInt(id, depsol.getCorrelationDimensionality()); + + cop_sol.put(id, depsol); + + if (progressLocalPCA != null) { + progressLocalPCA.incrementProcessed(LOG); + } + } + if (progressLocalPCA != null) { + progressLocalPCA.ensureCompleted(LOG); + } + } + // combine results. + Relation<Double> scoreResult = new MaterializedRelation<Double>("Original Correlation Outlier Probabilities", "origcop-outlier", TypeUtil.DOUBLE, cop_score, ids); + OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + // extra results + result.addChildResult(new MaterializedRelation<Integer>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids)); + result.addChildResult(new MaterializedRelation<Vector>("Error vectors", COP.COP_ERRORVEC, TypeUtil.VECTOR, cop_err_v, ids)); + result.addChildResult(new MaterializedRelation<Matrix>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids)); + result.addChildResult(new MaterializedRelation<CorrelationAnalysisSolution<?>>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids)); + return result; + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> { + /** + * Parameter to specify the number of nearest neighbors of an object to be + * considered for computing its COP_SCORE, must be an integer greater than + * 0. + * <p> + * Key: {@code -cop.k} + * </p> + */ + public static final OptionID K_ID = new OptionID("cop.k", "The number of nearest neighbors of an object to be considered for computing its COP_SCORE."); + + /** + * Parameter for the PCA runner class. + * + * <p> + * Key: {@code -cop.pcarunner} + * </p> + */ + public static final OptionID PCARUNNER_ID = new OptionID("cop.pcarunner", "The class to compute (filtered) PCA."); + + /** + * Number of neighbors to be considered. + */ + int k; + + /** + * The (filtered) PCA runner passed to the algorithm instance. + */ + protected PCAFilteredRunner<V> pca; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + IntParameter kP = new IntParameter(K_ID); + kP.addConstraint(new GreaterConstraint(0)); + if (config.grab(kP)) { + k = kP.intValue(); + } + ObjectParameter<PCAFilteredRunner<V>> pcaP = new ObjectParameter<PCAFilteredRunner<V>>(PCARUNNER_ID, PCAFilteredRunner.class, PCAFilteredRunner.class); + if (config.grab(pcaP)) { + pca = pcaP.instantiateClass(config); + } + } + + @Override + protected SimpleCOP<V, D> makeInstance() { + return new SimpleCOP<V, D>(distanceFunction, k, pca); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java new file mode 100644 index 00000000..1c104c08 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleKernelDensityLOF.java @@ -0,0 +1,284 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +/* + This file is part of ELKI: + Environment for Developing 
KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; +import 
de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.math.statistics.EpanechnikovKernelDensityFunction; +import de.lmu.ifi.dbs.elki.math.statistics.KernelDensityFunction; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * A simple variant of the LOF algorithm, which uses a simple kernel density + * estimation instead of the local reachability density. 
+ * + * @author Erich Schubert + * + * @apiviz.has KNNQuery + * @apiviz.has KernelDensityFunction + * + * @param <O> the type of objects handled by this Algorithm + * @param <D> Distance type + */ +public class SimpleKernelDensityLOF<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(SimpleKernelDensityLOF.class); + + /** + * Parameter k. + */ + protected int k; + + /** + * Kernel density function + */ + private KernelDensityFunction kernel; + + /** + * Constructor. + * + * @param k the value of k + * @param kernel Kernel function + */ + public SimpleKernelDensityLOF(int k, DistanceFunction<? super O, D> distance, KernelDensityFunction kernel) { + super(distance); + this.k = k + 1; + this.kernel = kernel; + } + + /** + * Run the naive kernel density LOF algorithm. + * + * @param relation Data to process + * @return LOF outlier result + */ + public OutlierResult run(Relation<O> relation) { + StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null; + + final int dim = RelationUtil.dimensionality(relation); + + DBIDs ids = relation.getDBIDs(); + + // "HEAVY" flag for KNN Query since it is used more than once + KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); + // No optimized kNN query - use a preprocessor! + if (!(knnq instanceof PreprocessorKNNQuery)) { + if (stepprog != null) { + stepprog.beginStep(1, "Materializing neighborhoods w.r.t. 
distance function.", LOG); + } + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k); + relation.getDatabase().addIndex(preproc); + DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction()); + knnq = preproc.getKNNQuery(rdq, k); + } + + // Compute LRDs + if (stepprog != null) { + stepprog.beginStep(2, "Computing densities.", LOG); + } + WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; + for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + int count = 0; + double sum = 0.0; + if (neighbors instanceof DoubleDistanceKNNList) { + // Fast version for double distances + for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + double max = ((DoubleDistanceKNNList)knnq.getKNNForDBID(neighbor, k)).doubleKNNDistance(); + final double v = neighbor.doubleDistance() / max; + sum += kernel.density(v) / Math.pow(max, dim); + count++; + } + } else { + for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance().doubleValue(); + final double v = neighbor.getDistance().doubleValue() / max; + sum += kernel.density(v) / Math.pow(max, dim); + count++; + } + } + final double density = sum / count; + dens.putDouble(it, density); + if (densProgress != null) { + densProgress.incrementProcessed(LOG); + } + } + if (densProgress != null) { + densProgress.ensureCompleted(LOG); + } + + // compute LOF_SCORE of each db object + if (stepprog != null) { + 
stepprog.beginStep(3, "Computing KLOFs.", LOG); + } + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + // track the maximum value for normalization. + DoubleMinMax lofminmax = new DoubleMinMax(); + + FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null; + for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final double lrdp = dens.doubleValue(it); + final double lof; + if (lrdp > 0) { + final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + double sum = 0.0; + int count = 0; + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + // skip the point itself + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + sum += dens.doubleValue(neighbor); + count++; + } + lof = sum / (count * lrdp); + } else { + lof = 1.0; + } + lofs.putDouble(it, lof); + // update minimum and maximum + lofminmax.put(lof); + + if (progressLOFs != null) { + progressLOFs.incrementProcessed(LOG); + } + } + if (progressLOFs != null) { + progressLOFs.ensureCompleted(LOG); + } + + if (stepprog != null) { + stepprog.setCompleted(LOG); + } + + // Build result representation. + Relation<Double> scoreResult = new MaterializedRelation<Double>("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", TypeUtil.DOUBLE, lofs, ids); + OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + + return result; + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(new CombinedTypeInformation(getDistanceFunction().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD)); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. 
+ * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> vector type + * @param <D> distance type + */ + public static class Parameterizer<O extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + /** + * Option ID for kernel density LOF kernel. + */ + public static final OptionID KERNEL_ID = new OptionID("kernellof.kernel", "Kernel to use for kernel density LOF."); + + /** + * The neighborhood size to use. + */ + protected int k = 2; + + /** + * Kernel density function parameter + */ + KernelDensityFunction kernel; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + final IntParameter pK = new IntParameter(LOF.K_ID); + pK.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK)) { + k = pK.getValue(); + } + + ObjectParameter<KernelDensityFunction> kernelP = new ObjectParameter<KernelDensityFunction>(KERNEL_ID, KernelDensityFunction.class, EpanechnikovKernelDensityFunction.class); + if (config.grab(kernelP)) { + kernel = kernelP.instantiateClass(config); + } + } + + @Override + protected SimpleKernelDensityLOF<O, D> makeInstance() { + return new SimpleKernelDensityLOF<O, D>(k, distanceFunction, kernel); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java new file mode 100644 index 00000000..48505ed5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/SimpleLOF.java @@ -0,0 +1,249 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free 
Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.query.DatabaseQuery; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceKNNList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; +import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; +import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor; +import 
de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * A simplified version of the original LOF algorithm, which does not use the + * reachability distance, yielding less stable results on inliers. + * + * @author Erich Schubert + * + * @apiviz.has KNNQuery + * + * @param <O> the type of DatabaseObjects handled by this Algorithm + * @param <D> Distance type + */ +public class SimpleLOF<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, OutlierResult> implements OutlierAlgorithm { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(SimpleLOF.class); + + /** + * Parameter k. + */ + protected int k; + + /** + * Constructor. + * + * @param k the value of k + */ + public SimpleLOF(int k, DistanceFunction<? super O, D> distance) { + super(distance); + this.k = k + 1; + } + + /** + * Run the Simple LOF algorithm. + * + * @param relation Data to process + * @return LOF outlier result + */ + public OutlierResult run(Relation<O> relation) { + StepProgress stepprog = LOG.isVerbose() ? 
new StepProgress("SimpleLOF", 3) : null; + + DBIDs ids = relation.getDBIDs(); + + // "HEAVY" flag for KNN Query since it is used more than once + KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k, DatabaseQuery.HINT_HEAVY_USE, DatabaseQuery.HINT_OPTIMIZED_ONLY, DatabaseQuery.HINT_NO_CACHE); + // No optimized kNN query - use a preprocessor! + if (!(knnq instanceof PreprocessorKNNQuery)) { + if (stepprog != null) { + stepprog.beginStep(1, "Materializing neighborhoods w.r.t. distance function.", LOG); + } + MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, getDistanceFunction(), k); + relation.getDatabase().addIndex(preproc); + DistanceQuery<O, D> rdq = relation.getDatabase().getDistanceQuery(relation, getDistanceFunction()); + knnq = preproc.getKNNQuery(rdq, k); + } + + // Compute LRDs + if (stepprog != null) { + stepprog.beginStep(2, "Computing densities.", LOG); + } + WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); + FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null; + for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + double sum = 0.0; + int count = 0; + if (neighbors instanceof DoubleDistanceKNNList) { + // Fast version for double distances + for (DoubleDistanceDBIDResultIter neighbor = ((DoubleDistanceKNNList) neighbors).iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + sum += neighbor.doubleDistance(); + count++; + } + } else { + for (DistanceDBIDResultIter<D> neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + sum += neighbor.getDistance().doubleValue(); + count++; + } + } + // Avoid division by 0 + final double lrd = (sum > 0) ? 
(count / sum) : 0; + dens.putDouble(it, lrd); + if (densProgress != null) { + densProgress.incrementProcessed(LOG); + } + } + if (densProgress != null) { + densProgress.ensureCompleted(LOG); + } + + // compute LOF_SCORE of each db object + if (stepprog != null) { + stepprog.beginStep(3, "Computing SLOFs.", LOG); + } + WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + // track the maximum value for normalization. + DoubleMinMax lofminmax = new DoubleMinMax(); + + FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simple LOF scores.", ids.size(), LOG) : null; + for (DBIDIter it = ids.iter(); it.valid(); it.advance()) { + final double lrdp = dens.doubleValue(it); + final double lof; + if (lrdp > 0) { + final KNNResult<D> neighbors = knnq.getKNNForDBID(it, k); + double sum = 0.0; + int count = 0; + for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + // skip the point itself + if (DBIDUtil.equal(neighbor, it)) { + continue; + } + sum += dens.doubleValue(neighbor); + count++; + } + lof = sum / (count * lrdp); + } else { + lof = 1.0; + } + lofs.putDouble(it, lof); + // update minimum and maximum + lofminmax.put(lof); + + if (progressLOFs != null) { + progressLOFs.incrementProcessed(LOG); + } + } + if (progressLOFs != null) { + progressLOFs.ensureCompleted(LOG); + } + + if (stepprog != null) { + stepprog.setCompleted(LOG); + } + + // Build result representation. 
+ Relation<Double> scoreResult = new MaterializedRelation<Double>("Simple Local Outlier Factor", "simple-lof-outlier", TypeUtil.DOUBLE, lofs, ids); + OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); + OutlierResult result = new OutlierResult(scoreMeta, scoreResult); + + return result; + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> vector type + * @param <D> distance type + */ + public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> { + /** + * The neighborhood size to use. + */ + protected int k = 2; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + final IntParameter pK = new IntParameter(LOF.K_ID); + pK.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK)) { + k = pK.getValue(); + } + } + + @Override + protected SimpleLOF<O, D> makeInstance() { + return new SimpleLOF<O, D>(k, distanceFunction); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java index 1542b8e3..f230fd3b 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/ExternalDoubleOutlierScore.java @@ -77,7 +77,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> /** * The logger for this class. 
*/ - private static final Logging logger = Logging.getLogger(ExternalDoubleOutlierScore.class); + private static final Logging LOG = Logging.getLogger(ExternalDoubleOutlierScore.class); /** * The comment character. @@ -183,7 +183,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> minmax.put(score); } else if(id == null && Double.isNaN(score)) { - logger.warning("Line did not match either ID nor score nor comment: " + line); + LOG.warning("Line did not match either ID nor score nor comment: " + line); } else { throw new AbortException("Line matched only ID or only SCORE patterns: " + line); @@ -224,7 +224,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -246,7 +246,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> * Key: {@code -externaloutlier.file} * </p> */ - public static final OptionID FILE_ID = OptionID.getOrCreateOptionID("externaloutlier.file", "The file name containing the (external) outlier scores."); + public static final OptionID FILE_ID = new OptionID("externaloutlier.file", "The file name containing the (external) outlier scores."); /** * Parameter that specifies the object ID pattern @@ -255,7 +255,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> * Default: ^ID= * </p> */ - public static final OptionID ID_ID = OptionID.getOrCreateOptionID("externaloutlier.idpattern", "The pattern to match object ID prefix"); + public static final OptionID ID_ID = new OptionID("externaloutlier.idpattern", "The pattern to match object ID prefix"); /** * Parameter that specifies the object score pattern @@ -263,7 +263,7 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> * Key: {@code -externaloutlier.scorepattern}<br /> * </p> */ - public static final OptionID SCORE_ID = 
OptionID.getOrCreateOptionID("externaloutlier.scorepattern", "The pattern to match object score prefix"); + public static final OptionID SCORE_ID = new OptionID("externaloutlier.scorepattern", "The pattern to match object score prefix"); /** * Parameter to specify a scaling function to use. @@ -271,12 +271,12 @@ public class ExternalDoubleOutlierScore extends AbstractAlgorithm<OutlierResult> * Key: {@code -externaloutlier.scaling} * </p> */ - public static final OptionID SCALING_ID = OptionID.getOrCreateOptionID("externaloutlier.scaling", "Class to use as scaling function."); + public static final OptionID SCALING_ID = new OptionID("externaloutlier.scaling", "Class to use as scaling function."); /** * Flag parameter for inverted scores. */ - public static final OptionID INVERTED_ID = OptionID.getOrCreateOptionID("externaloutlier.inverted", "Flag to signal an inverted outlier score."); + public static final OptionID INVERTED_ID = new OptionID("externaloutlier.inverted", "Flag to signal an inverted outlier score."); /** * The file to be reparsed diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java index 407b7400..b53a0942 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/FeatureBagging.java @@ -39,6 +39,7 @@ import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -47,7 +48,7 @@ import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import 
de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.utilities.RandomFactory; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -57,7 +58,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; /** @@ -85,22 +86,22 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(FeatureBagging.class); + private static final Logging LOG = Logging.getLogger(FeatureBagging.class); /** - * Number of instances to use + * Number of instances to use. */ protected int num = 1; /** - * Cumulative sum or breadth first combinations + * Cumulative sum or breadth first combinations. */ protected boolean breadth = false; /** - * Random number generator for subspace choice + * Random number generator for subspace choice. */ - private Random RANDOM; + private RandomFactory rnd; /** * The parameters k for LOF. 
@@ -113,18 +114,14 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements * @param k k Parameter for LOF * @param num Number of subspaces to use * @param breadth Flag for breadth-first merging + * @param rnd Random generator */ - public FeatureBagging(int k, int num, boolean breadth, Long seed) { + public FeatureBagging(int k, int num, boolean breadth, RandomFactory rnd) { super(); this.k = k; this.num = num; this.breadth = breadth; - if(seed != null) { - this.RANDOM = new Random(seed); - } - else { - this.RANDOM = new Random(); - } + this.rnd = rnd; } /** @@ -133,80 +130,79 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements * @param relation Relation to use * @return Outlier detection result */ - public OutlierResult run(Relation<NumberVector<?, ?>> relation) { - final int dbdim = DatabaseUtil.dimensionality(relation); - final int mindim = dbdim / 2; + public OutlierResult run(Relation<NumberVector<?>> relation) { + final int dbdim = RelationUtil.dimensionality(relation); + final int mindim = dbdim >> 1; final int maxdim = dbdim - 1; + final Random rand = rnd.getRandom(); ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num); { - FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("LOF iterations", num, logger) : null; - for(int i = 0; i < num; i++) { - BitSet dimset = randomSubspace(dbdim, mindim, maxdim); + FiniteProgress prog = LOG.isVerbose() ? 
new FiniteProgress("LOF iterations", num, LOG) : null; + for (int i = 0; i < num; i++) { + BitSet dimset = randomSubspace(dbdim, mindim, maxdim, rand); SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset); - LOF<NumberVector<?, ?>, DoubleDistance> lof = new LOF<NumberVector<?, ?>, DoubleDistance>(k, df); + LOF<NumberVector<?>, DoubleDistance> lof = new LOF<NumberVector<?>, DoubleDistance>(k, df); // run LOF and collect the result OutlierResult result = lof.run(relation); results.add(result); - if(prog != null) { - prog.incrementProcessed(logger); + if (prog != null) { + prog.incrementProcessed(LOG); } } - if(prog != null) { - prog.ensureCompleted(logger); + if (prog != null) { + prog.ensureCompleted(LOG); } } WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax minmax = new DoubleMinMax(); - if(breadth) { - FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null; + if (breadth) { + FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null; Pair<DBIDIter, Relation<Double>>[] IDVectorOntoScoreVector = Pair.newPairArray(results.size()); // Mapping score-sorted DBID-Iterators onto their corresponding scores. // We need to initialize them now be able to iterate them "in parallel". { int i = 0; - for(OutlierResult r : results) { + for (OutlierResult r : results) { IDVectorOntoScoreVector[i] = new Pair<DBIDIter, Relation<Double>>(r.getOrdering().iter(relation.getDBIDs()).iter(), r.getScores()); i++; } } // Iterating over the *lines* of the AS_t(i)-matrix. - for(int i = 0; i < relation.size(); i++) { + for (int i = 0; i < relation.size(); i++) { // Iterating over the elements of a line (breadth-first). 
- for(Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) { + for (Pair<DBIDIter, Relation<Double>> pair : IDVectorOntoScoreVector) { DBIDIter iter = pair.first; // Always true if every algorithm returns a complete result (one score // for every DBID). - if(iter.valid()) { + if (iter.valid()) { double score = pair.second.get(iter); - if(Double.isNaN(scores.doubleValue(iter))) { + if (Double.isNaN(scores.doubleValue(iter))) { scores.putDouble(iter, score); minmax.put(score); } iter.advance(); - } - else { - logger.warning("Incomplete result: Iterator does not contain |DB| DBIDs"); + } else { + LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs"); } } // Progress does not take the initial mapping into account. - if(cprog != null) { - cprog.incrementProcessed(logger); + if (cprog != null) { + cprog.incrementProcessed(LOG); } } - if(cprog != null) { - cprog.ensureCompleted(logger); + if (cprog != null) { + cprog.ensureCompleted(LOG); } - } - else { - FiniteProgress cprog = logger.isVerbose() ? new FiniteProgress("Combining results", relation.size(), logger) : null; - for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + } else { + FiniteProgress cprog = LOG.isVerbose() ? 
new FiniteProgress("Combining results", relation.size(), LOG) : null; + for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { double sum = 0.0; - for(OutlierResult r : results) { + for (OutlierResult r : results) { final Double s = r.getScores().get(iter); if (s != null && !Double.isNaN(s)) { sum += s; @@ -214,12 +210,12 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements } scores.putDouble(iter, sum); minmax.put(sum); - if(cprog != null) { - cprog.incrementProcessed(logger); + if (cprog != null) { + cprog.incrementProcessed(LOG); } } - if(cprog != null) { - cprog.ensureCompleted(logger); + if (cprog != null) { + cprog.ensureCompleted(LOG); } } OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); @@ -228,36 +224,34 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements } /** - * Choose a random subspace + * Choose a random subspace. * * @param alldim Number of total dimensions * @param mindim Minimum number to choose * @param maxdim Maximum number to choose * @return Subspace as bits. 
*/ - private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim) { + private BitSet randomSubspace(final int alldim, final int mindim, final int maxdim, final Random rand) { BitSet dimset = new BitSet(); - { - // Fill with all dimensions - int[] dims = new int[alldim]; - for(int d = 0; d < alldim; d++) { - dims[d] = d; - } - // Target dimensionality: - int subdim = mindim + RANDOM.nextInt(maxdim - mindim); - // Shrink the subspace to the destination size - for(int d = 0; d < alldim - subdim; d++) { - int s = RANDOM.nextInt(alldim - d); - dimset.set(dims[s]); - dims[s] = dims[alldim - d - 1]; - } + // Fill with all dimensions + int[] dims = new int[alldim]; + for (int d = 0; d < alldim; d++) { + dims[d] = d; + } + // Target dimensionality: + int subdim = mindim + rand.nextInt(maxdim - mindim); + // Shrink the subspace to the destination size + for (int d = 0; d < alldim - subdim; d++) { + int s = rand.nextInt(alldim - d); + dimset.set(dims[s]); + dims[s] = dims[alldim - d - 1]; } return dimset; } @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -279,69 +273,71 @@ public class FeatureBagging extends AbstractAlgorithm<OutlierResult> implements * Key: {@code -fbagging.num} * </p> */ - public static final OptionID NUM_ID = OptionID.getOrCreateOptionID("fbagging.num", "The number of instances to use in the ensemble."); + public static final OptionID NUM_ID = new OptionID("fbagging.num", "The number of instances to use in the ensemble."); /** - * The flag for using the breadth first approach + * The flag for using the breadth first approach. 
* <p> * Key: {@code -fbagging.breadth} * </p> */ - public static final OptionID BREADTH_ID = OptionID.getOrCreateOptionID("fbagging.breadth", "Use the breadth first combinations instead of the cumulative sum approach"); + public static final OptionID BREADTH_ID = new OptionID("fbagging.breadth", "Use the breadth first combinations instead of the cumulative sum approach"); /** - * The parameter to specify the random seed + * The parameter to specify the random seed. * <p> * Key: {@code -fbagging.seed} * </p> */ - public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("fbagging.seed", "Specify a particular random seed."); + public static final OptionID SEED_ID = new OptionID("fbagging.seed", "Specify a particular random seed."); /** - * The neighborhood size to use + * The neighborhood size to use. */ protected int k = 2; /** - * Number of instances to use + * Number of instances to use. */ protected int num = 1; /** - * Cumulative sum or breadth first combinations + * Cumulative sum or breadth first combinations. */ protected boolean breadth = false; /** - * Random generator seed + * Random generator. 
*/ - protected Long seed = null; + protected RandomFactory rnd; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - final IntParameter pK = new IntParameter(LOF.K_ID, new GreaterConstraint(1)); - if(config.grab(pK)) { + final IntParameter pK = new IntParameter(LOF.K_ID); + pK.addConstraint(new GreaterConstraint(1)); + if (config.grab(pK)) { k = pK.getValue(); } - IntParameter NUM_PARAM = new IntParameter(NUM_ID, new GreaterEqualConstraint(1)); - if(config.grab(NUM_PARAM)) { - num = NUM_PARAM.getValue(); + IntParameter numP = new IntParameter(NUM_ID); + numP.addConstraint(new GreaterEqualConstraint(1)); + if (config.grab(numP)) { + num = numP.getValue(); } - Flag BREADTH_FLAG = new Flag(BREADTH_ID); - if(config.grab(BREADTH_FLAG)) { - breadth = BREADTH_FLAG.getValue(); + Flag breadthF = new Flag(BREADTH_ID); + if (config.grab(breadthF)) { + breadth = breadthF.getValue(); } - LongParameter seedP = new LongParameter(SEED_ID, true); - if(config.grab(seedP)) { - seed = seedP.getValue(); + RandomParameter rndP = new RandomParameter(SEED_ID); + if (config.grab(rndP)) { + rnd = rndP.getValue(); } } @Override protected FeatureBagging makeInstance() { // Default is to re-use the same distance - return new FeatureBagging(k, num, breadth, seed); + return new FeatureBagging(k, num, breadth, rnd); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java index 73d4156a..15b94322 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/HiCS.java @@ -48,12 +48,14 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.ProjectedView;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
@@ -63,7 +65,7 @@ import de.lmu.ifi.dbs.elki.math.statistics.tests.KolmogorovSmirnovTest; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -74,8 +76,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstrain import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
/**
* Algorithm to compute High Contrast Subspaces for Density-Based Outlier
@@ -99,12 +101,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; */
@Title("HiCS: High Contrast Subspaces for Density-Based Outlier Ranking")
@Description("Algorithm to compute High Contrast Subspaces in a database as a pre-processing step for for density-based outlier ranking methods.")
-@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)")
-public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
+@Reference(authors = "Fabian Keller, Emmanuel Müller, Klemens Böhm", title = "HiCS: High Contrast Subspaces for Density-Based Outlier Ranking", booktitle = "Proc. IEEE 28th International Conference on Data Engineering (ICDE 2012)", url = "http://dx.doi.org/10.1109/ICDE.2012.88")
+public class HiCS<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm {
/**
- * The Logger for this class
+ * The Logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HiCS.class);
+ private static final Logging LOG = Logging.getLogger(HiCS.class);
/**
* Maximum number of retries.
@@ -112,57 +114,57 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie private static final int MAX_RETRIES = 100;
/**
- * Monte-Carlo iterations
+ * Monte-Carlo iterations.
*/
private int m;
/**
- * Alpha threshold
+ * Alpha threshold.
*/
private double alpha;
/**
- * Outlier detection algorithm
+ * Outlier detection algorithm.
*/
private OutlierAlgorithm outlierAlgorithm;
/**
- * Statistical test to use
+ * Statistical test to use.
*/
private GoodnessOfFitTest statTest;
/**
- * Candidates limit
+ * Candidates limit.
*/
private int cutoff;
-
+
/**
- * Random generator
+ * Random generator.
*/
- private Random random;
+ private RandomFactory rnd;
/**
- * Constructor
+ * Constructor.
*
* @param m value of m
* @param alpha value of alpha
* @param outlierAlgorithm Inner outlier detection algorithm
* @param statTest Test to use
* @param cutoff Candidate limit
- * @param seed Random seed
+ * @param rnd Random generator
*/
- public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, Long seed) {
+ public HiCS(int m, double alpha, OutlierAlgorithm outlierAlgorithm, GoodnessOfFitTest statTest, int cutoff, RandomFactory rnd) {
super();
this.m = m;
this.alpha = alpha;
this.outlierAlgorithm = outlierAlgorithm;
this.statTest = statTest;
this.cutoff = cutoff;
- this.random = (seed != null) ? new Random(seed) : new Random();
+ this.rnd = rnd;
}
/**
- * Perform HiCS on a given database
+ * Perform HiCS on a given database.
*
* @param relation the database
* @return The aggregated resulting scores that were assigned by the given
@@ -170,23 +172,23 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie */
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
- final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+ final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
- Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex);
+ Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getRandom());
- if(logger.isVerbose()) {
- logger.verbose("Number of high-contrast subspaces: " + subspaces.size());
+ if (LOG.isVerbose()) {
+ LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
List<Relation<Double>> results = new ArrayList<Relation<Double>>();
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), logger) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// run outlier detection and collect the result
// TODO extend so that any outlierAlgorithm can be used (use materialized
// relation instead of SubspaceEuclideanDistanceFunction?)
- for(HiCSSubspace dimset : subspaces) {
- if(logger.isVerbose()) {
- logger.verbose("Performing outlier detection in subspace " + dimset);
+ for (HiCSSubspace dimset : subspaces) {
+ if (LOG.isVerbose()) {
+ LOG.verbose("Performing outlier detection in subspace " + dimset);
}
ProxyDatabase pdb = new ProxyDatabase(ids);
@@ -196,22 +198,22 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie // run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
results.add(result.getScores());
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { + for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0.0;
- for(Relation<Double> r : results) {
+ for (Relation<Double> r : results) {
final Double s = r.get(iditer);
- if(s != null && !Double.isNaN(s)) {
+ if (s != null && !Double.isNaN(s)) {
sum += s;
}
}
@@ -232,12 +234,12 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie * @param relation Relation to index
* @return List of sorted objects
*/
- private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?, ?>> relation) {
- final int dim = DatabaseUtil.dimensionality(relation);
+ private ArrayList<ArrayDBIDs> buildOneDimIndexes(Relation<? extends NumberVector<?>> relation) {
+ final int dim = RelationUtil.dimensionality(relation);
ArrayList<ArrayDBIDs> subspaceIndex = new ArrayList<ArrayDBIDs>(dim + 1);
SortDBIDsBySingleDimension comp = new VectorUtil.SortDBIDsBySingleDimension(relation);
- for(int i = 1; i <= dim; i++) {
+ for (int i = 0; i < dim; i++) {
ArrayModifiableDBIDs amDBIDs = DBIDUtil.newArray(relation.getDBIDs());
comp.setDimension(i);
amDBIDs.sort(comp);
@@ -248,140 +250,143 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie }
/**
- * Identifies high contrast subspaces in a given full-dimensional database
+ * Identifies high contrast subspaces in a given full-dimensional database.
*
* @param relation the relation the HiCS should be evaluated for
* @param subspaceIndex Subspace indexes
* @return a set of high contrast subspaces
*/
- private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?, ?>> relation, ArrayList<ArrayDBIDs> subspaceIndex) {
- final int dbdim = DatabaseUtil.dimensionality(relation);
+ private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector<?>> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
+ final int dbdim = RelationUtil.dimensionality(relation);
- FiniteProgress dprog = logger.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, logger) : null;
- if(dprog != null) {
- dprog.setProcessed(2, logger);
+ FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
+ if (dprog != null) {
+ dprog.setProcessed(2, LOG);
}
TreeSet<HiCSSubspace> subspaceList = new TreeSet<HiCSSubspace>(HiCSSubspace.SORT_BY_SUBSPACE);
TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<HiCSSubspace>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
- FiniteProgress prog = logger.isVerbose() ? new FiniteProgress("Generating two-element subsets", dbdim * (dbdim - 1) / 2, logger) : null;
+ FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
// compute two-element sets of subspaces
- for(int i = 0; i < dbdim; i++) {
- for(int j = i + 1; j < dbdim; j++) {
+ for (int i = 0; i < dbdim; i++) {
+ for (int j = i + 1; j < dbdim; j++) {
HiCSSubspace ts = new HiCSSubspace();
ts.set(i);
ts.set(j);
- calculateContrast(relation, ts, subspaceIndex);
+ calculateContrast(relation, ts, subspaceIndex, random);
dDimensionalList.add(ts);
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
- IndefiniteProgress qprog = logger.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", logger) : null;
- for(int d = 3; !dDimensionalList.isEmpty(); d++) {
- if(dprog != null) {
- dprog.setProcessed(d, logger);
+ IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
+ for (int d = 3; !dDimensionalList.isEmpty(); d++) {
+ if (dprog != null) {
+ dprog.setProcessed(d, LOG);
}
- subspaceList.addAll(dDimensionalList);
// add all (d-1)-dimensional sets to the result (subspaceList) and to the candidate list
- ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList);
+ ArrayList<HiCSSubspace> candidateList = new ArrayList<HiCSSubspace>(dDimensionalList.size());
+ for (HiCSSubspace sub : dDimensionalList) {
+ subspaceList.add(sub);
+ candidateList.add(sub);
+ }
dDimensionalList.clear();
// candidateList now contains the (at most *cutoff*) best (d-1)-dimensional sets
Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
// TODO: optimize Apriori-style, e.g. by not even computing the joined bit set?
- for(int i = 0; i < candidateList.size() - 1; i++) {
- for(int j = i + 1; j < candidateList.size(); j++) {
+ for (int i = 0; i < candidateList.size() - 1; i++) {
+ for (int j = i + 1; j < candidateList.size(); j++) {
HiCSSubspace set1 = candidateList.get(i);
HiCSSubspace set2 = candidateList.get(j);
HiCSSubspace joinedSet = new HiCSSubspace();
joinedSet.or(set1);
joinedSet.or(set2);
- if(joinedSet.cardinality() != d) {
+ if (joinedSet.cardinality() != d) {
continue;
}
- calculateContrast(relation, joinedSet, subspaceIndex);
+ calculateContrast(relation, joinedSet, subspaceIndex, random);
dDimensionalList.add(joinedSet);
- if(qprog != null) {
- qprog.incrementProcessed(logger);
+ if (qprog != null) {
+ qprog.incrementProcessed(LOG);
}
}
}
// Prune
- for(HiCSSubspace cand : candidateList) {
- for(HiCSSubspace nextSet : dDimensionalList) {
- if(nextSet.contrast > cand.contrast) {
+ for (HiCSSubspace cand : candidateList) {
+ for (HiCSSubspace nextSet : dDimensionalList) {
+ if (nextSet.contrast > cand.contrast) {
subspaceList.remove(cand);
break;
}
}
}
}
- if(qprog != null) {
- qprog.setCompleted(logger);
+ if (qprog != null) {
+ qprog.setCompleted(LOG);
}
- if(dprog != null) {
- dprog.setProcessed(dbdim, logger);
- dprog.ensureCompleted(logger);
+ if (dprog != null) {
+ dprog.setProcessed(dbdim, LOG);
+ dprog.ensureCompleted(LOG);
}
return subspaceList;
}
/**
- * Calculates the actual contrast of a given subspace
+ * Calculates the actual contrast of a given subspace.
*
- * @param relation
- * @param subspace
+ * @param relation Relation to process
+ * @param subspace Subspace
* @param subspaceIndex Subspace indexes
*/
- private void calculateContrast(Relation<? extends NumberVector<?, ?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex) {
+ private void calculateContrast(Relation<? extends NumberVector<?>> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int card = subspace.cardinality();
final double alpha1 = Math.pow(alpha, (1.0 / card));
final int windowsize = (int) (relation.size() * alpha1);
- final FiniteProgress prog = logger.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, logger) : null;
+ final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
int retries = 0;
double deviationSum = 0.0;
- for(int i = 0; i < m; i++) {
+ for (int i = 0; i < m; i++) {
// Choose a random set bit.
int chosen = -1;
- for(int tmp = random.nextInt(card); tmp >= 0; tmp--) {
+ for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
chosen = subspace.nextSetBit(chosen + 1);
}
// initialize sample
DBIDs conditionalSample = relation.getDBIDs();
- for(int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
- if(j == chosen) {
+ for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
+ if (j == chosen) {
continue;
}
ArrayDBIDs sortedIndices = subspaceIndex.get(j);
- ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray();
+ ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
// initialize index block
- int start = random.nextInt(relation.size() - windowsize);
- for(int k = start; k < start + windowsize; k++) {
- indexBlock.add(sortedIndices.get(k)); // select index block
+ DBIDArrayIter iter = sortedIndices.iter();
+ iter.seek(random.nextInt(relation.size() - windowsize));
+ for (int k = 0; k < windowsize; k++, iter.advance()) {
+ indexBlock.add(iter); // select index block
}
conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
}
- if(conditionalSample.size() < 10) {
+ if (conditionalSample.size() < 10) {
retries++;
- if(logger.isDebugging()) {
- logger.debug("Sample size very small. Retry no. " + retries);
+ if (LOG.isDebugging()) {
+ LOG.debug("Sample size very small. Retry no. " + retries);
}
- if(retries >= MAX_RETRIES) {
- logger.warning("Too many retries, for small samples: " + retries);
- }
- else {
+ if (retries >= MAX_RETRIES) {
+ LOG.warning("Too many retries, for small samples: " + retries);
+ } else {
i--;
continue;
}
@@ -391,7 +396,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie {
int l = 0;
for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
- sampleValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ sampleValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
@@ -400,23 +405,23 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie {
int l = 0;
for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
- fullValues[l] = relation.get(iter).doubleValue(chosen + 1);
+ fullValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
double contrast = statTest.deviation(fullValues, sampleValues);
- if(Double.isNaN(contrast)) {
+ if (Double.isNaN(contrast)) {
i--;
- logger.warning("Contrast was NaN");
+ LOG.warning("Contrast was NaN");
continue;
}
deviationSum += contrast;
- if(prog != null) {
- prog.incrementProcessed(logger);
+ if (prog != null) {
+ prog.incrementProcessed(LOG);
}
}
- if(prog != null) {
- prog.ensureCompleted(logger);
+ if (prog != null) {
+ prog.ensureCompleted(LOG);
}
subspace.contrast = deviationSum / m;
}
@@ -428,7 +433,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -441,12 +446,12 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie */
public static class HiCSSubspace extends BitSet {
/**
- * Serial version
+ * Serial version.
*/
private static final long serialVersionUID = 1L;
/**
- * The HiCS contrast value
+ * The HiCS contrast value.
*/
protected double contrast;
@@ -459,22 +464,22 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie @Override
public String toString() {
- StringBuffer buf = new StringBuffer();
+ StringBuilder buf = new StringBuilder();
buf.append("[contrast=").append(contrast);
- for(int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
- buf.append(" ").append(i + 1);
+ for (int i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
+ buf.append(' ').append(i + 1);
}
- buf.append("]");
+ buf.append(']');
return buf.toString();
}
/**
* Sort subspaces by their actual subspace.
*/
- public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_ASC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if(o1.contrast == o2.contrast) {
+ if (o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast > o2.contrast ? 1 : -1;
@@ -484,10 +489,10 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie /**
* Sort subspaces by their actual subspace.
*/
- public static Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_CONTRAST_DESC = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
- if(o1.contrast == o2.contrast) {
+ if (o1.contrast == o2.contrast) {
return 0;
}
return o1.contrast < o2.contrast ? 1 : -1;
@@ -497,16 +502,15 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie /**
* Sort subspaces by their actual subspace.
*/
- public static Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>() {
+ public static final Comparator<HiCSSubspace> SORT_BY_SUBSPACE = new Comparator<HiCSSubspace>() {
@Override
public int compare(HiCSSubspace o1, HiCSSubspace o2) {
int dim1 = o1.nextSetBit(0);
int dim2 = o2.nextSetBit(0);
- while(dim1 >= 0 && dim2 >= 0) {
- if(dim1 < dim2) {
+ while (dim1 >= 0 && dim2 >= 0) {
+ if (dim1 < dim2) {
return -1;
- }
- else if(dim1 > dim2) {
+ } else if (dim1 > dim2) {
return 1;
}
dim1 = o1.nextSetBit(dim1 + 1);
@@ -518,7 +522,7 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie }
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Jan Brusis
*
@@ -526,40 +530,40 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie *
* @param <V> vector type
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
/**
* Parameter that specifies the number of iterations in the Monte-Carlo
- * process of identifying high contrast subspaces
+ * process of identifying high contrast subspaces.
*/
- public static final OptionID M_ID = OptionID.getOrCreateOptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
+ public static final OptionID M_ID = new OptionID("hics.m", "The number of iterations in the Monte-Carlo processing.");
/**
* Parameter that determines the size of the test statistic during the
- * Monte-Carlo iteration
+ * Monte-Carlo iteration.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
+ public static final OptionID ALPHA_ID = new OptionID("hics.alpha", "The discriminance value that determines the size of the test statistic .");
/**
* Parameter that specifies which outlier detection algorithm to use on the
- * resulting set of high contrast subspaces
+ * resulting set of high contrast subspaces.
*/
- public static final OptionID ALGO_ID = OptionID.getOrCreateOptionID("hics.algo", "The Algorithm that performs the actual outlier detection on the resulting set of subspace");
+ public static final OptionID ALGO_ID = new OptionID("hics.algo", "The Algorithm that performs the actual outlier detection on the resulting set of subspace");
/**
* Parameter that specifies which statistical test to use in order to
- * calculate the deviation of two given data samples
+ * calculate the deviation of two given data samples.
*/
- public static final OptionID TEST_ID = OptionID.getOrCreateOptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
+ public static final OptionID TEST_ID = new OptionID("hics.test", "The statistical test that is used to calculate the deviation of two data samples");
/**
- * Parameter that specifies the candidate_cutoff
+ * Parameter that specifies the candidate_cutoff.
*/
- public static final OptionID LIMIT_ID = OptionID.getOrCreateOptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
+ public static final OptionID LIMIT_ID = new OptionID("hics.limit", "The threshold that determines how many d-dimensional subspace candidates to retain in each step of the generation");
/**
- * Parameter that specifies the random seed
+ * Parameter that specifies the random seed.
*/
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("hics.seed", "The random seed.");
+ public static final OptionID SEED_ID = new OptionID("hics.seed", "The random seed.");
/**
* Holds the value of {@link #M_ID}.
@@ -582,52 +586,55 @@ public class HiCS<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outlie private GoodnessOfFitTest statTest;
/**
- * Holds the value of {@link #LIMIT_ID}
+ * Holds the value of {@link #LIMIT_ID}.
*/
private int cutoff = 400;
-
+
/**
- * Random seed (optional)
+ * Random generator.
*/
- private Long seed = null;
+ private RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- final IntParameter mP = new IntParameter(M_ID, new GreaterConstraint(1), 50);
- if(config.grab(mP)) {
- m = mP.getValue();
+ final IntParameter mP = new IntParameter(M_ID, 50);
+ mP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(mP)) {
+ m = mP.intValue();
}
- final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.1);
- if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.1);
+ alphaP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(alphaP)) {
+ alpha = alphaP.doubleValue();
}
final ObjectParameter<OutlierAlgorithm> algoP = new ObjectParameter<OutlierAlgorithm>(ALGO_ID, OutlierAlgorithm.class, LOF.class);
- if(config.grab(algoP)) {
+ if (config.grab(algoP)) {
outlierAlgorithm = algoP.instantiateClass(config);
}
final ObjectParameter<GoodnessOfFitTest> testP = new ObjectParameter<GoodnessOfFitTest>(TEST_ID, GoodnessOfFitTest.class, KolmogorovSmirnovTest.class);
- if(config.grab(testP)) {
+ if (config.grab(testP)) {
statTest = testP.instantiateClass(config);
}
- final IntParameter cutoffP = new IntParameter(LIMIT_ID, new GreaterConstraint(1), 100);
- if(config.grab(cutoffP)) {
- cutoff = cutoffP.getValue();
+ final IntParameter cutoffP = new IntParameter(LIMIT_ID, 100);
+ cutoffP.addConstraint(new GreaterConstraint(1));
+ if (config.grab(cutoffP)) {
+ cutoff = cutoffP.intValue();
}
- final LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ final RandomParameter rndP = new RandomParameter(SEED_ID);
+ if (config.grab(rndP)) {
+ rnd = rndP.getValue();
}
-}
+ }
@Override
protected HiCS<V> makeInstance() {
- return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, seed);
+ return new HiCS<V>(m, alpha, outlierAlgorithm, statTest, cutoff, rnd);
}
}
-}
\ No newline at end of file +}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java index a4db7e3d..387041da 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/RescaleMetaOutlierAlgorithm.java @@ -62,7 +62,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(RescaleMetaOutlierAlgorithm.class); + private static final Logging LOG = Logging.getLogger(RescaleMetaOutlierAlgorithm.class); /** * Parameter to specify a scaling function to use. @@ -70,7 +70,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult * Key: {@code -comphist.scaling} * </p> */ - public static final OptionID SCALING_ID = OptionID.getOrCreateOptionID("metaoutlier.scaling", "Class to use as scaling function."); + public static final OptionID SCALING_ID = new OptionID("metaoutlier.scaling", "Class to use as scaling function."); /** * Holds the algorithm to run. 
@@ -137,7 +137,7 @@ public class RescaleMetaOutlierAlgorithm extends AbstractAlgorithm<OutlierResult @Override protected Logging getLogger() { - return logger; + return LOG; } @Override diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java new file mode 100644 index 00000000..b7791fc4 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/SimpleOutlierEnsemble.java @@ -0,0 +1,222 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.Algorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.result.ResultUtil; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.ensemble.EnsembleVoting; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter; +import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; + +/** + * Simple outlier ensemble method. + * + * @author Erich Schubert + * + * @apiviz.composedOf EnsembleVoting + * @apiviz.uses OutlierResult oneway - - reads + * @apiviz.uses OutlierResult oneway - - «create» + */ +public class SimpleOutlierEnsemble extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { + /** + * The logger for this class. + */ + private static final Logging LOG = Logging.getLogger(SimpleOutlierEnsemble.class); + + /** + * The algorithms to run. + */ + private List<OutlierAlgorithm> algorithms; + + /** + * The voting in use. + */ + private EnsembleVoting voting; + + /** + * Constructor. + * + * @param algorithms Algorithms to run + * @param voting Voting method + */ + public SimpleOutlierEnsemble(List<OutlierAlgorithm> algorithms, EnsembleVoting voting) { + this.algorithms = algorithms; + this.voting = voting; + } + + @Override + public OutlierResult run(Database database) throws IllegalStateException { + int num = algorithms.size(); + // Run inner outlier algorithms + ModifiableDBIDs ids = DBIDUtil.newHashSet(); + ArrayList<OutlierResult> results = new ArrayList<OutlierResult>(num); + { + FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null; + for (Algorithm alg : algorithms) { + Result res = alg.run(database); + List<OutlierResult> ors = ResultUtil.getOutlierResults(res); + for (OutlierResult or : ors) { + results.add(or); + ids.addDBIDs(or.getScores().getDBIDs()); + } + if (prog != null) { + prog.incrementProcessed(LOG); + } + } + if (prog != null) { + prog.ensureCompleted(LOG); + } + } + // Combine + WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC); + DoubleMinMax minmax = new DoubleMinMax(); + { + FiniteProgress cprog = LOG.isVerbose() ? 
new FiniteProgress("Combining results", ids.size(), LOG) : null; + for (DBIDIter id = ids.iter(); id.valid(); id.advance()) { + double[] scores = new double[num]; + int i = 0; + for (OutlierResult r : results) { + Double score = r.getScores().get(id); + if (score != null) { + scores[i] = score; + i++; + } else { + LOG.warning("DBID " + id + " was not given a score by result " + r); + } + } + if (i > 0) { + // Shrink array if necessary. + if (i < scores.length) { + scores = Arrays.copyOf(scores, i); + } + double combined = voting.combine(scores); + sumscore.putDouble(id, combined); + minmax.put(combined); + } else { + LOG.warning("DBID " + id + " was not given any score at all."); + } + if (cprog != null) { + cprog.incrementProcessed(LOG); + } + } + if (cprog != null) { + cprog.ensureCompleted(LOG); + } + } + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax()); + Relation<Double> scores = new MaterializedRelation<Double>("Simple Outlier Ensemble", "ensemble-outlier", TypeUtil.DOUBLE, sumscore, ids); + return new OutlierResult(meta, scores); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + TypeInformation[] trs = new TypeInformation[algorithms.size()]; + for (int i = 0; i < trs.length; i++) { + // FIXME: what if an algorithm needs more than one input data source? + trs[i] = algorithms.get(i).getInputTypeRestriction()[0]; + } + return TypeUtil.array(new CombinedTypeInformation(trs)); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Voting strategy to use in the ensemble. + */ + public static final OptionID VOTING_ID = new OptionID("ensemble.voting", "Voting strategy to use in the ensemble."); + + /** + * The algorithms to run. + */ + private List<OutlierAlgorithm> algorithms; + + /** + * The voting in use. 
+ */ + private EnsembleVoting voting; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectListParameter<OutlierAlgorithm> algP = new ObjectListParameter<OutlierAlgorithm>(OptionID.ALGORITHM, OutlierAlgorithm.class); + if (config.grab(algP)) { + ListParameterization subconfig = new ListParameterization(); + ChainedParameterization chain = new ChainedParameterization(subconfig, config); + chain.errorsTo(config); + algorithms = algP.instantiateClasses(chain); + subconfig.logAndClearReportedErrors(); + } + ObjectParameter<EnsembleVoting> votingP = new ObjectParameter<EnsembleVoting>(VOTING_ID, EnsembleVoting.class); + if (config.grab(votingP)) { + voting = votingP.instantiateClass(config); + } + } + + @Override + protected SimpleOutlierEnsemble makeInstance() { + return new SimpleOutlierEnsemble(algorithms, voting); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java index d7e78281..7c5dd8b0 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/meta/package-info.java @@ -1,5 +1,8 @@ /** * <p>Meta outlier detection algorithms: external scores, score rescaling.</p> + * + * @apiviz.exclude java.io.File + * @apiviz.exclude algorithm.AbstractAlgorithm */ /* This file is part of ELKI: @@ -23,4 +26,4 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
\ No newline at end of file +package de.lmu.ifi.dbs.elki.algorithm.outlier.meta; diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java index ea5d3ec4..eca0d876 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/package-info.java @@ -4,6 +4,11 @@ * @see de.lmu.ifi.dbs.elki.algorithm * * @apiviz.exclude database.query + * @apiviz.exclude java.lang.Comparable + * @apiviz.exclude de.lmu.ifi.dbs.elki.utilities + * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.Algorithm + * @apiviz.exclude de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm + * @apiviz.exclude AggarwalYuEvoluationary.Individuum */ /* This file is part of ELKI: @@ -27,4 +32,4 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -package de.lmu.ifi.dbs.elki.algorithm.outlier;
\ No newline at end of file +package de.lmu.ifi.dbs.elki.algorithm.outlier; diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java index 1caf7582..f37ee182 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractDistanceBasedSpatialOutlier.java @@ -45,7 +45,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number /** * Parameter to specify the non spatial distance function to use */ - public static final OptionID NON_SPATIAL_DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("spatialoutlier.nonspatialdistance", "The distance function to use for non spatial attributes"); + public static final OptionID NON_SPATIAL_DISTANCE_FUNCTION_ID = new OptionID("spatialoutlier.nonspatialdistance", "The distance function to use for non spatial attributes"); /** * The distance function to use @@ -84,7 +84,7 @@ public abstract class AbstractDistanceBasedSpatialOutlier<N, O, D extends Number * @param <O> Non-spatial object type * @param <D> Distance value type */ - public static abstract class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { + public abstract static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { /** * The distance function to use on the non-spatial attributes. 
*/ diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java index f0c05e1e..d3770504 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/AbstractNeighborhoodOutlier.java @@ -44,7 +44,7 @@ public abstract class AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O /** * Parameter to specify the neighborhood predicate to use. */ - public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("neighborhood", "The neighborhood predicate to use in comparison step."); + public static final OptionID NEIGHBORHOOD_ID = new OptionID("neighborhood", "The neighborhood predicate to use in comparison step."); /** * Our predicate to obtain the neighbors @@ -79,7 +79,7 @@ public abstract class AbstractNeighborhoodOutlier<O> extends AbstractAlgorithm<O * * @param <O> Object type */ - public static abstract class Parameterizer<O> extends AbstractParameterizer { + public abstract static class Parameterizer<O> extends AbstractParameterizer { /** * The predicate to obtain the neighbors. 
*/ diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java index 7f3bac29..cd5670f7 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuGLSBackwardSearchAlgorithm.java @@ -37,13 +37,13 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; -import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.ProxyView; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; @@ -52,7 +52,6 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -85,11 +84,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; */ @Title("GLS-Backward Search") @Reference(authors = "F. Chen and C.-T. Lu and A. 
P. Boedihardjo", title = "GLS-SOD: A Generalized Local Statistical Approach for Spatial Outlier Detection", booktitle = "Proc. 16th ACM SIGKDD international conference on Knowledge discovery and data mining", url = "http://dx.doi.org/10.1145/1835804.1835939") -public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm { +public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<V, D, OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(CTLuGLSBackwardSearchAlgorithm.class); + private static final Logging LOG = Logging.getLogger(CTLuGLSBackwardSearchAlgorithm.class); /** * Parameter Alpha - significance niveau @@ -121,7 +120,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte * @param relationy Attribute relation * @return Algorithm result */ - public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) { + public OutlierResult run(Relation<V> relationx, Relation<? extends NumberVector<?>> relationy) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relationx.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax mm = new DoubleMinMax(0.0, 0.0); @@ -130,7 +129,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte ModifiableDBIDs idview = DBIDUtil.newHashSet(relationx.getDBIDs()); ProxyView<V> proxy = new ProxyView<V>(relationx.getDatabase(), idview, relationx); - double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha / 2); + double phialpha = NormalDistribution.standardNormalQuantile(1.0 - alpha *.5); // Detect outliers while significant. 
while(true) { Pair<DBID, Double> candidate = singleIteration(proxy, relationy); @@ -138,15 +137,15 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte break; } scores.putDouble(candidate.first, candidate.second); - if (!Double.isNaN(candidate.second)) { + if(!Double.isNaN(candidate.second)) { mm.put(candidate.second); } idview.remove(candidate.first); } // Remaining objects are inliers - for (DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) { - scores.putDouble(iter.getDBID(), 0.0); + for(DBIDIter iter = idview.iter(); iter.valid(); iter.advance()) { + scores.putDouble(iter, 0.0); } } @@ -162,9 +161,9 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte * @param relationy Attribute relation * @return Top outlier and associated score */ - private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector<?, ?>> relationy) { - final int dim = DatabaseUtil.dimensionality(relationx); - final int dimy = DatabaseUtil.dimensionality(relationy); + private Pair<DBID, Double> singleIteration(Relation<V> relationx, Relation<? 
extends NumberVector<?>> relationy) { + final int dim = RelationUtil.dimensionality(relationx); + final int dimy = RelationUtil.dimensionality(relationy); assert (dim == 2); KNNQuery<V, D> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1); @@ -177,47 +176,51 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte Matrix X = new Matrix(ids.size(), 6); Matrix F = new Matrix(ids.size(), ids.size()); Matrix Y = new Matrix(ids.size(), dimy); - for(int i = 0; i < ids.size(); i++) { - DBID id = ids.get(i); - - // Fill the data matrix - { - V vec = relationx.get(id); - double la = vec.doubleValue(1); - double lo = vec.doubleValue(2); - X.set(i, 0, 1.0); - X.set(i, 1, la); - X.set(i, 2, lo); - X.set(i, 3, la * lo); - X.set(i, 4, la * la); - X.set(i, 5, lo * lo); - } - { - for(int d = 0; d < dimy; d++) { - double idy = relationy.get(id).doubleValue(d + 1); - Y.set(i, d, idy); + { + int i = 0; + for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { + // Fill the data matrix + { + V vec = relationx.get(id); + double la = vec.doubleValue(0); + double lo = vec.doubleValue(1); + X.set(i, 0, 1.0); + X.set(i, 1, la); + X.set(i, 2, lo); + X.set(i, 3, la * lo); + X.set(i, 4, la * la); + X.set(i, 5, lo * lo); } - } - // Fill the neighborhood matrix F: - { - KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); - ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size()); - for(DistanceResultPair<D> dpair : neighbors) { - if(id.sameDBID(dpair.getDBID())) { - continue; + { + final NumberVector<?> vecy = relationy.get(id); + for(int d = 0; d < dimy; d++) { + double idy = vecy.doubleValue(d); + Y.set(i, d, idy); } - neighborhood.add(dpair.getDBID()); } - // Weight object itself positively. - F.set(i, i, 1.0); - final int nweight = -1 / neighborhood.size(); - // We need to find the index positions of the neighbors, unfortunately. 
- for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { - int pos = ids.binarySearch(iter.getDBID()); - assert (pos >= 0); - F.set(pos, i, nweight); + + // Fill the neighborhood matrix F: + { + KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k + 1); + ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size()); + for(DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + if(DBIDUtil.equal(id, neighbor)) { + continue; + } + neighborhood.add(neighbor); + } + // Weight object itself positively. + F.set(i, i, 1.0); + final int nweight = -1 / neighborhood.size(); + // We need to find the index positions of the neighbors, + // unfortunately. + for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { + int pos = ids.binarySearch(iter); + assert (pos >= 0); + F.set(pos, i, nweight); + } } } } @@ -236,13 +239,13 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte DBID worstid = null; double worstscore = Double.NEGATIVE_INFINITY; - for(int i = 0; i < ids.size(); i++) { - DBID id = ids.get(i); + int i = 0; + for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) { double err = E.getRow(i).euclideanLength(); // double err = Math.abs(E.get(i, 0)); if(err > worstscore) { worstscore = err; - worstid = id; + worstid = DBIDUtil.deref(id); } } @@ -256,7 +259,7 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -269,16 +272,16 @@ public class CTLuGLSBackwardSearchAlgorithm<V extends NumberVector<?, ?>, D exte * @param <V> Input vector type * @param <D> Distance type */ - public static class Parameterizer<V extends NumberVector<?, ?>, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<V, D> { + public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends 
AbstractDistanceBasedAlgorithm.Parameterizer<V, D> { /** * Holds the alpha value - significance niveau */ - public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("glsbs.alpha", "Significance niveau"); + public static final OptionID ALPHA_ID = new OptionID("glsbs.alpha", "Significance niveau"); /** * Parameter to specify the k nearest neighbors */ - public static final OptionID K_ID = OptionID.getOrCreateOptionID("glsbs.k", "k nearest neighbors to use"); + public static final OptionID K_ID = new OptionID("glsbs.k", "k nearest neighbors to use"); /** * Parameter Alpha - significance niveau diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java index a0c09057..2caee128 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMeanMultipleAttributes.java @@ -31,11 +31,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; @@ -45,7 +45,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import 
de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** @@ -72,11 +71,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * @param <O> Attribute Vector */ @Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179") -public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier<N> { +public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> { /** * logger */ - public static final Logging logger = Logging.getLogger(CTLuMeanMultipleAttributes.class); + private static final Logging LOG = Logging.getLogger(CTLuMeanMultipleAttributes.class); /** * Constructor @@ -89,28 +88,27 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends @Override protected Logging getLogger() { - return logger; + return LOG; } public OutlierResult run(Relation<N> spatial, Relation<O> attributes) { - if(logger.isDebugging()) { - logger.debug("Dimensionality: " + DatabaseUtil.dimensionality(attributes)); + if(LOG.isDebugging()) { + LOG.debug("Dimensionality: " + RelationUtil.dimensionality(attributes)); } final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial); - CovarianceMatrix covmaker = new CovarianceMatrix(DatabaseUtil.dimensionality(attributes)); + CovarianceMatrix covmaker = new CovarianceMatrix(RelationUtil.dimensionality(attributes)); WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class); for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - final O 
obj = attributes.get(id); - final DBIDs neighbors = npred.getNeighborDBIDs(id); + final O obj = attributes.get(iditer); + final DBIDs neighbors = npred.getNeighborDBIDs(iditer); // TODO: remove object itself from neighbors? // Mean vector "g" Vector mean = Centroid.make(attributes, neighbors); // Delta vector "h" - Vector delta = obj.getColumnVector().minus(mean); - deltas.put(id, delta); + Vector delta = obj.getColumnVector().minusEquals(mean); + deltas.put(iditer, delta); covmaker.put(delta); } // Finalize covariance matrix: @@ -120,11 +118,10 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends DoubleMinMax minmax = new DoubleMinMax(); WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC); for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - Vector temp = deltas.get(id).minus(mean); + Vector temp = deltas.get(iditer).minus(mean); final double score = temp.transposeTimesTimes(cmati, temp); minmax.put(score); - scores.putDouble(id, score); + scores.putDouble(iditer, score); } Relation<Double> scoreResult = new MaterializedRelation<Double>("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); @@ -149,7 +146,7 @@ public class CTLuMeanMultipleAttributes<N, O extends NumberVector<?, ?>> extends * @param <N> Neighborhood type * @param <O> Attribute object type */ - public static class Parameterizer<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { + public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuMeanMultipleAttributes<N, O> makeInstance() { return new CTLuMeanMultipleAttributes<N, O>(npredf); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java 
b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java index 20ab9a00..7755a459 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianAlgorithm.java @@ -1,26 +1,27 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -30,8 +31,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -60,22 +61,22 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title; * The Difference e = non-spatial-Attribute-Value - Median (Neighborhood) is
* computed.<br>
* The Spatial Objects with the highest standardized e value are Spatial
- * Outliers. </p>
+ * Outliers.
*
* @author Ahmed Hettab
*
* @param <N> Neighborhood type
*/
@Title("Median Algorithm for Spatial Outlier Detection")
-@Reference(authors = "C.-T. Lu and D. Chen and Y. Kou", title = "Algorithms for Spatial Outlier Detection", booktitle = "Proc. 3rd IEEE International Conference on Data Mining", url="http://dx.doi.org/10.1109/ICDM.2003.1250986")
+@Reference(authors = "C.-T. Lu and D. Chen and Y. Kou", title = "Algorithms for Spatial Outlier Detection", booktitle = "Proc. 3rd IEEE International Conference on Data Mining", url = "http://dx.doi.org/10.1109/ICDM.2003.1250986")
public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CTLuMedianAlgorithm.class);
+ private static final Logging LOG = Logging.getLogger(CTLuMedianAlgorithm.class);
/**
- * Constructor
+ * Constructor.
*
* @param npredf Neighborhood predicate
*/
@@ -84,42 +85,40 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { }
/**
- * Main method
+ * Main method.
*
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
- public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
final double median;
{
double[] fi = new double[neighbors.size()];
// calculate and store Median of neighborhood
int c = 0;
- for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- if(id.sameDBID(iter)) {
+ for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
+ if (DBIDUtil.equal(iditer, iter)) {
continue;
}
- fi[c] = relation.get(iter).doubleValue(1);
+ fi[c] = relation.get(iter).doubleValue(0);
c++;
}
- if(c > 0) {
+ if (c > 0) {
median = QuickSelect.median(fi, 0, c);
- }
- else {
- median = relation.get(id).doubleValue(1);
+ } else {
+ median = relation.get(iditer).doubleValue(0);
}
}
- double h = relation.get(id).doubleValue(1) - median;
- scores.putDouble(id, h);
+ double h = relation.get(iditer).doubleValue(0) - median;
+ scores.putDouble(iditer, h);
mv.put(h);
}
@@ -127,11 +126,10 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { final double mean = mv.getMean();
final double stddev = mv.getNaiveStddev();
DoubleMinMax minmax = new DoubleMinMax();
- for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- double score = Math.abs((scores.doubleValue(id) - mean) / stddev);
+ for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
minmax.put(score);
- scores.putDouble(id, score);
+ scores.putDouble(iditer, score);
}
Relation<Double> scoreResult = new MaterializedRelation<Double>("MO", "Median-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs());
@@ -143,16 +141,16 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterization class
+ * Parameterization class.
*
* @author Ahmed Hettab
*
@@ -166,4 +164,4 @@ public class CTLuMedianAlgorithm<N> extends AbstractNeighborhoodOutlier<N> { return new CTLuMedianAlgorithm<N>(npredf);
}
}
-}
\ No newline at end of file +}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java index c8bcba74..0d515ac7 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMedianMultipleAttributes.java @@ -31,11 +31,11 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; @@ -44,7 +44,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; @@ -73,11 +72,11 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; * @param <O> Non Spatial Vector */ @Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou", title = "Detecting Spatial Outliers with Multiple Attributes", booktitle = "Proc. 
15th IEEE International Conference on Tools with Artificial Intelligence, 2003", url = "http://dx.doi.org/10.1109/TAI.2003.1250179") -public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier<N> { +public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier<N> { /** * logger */ - public static final Logging logger = Logging.getLogger(CTLuMedianMultipleAttributes.class); + private static final Logging LOG = Logging.getLogger(CTLuMedianMultipleAttributes.class); /** * Constructor @@ -90,7 +89,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -101,18 +100,17 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten * @return Outlier detection result */ public OutlierResult run(Relation<N> spatial, Relation<O> attributes) { - final int dim = DatabaseUtil.dimensionality(attributes); - if(logger.isDebugging()) { - logger.debug("Dimensionality: " + dim); + final int dim = RelationUtil.dimensionality(attributes); + if(LOG.isDebugging()) { + LOG.debug("Dimensionality: " + dim); } final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial); CovarianceMatrix covmaker = new CovarianceMatrix(dim); WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class); for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - final O obj = attributes.get(id); - final DBIDs neighbors = npred.getNeighborDBIDs(id); + final O obj = attributes.get(iditer); + final DBIDs neighbors = npred.getNeighborDBIDs(iditer); // Compute the median vector final Vector median; { @@ -123,7 +121,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten // TODO: skip object itself within 
neighbors? O nobj = attributes.get(iter); for(int d = 0; d < dim; d++) { - data[d][i] = nobj.doubleValue(d + 1); + data[d][i] = nobj.doubleValue(d); } i++; } @@ -135,8 +133,8 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten } // Delta vector "h" - Vector delta = obj.getColumnVector().minus(median); - deltas.put(id, delta); + Vector delta = obj.getColumnVector().minusEquals(median); + deltas.put(iditer, delta); covmaker.put(delta); } // Finalize covariance matrix: @@ -146,11 +144,10 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten DoubleMinMax minmax = new DoubleMinMax(); WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC); for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - Vector temp = deltas.get(id).minus(mean); + Vector temp = deltas.get(iditer).minus(mean); final double score = temp.transposeTimesTimes(cmati, temp); minmax.put(score); - scores.putDouble(id, score); + scores.putDouble(iditer, score); } Relation<Double> scoreResult = new MaterializedRelation<Double>("Median multiple attributes outlier", "median-outlier", TypeUtil.DOUBLE, scores, attributes.getDBIDs()); @@ -175,7 +172,7 @@ public class CTLuMedianMultipleAttributes<N, O extends NumberVector<?, ?>> exten * @param <N> Neighborhood type * @param <O> Attributes vector type */ - public static class Parameterizer<N, O extends NumberVector<?, ?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { + public static class Parameterizer<N, O extends NumberVector<?>> extends AbstractNeighborhoodOutlier.Parameterizer<N> { @Override protected CTLuMedianMultipleAttributes<N, O> makeInstance() { return new CTLuMedianMultipleAttributes<N, O>(npredf); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java 
b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java index 7b88ae66..3b876bba 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuMoranScatterplotOutlier.java @@ -32,8 +32,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -76,10 +76,10 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(CTLuMoranScatterplotOutlier.class); + private static final Logging LOG = Logging.getLogger(CTLuMoranScatterplotOutlier.class); /** - * Constructor + * Constructor. * * @param npredf Neighborhood */ @@ -88,20 +88,19 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< } /** - * Main method + * Main method. * * @param nrel Neighborhood relation * @param relation Data relation (1d!) * @return Outlier detection result */ - public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) { + public OutlierResult run(Relation<N> nrel, Relation<? 
extends NumberVector<?>> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel); // Compute the global mean and variance MeanVariance globalmv = new MeanVariance(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - globalmv.put(relation.get(id).doubleValue(1)); + globalmv.put(relation.get(iditer).doubleValue(0)); } DoubleMinMax minmax = new DoubleMinMax(); @@ -110,17 +109,15 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< // calculate normalized attribute values // calculate neighborhood average of normalized attribute values. for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); // Compute global z score - final double globalZ = (relation.get(id).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev(); + final double globalZ = (relation.get(iditer).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev(); // Compute local average z score Mean localm = new Mean(); - for(DBIDIter iter = npred.getNeighborDBIDs(id).iter(); iter.valid(); iter.advance()) { - DBID n = iter.getDBID(); - if(id.equals(n)) { + for(DBIDIter iter = npred.getNeighborDBIDs(iditer).iter(); iter.valid(); iter.advance()) { + if(DBIDUtil.equal(iditer, iter)) { continue; } - localm.put((relation.get(n).doubleValue(1) - globalmv.getMean()) / globalmv.getNaiveStddev()); + localm.put((relation.get(iter).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev()); } // if neighors.size == 0 final double localZ; @@ -136,7 +133,7 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< // Note: in the original moran scatterplot, any object with a score < 0 would be an outlier. 
final double score = Math.max(-globalZ * localZ, 0); minmax.put(score); - scores.putDouble(id, score); + scores.putDouble(iditer, score); } Relation<Double> scoreResult = new MaterializedRelation<Double>("MoranOutlier", "Moran Scatterplot Outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); @@ -148,16 +145,16 @@ public class CTLuMoranScatterplotOutlier<N> extends AbstractNeighborhoodOutlier< @Override public TypeInformation[] getInputTypeRestriction() { - return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1)); + return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1)); } @Override protected Logging getLogger() { - return logger; + return LOG; } /** - * Parameterization class + * Parameterization class. * * @author Ahmed Hettab * diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java index 852c4be4..ec92afd7 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuRandomWalkEC.java @@ -1,26 +1,27 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
-/* -This file is part of ELKI: -Environment for Developing KDD-Applications Supported by Index-Structures - -Copyright (C) 2012 -Ludwig-Maximilians-Universität München -Lehr- und Forschungseinheit für Datenbanksysteme -ELKI Development Team - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ +
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures +
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team +
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version. +
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details. +
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
@@ -33,7 +34,6 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -42,6 +42,8 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNHeap;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -51,7 +53,6 @@ import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.KNNHeap;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -82,30 +83,30 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; */
@Title("Random Walk on Exhaustive Combination")
@Description("Spatial Outlier Detection using Random Walk on Exhaustive Combination")
-@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url="http://dx.doi.org/10.1145/1869790.1869841")
+@Reference(authors = "X. Liu and C.-T. Lu and F. Chen", title = "Spatial outlier detection: random walk based approaches", booktitle = "Proc. 18th SIGSPATIAL International Conference on Advances in Geographic Information Systems, 2010", url = "http://dx.doi.org/10.1145/1869790.1869841")
public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<N, D, OutlierResult> implements OutlierAlgorithm {
/**
- * Logger
+ * Logger.
*/
- private static final Logging logger = Logging.getLogger(CTLuRandomWalkEC.class);
+ private static final Logging LOG = Logging.getLogger(CTLuRandomWalkEC.class);
/**
- * Parameter alpha: Attribute difference exponent
+ * Parameter alpha: Attribute difference exponent.
*/
private double alpha;
/**
- * Parameter c: damping factor
+ * Parameter c: damping factor.
*/
private double c;
/**
- * Parameter k
+ * Parameter k: number of nearest neighbors.
*/
private int k;
/**
- * Constructor
+ * Constructor.
*
* @param distanceFunction Distance function
* @param alpha Alpha parameter
@@ -120,13 +121,13 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac }
/**
- * Run the algorithm
+ * Run the algorithm.
*
* @param spatial Spatial neighborhood relation
* @param relation Attribute value relation
* @return Outlier result
*/
- public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?, ?>> relation) {
+ public OutlierResult run(Relation<N> spatial, Relation<? extends NumberVector<?>> relation) {
DistanceQuery<N, D> distFunc = getDistanceFunction().instantiate(spatial);
WritableDataStore<Vector> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
@@ -136,39 +137,41 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac // construct the relation Matrix of the ec-graph
Matrix E = new Matrix(ids.size(), ids.size());
- KNNHeap<D> heap = new KNNHeap<D>(k);
- for(int i = 0; i < ids.size(); i++) {
- final DBID id = ids.get(i);
- final double val = relation.get(id).doubleValue(1);
- assert (heap.size() == 0);
- for(int j = 0; j < ids.size(); j++) {
- if(i == j) {
- continue;
- }
- final DBID n = ids.get(j);
- final double e;
- final D distance = distFunc.distance(id, n);
- heap.add(distance, n);
- double dist = distance.doubleValue();
- if(dist == 0) {
- logger.warning("Zero distances are not supported - skipping: " + id + " " + n);
- e = 0;
+ KNNHeap<D> heap = KNNUtil.newHeap(distFunc.getDistanceFactory(), k);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ final double val = relation.get(id).doubleValue(0);
+ assert (heap.size() == 0);
+ int j = 0;
+ for(DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
+ if(i == j) {
+ continue;
+ }
+ final double e;
+ final D distance = distFunc.distance(id, n);
+ heap.add(distance, n);
+ double dist = distance.doubleValue();
+ if(dist == 0) {
+ LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
+ e = 0;
+ }
+ else {
+ double diff = Math.abs(val - relation.get(n).doubleValue(0));
+ double exp = Math.exp(Math.pow(diff, alpha));
+ // Implementation note: not inverting exp worked a lot better.
+ // Therefore we diverge from the article here.
+ e = exp / dist;
+ }
+ E.set(j, i, e);
}
- else {
- double diff = Math.abs(val - relation.get(n).doubleValue(1));
- double exp = Math.exp(Math.pow(diff, alpha));
- // Implementation note: not inverting exp worked a lot better.
- // Therefore we diverge from the article here.
- e = exp / dist;
+ // Convert kNN Heap into DBID array
+ ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
+ while(heap.size() > 0) {
+ nids.add(heap.poll());
}
- E.set(j, i, e);
- }
- // Convert kNN Heap into DBID array
- ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
- while(!heap.isEmpty()) {
- nids.add(heap.poll().getDBID());
+ neighbors.put(id, nids);
}
- neighbors.put(id, nids);
}
// normalize the adjacent Matrix
// Sum based normalization - don't use E.normalizeColumns()
@@ -195,26 +198,26 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac E = E.inverse().timesEquals(1 - c);
// Split the matrix into columns
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
- // Note: matrix times ith unit vector = ith column
- Vector sim = E.getCol(i);
- similarityVectors.put(id, sim);
+ {
+ int i = 0;
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
+ // Note: matrix times ith unit vector = ith column
+ Vector sim = E.getCol(i);
+ similarityVectors.put(id, sim);
+ }
}
E = null;
// compute the relevance scores between specified Object and its neighbors
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
- for(int i = 0; i < ids.size(); i++) {
- DBID id = ids.get(i);
+ for(DBIDIter id = ids.iter(); id.valid(); id.advance()) {
double gmean = 1.0;
int cnt = 0;
for(DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
- DBID n = iter.getDBID();
- if(id.equals(n)) {
+ if(DBIDUtil.equal(id, iter)) {
continue;
}
- double sim = MathUtil.angle(similarityVectors.get(id), similarityVectors.get(n));
+ double sim = MathUtil.angle(similarityVectors.get(id), similarityVectors.get(iter));
gmean *= sim;
cnt++;
}
@@ -230,12 +233,12 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac @Override
public TypeInformation[] getInputTypeRestriction() {
- return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -250,32 +253,32 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac */
public static class Parameterizer<N, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<N, D> {
/**
- * Parameter to specify the number of neighbors
+ * Parameter to specify the number of neighbors.
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("randomwalkec.k", "Number of nearest neighbors to use.");
+ public static final OptionID K_ID = new OptionID("randomwalkec.k", "Number of nearest neighbors to use.");
/**
- * Parameter to specify alpha
+ * Parameter to specify alpha.
*/
- public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("randomwalkec.alpha", "Scaling exponent for value differences.");
+ public static final OptionID ALPHA_ID = new OptionID("randomwalkec.alpha", "Scaling exponent for value differences.");
/**
- * Parameter to specify the c
+ * Parameter to specify the damping parameter c.
*/
- public static final OptionID C_ID = OptionID.getOrCreateOptionID("randomwalkec.c", "The damping parameter c.");
+ public static final OptionID C_ID = new OptionID("randomwalkec.c", "The damping parameter c.");
/**
- * Parameter alpha: scaling
+ * Parameter alpha: scaling.
*/
double alpha = 0.5;
/**
- * Parameter c: damping coefficient
+ * Parameter c: damping coefficient.
*/
double c = 0.9;
/**
- * Parameter for kNN
+ * Parameter for kNN.
*/
int k;
@@ -288,19 +291,20 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac }
/**
- * Get the kNN parameter
+ * Get the kNN parameter.
*
* @param config Parameterization
*/
protected void configK(Parameterization config) {
- final IntParameter param = new IntParameter(K_ID, new GreaterEqualConstraint(1));
+ final IntParameter param = new IntParameter(K_ID);
+ param.addConstraint(new GreaterEqualConstraint(1));
if(config.grab(param)) {
k = param.getValue();
}
}
/**
- * Get the alpha parameter
+ * Get the alpha parameter.
*
* @param config Parameterization
*/
@@ -312,9 +316,9 @@ public class CTLuRandomWalkEC<N, D extends NumberDistance<D, ?>> extends Abstrac }
/**
- * get the c parameter
+ * Get the c parameter.
*
- * @param config
+ * @param config Parameterization
*/
protected void configC(Parameterization config) {
final DoubleParameter param = new DoubleParameter(C_ID);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java index 4f11cb38..295c7414 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuScatterplotOutlier.java @@ -31,8 +31,8 @@ import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -78,10 +78,10 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(CTLuScatterplotOutlier.class); + private static final Logging LOG = Logging.getLogger(CTLuScatterplotOutlier.class); /** - * Constructor + * Constructor. * * @param npredf Neighborhood predicate */ @@ -90,13 +90,13 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { } /** - * Main method + * Main method. * * @param nrel Neighborhood relation * @param relation Data relation (1d!) * @return Outlier detection result */ - public OutlierResult run(Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) { + public OutlierResult run(Relation<N> nrel, Relation<? 
extends NumberVector<?>> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel); WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP); @@ -104,17 +104,15 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { // regression using the covariance matrix CovarianceMatrix covm = new CovarianceMatrix(2); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - final double local = relation.get(id).doubleValue(1); + final double local = relation.get(iditer).doubleValue(0); // Compute mean of neighbors Mean mean = new Mean(); - DBIDs neighbors = npred.getNeighborDBIDs(id); + DBIDs neighbors = npred.getNeighborDBIDs(iditer); for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { - DBID n = iter.getDBID(); - if(id.equals(n)) { + if(DBIDUtil.equal(iditer, iter)) { continue; } - mean.put(relation.get(n).doubleValue(1)); + mean.put(relation.get(iter).doubleValue(0)); } final double m; if(mean.getCount() > 0) { @@ -125,7 +123,7 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { m = local; } // Store the mean for the score calculation - means.putDouble(id, m); + means.putDouble(iditer, m); covm.put(new double[] { local, m }); } // Finalize covariance matrix, compute linear regression @@ -143,11 +141,10 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); MeanVariance mv = new MeanVariance(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); // Compute the error from the linear regression - double y_i = relation.get(id).doubleValue(1); - double e = means.doubleValue(id) - (slope * y_i + inter); - scores.putDouble(id, e); + double y_i = 
relation.get(iditer).doubleValue(0); + double e = means.doubleValue(iditer) - (slope * y_i + inter); + scores.putDouble(iditer, e); mv.put(e); } @@ -157,10 +154,9 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { final double mean = mv.getMean(); final double variance = mv.getNaiveStddev(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - double score = Math.abs((scores.doubleValue(id) - mean) / variance); + double score = Math.abs((scores.doubleValue(iditer) - mean) / variance); minmax.put(score); - scores.putDouble(id, score); + scores.putDouble(iditer, score); } } // build representation @@ -173,16 +169,16 @@ public class CTLuScatterplotOutlier<N> extends AbstractNeighborhoodOutlier<N> { @Override protected Logging getLogger() { - return logger; + return LOG; } @Override public TypeInformation[] getInputTypeRestriction() { - return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1)); + return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1)); } /** - * Parameterization class + * Parameterization class. 
* * @author Ahmed Hettab * diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java index 05729481..02573a06 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/CTLuZTestOutlier.java @@ -32,8 +32,8 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -79,60 +79,57 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(CTLuZTestOutlier.class); + private static final Logging LOG = Logging.getLogger(CTLuZTestOutlier.class); /** - * Constructor + * Constructor. * - * @param npredf + * @param npredf Neighbor predicate */ public CTLuZTestOutlier(NeighborSetPredicate.Factory<N> npredf) { super(npredf); } /** - * Main method + * Main method. * * @param database Database * @param nrel Neighborhood relation * @param relation Data relation (1d!) * @return Outlier detection result */ - public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) { + public OutlierResult run(Database database, Relation<N> nrel, Relation<? 
extends NumberVector<?>> relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel); WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); MeanVariance zmv = new MeanVariance(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - DBIDs neighbors = npred.getNeighborDBIDs(id); + DBIDs neighbors = npred.getNeighborDBIDs(iditer); // Compute Mean of neighborhood Mean localmean = new Mean(); for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { - DBID n = iter.getDBID(); - if(id.equals(n)) { + if(DBIDUtil.equal(iditer, iter)) { continue; } - localmean.put(relation.get(n).doubleValue(1)); + localmean.put(relation.get(iter).doubleValue(0)); } final double localdiff; if(localmean.getCount() > 0) { - localdiff = relation.get(id).doubleValue(1) - localmean.getMean(); + localdiff = relation.get(iditer).doubleValue(0) - localmean.getMean(); } else { localdiff = 0.0; } - scores.putDouble(id, localdiff); + scores.putDouble(iditer, localdiff); zmv.put(localdiff); } // Normalize scores using mean and variance DoubleMinMax minmax = new DoubleMinMax(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); - double score = Math.abs(scores.doubleValue(id) - zmv.getMean()) / zmv.getSampleStddev(); + double score = Math.abs(scores.doubleValue(iditer) - zmv.getMean()) / zmv.getSampleStddev(); minmax.put(score); - scores.putDouble(id, score); + scores.putDouble(iditer, score); } // Wrap result @@ -145,16 +142,16 @@ public class CTLuZTestOutlier<N> extends AbstractNeighborhoodOutlier<N> { @Override protected Logging getLogger() { - return logger; + return LOG; } @Override public TypeInformation[] getInputTypeRestriction() { - return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1)); 
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1)); } /** - * Parameterization class + * Parameterization class. * * @author Ahmed Hettab * diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java index 8ae23229..720fa39f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SLOM.java @@ -30,8 +30,8 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; @@ -74,7 +74,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(SLOM.class); + private static final Logging LOG = Logging.getLogger(SLOM.class); /** * Constructor. 
@@ -100,29 +100,27 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP); // calculate D-Tilde for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); double sum = 0; double maxDist = 0; int cnt = 0; - final DBIDs neighbors = npred.getNeighborDBIDs(id); + final DBIDs neighbors = npred.getNeighborDBIDs(iditer); for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { - DBID neighbor = iter.getDBID(); - if(id.equals(neighbor)) { + if(DBIDUtil.equal(iditer, iter)) { continue; } - double dist = distFunc.distance(id, neighbor).doubleValue(); + double dist = distFunc.distance(iditer, iter).doubleValue(); sum += dist; cnt++; maxDist = Math.max(maxDist, dist); } if(cnt > 1) { - modifiedDistance.putDouble(id, ((sum - maxDist) / (cnt - 1))); + modifiedDistance.putDouble(iditer, ((sum - maxDist) / (cnt - 1))); } else { // Use regular distance when the d-tilde trick is undefined. // Note: this can be 0 when there were no neighbors. 
- modifiedDistance.putDouble(id, maxDist); + modifiedDistance.putDouble(iditer, maxDist); } } @@ -131,29 +129,26 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); double sum = 0; int cnt = 0; - final DBIDs neighbors = npred.getNeighborDBIDs(id); + final DBIDs neighbors = npred.getNeighborDBIDs(iditer); for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { - DBID neighbor = iter.getDBID(); - if(neighbor.equals(id)) { + if(DBIDUtil.equal(iditer, iter)) { continue; } - sum += modifiedDistance.doubleValue(neighbor); + sum += modifiedDistance.doubleValue(iter); cnt++; } double slom; if(cnt > 0) { // With and without the object itself: - double avgPlus = (sum + modifiedDistance.doubleValue(id)) / (cnt + 1); + double avgPlus = (sum + modifiedDistance.doubleValue(iditer)) / (cnt + 1); double avg = sum / cnt; double beta = 0; for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { - DBID neighbor = iter.getDBID(); - final double dist = modifiedDistance.doubleValue(neighbor); + final double dist = modifiedDistance.doubleValue(iter); if(dist > avgPlus) { beta += 1; } @@ -162,8 +157,8 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance } } // Include object itself - if(!neighbors.contains(id)) { - final double dist = modifiedDistance.doubleValue(id); + if(!neighbors.contains(iditer)) { + final double dist = modifiedDistance.doubleValue(iditer); if(dist > avgPlus) { beta += 1; } @@ -182,13 +177,13 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance } beta = beta / (1 + avg); - slom = beta * modifiedDistance.doubleValue(id); + slom = beta * modifiedDistance.doubleValue(iditer); } else { // No neighbors to compare to - no score. 
slom = 0.0; } - sloms.putDouble(id, slom); + sloms.putDouble(iditer, slom); slomminmax.put(slom); } @@ -201,7 +196,7 @@ public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistance @Override protected Logging getLogger() { - return logger; + return LOG; } @Override diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java index e9987bf0..a6f39a60 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/SOF.java @@ -29,7 +29,6 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
@@ -74,7 +73,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SOF.class);
+ private static final Logging LOG = Logging.getLogger(SOF.class);
/**
* Constructor.
@@ -89,7 +88,7 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -110,33 +109,31 @@ public class SOF<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceB // Compute densities
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += distFunc.distance(id, iter.getDBID()).doubleValue();
+ avg += distFunc.distance(iditer, iter).doubleValue();
}
double lrd = 1 / (avg / neighbors.size());
if (Double.isNaN(lrd)) {
lrd = 0;
}
- lrds.putDouble(id, lrd);
+ lrds.putDouble(iditer, lrd);
}
// Compute density quotients
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
double avg = 0;
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- avg += lrds.doubleValue(iter.getDBID());
+ avg += lrds.doubleValue(iter);
}
- final double lrd = (avg / neighbors.size()) / lrds.doubleValue(id);
+ final double lrd = (avg / neighbors.size()) / lrds.doubleValue(iditer);
if (!Double.isNaN(lrd)) {
- lofs.putDouble(id, lrd);
+ lofs.putDouble(iditer, lrd);
lofminmax.put(lrd);
} else {
- lofs.putDouble(id, 0.0);
+ lofs.putDouble(iditer, 0.0);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java index 41022414..9aa21b66 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/TrimmedMeanApproach.java @@ -33,11 +33,11 @@ import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
@@ -45,14 +45,13 @@ import de.lmu.ifi.dbs.elki.math.Mean; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint.IntervalBoundary;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@@ -83,15 +82,15 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { /**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(TrimmedMeanApproach.class);
+ private static final Logging LOG = Logging.getLogger(TrimmedMeanApproach.class);
/**
- * the parameter p
+ * the parameter p.
*/
private double p;
/**
- * Constructor
+ * Constructor.
*
* @param p Parameter p
* @param npredf Neighborhood factory.
@@ -102,29 +101,28 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { }
/**
- * Run the algorithm
+ * Run the algorithm.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data Relation (1 dimensional!)
* @return Outlier detection result
*/
- public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?, ?>> relation) {
- assert (DatabaseUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
+ public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
+ assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);
WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- DBIDs neighbors = npred.getNeighborDBIDs(id);
+ DBIDs neighbors = npred.getNeighborDBIDs(iditer);
int num = 0;
double[] values = new double[neighbors.size()];
// calculate trimmedMean
for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
- values[num] = relation.get(iter).doubleValue(1);
+ values[num] = relation.get(iter).doubleValue(0);
num++;
}
@@ -141,21 +139,21 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { tm = mean.getMean();
}
else {
- tm = relation.get(id).doubleValue(1);
+ tm = relation.get(iditer).doubleValue(0);
}
// Error: deviation from trimmed mean
- errors.putDouble(id, relation.get(id).doubleValue(1) - tm);
+ errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
if(progress != null) {
- progress.incrementProcessed(logger);
+ progress.incrementProcessed(LOG);
}
}
if(progress != null) {
- progress.ensureCompleted(logger);
+ progress.ensureCompleted(LOG);
}
- if(logger.isVerbose()) {
- logger.verbose("Computing median error.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Computing median error.");
}
double median_dev_from_median;
{
@@ -164,8 +162,7 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { {
int i = 0;
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- ei[i] = errors.doubleValue(id);
+ ei[i] = errors.doubleValue(iditer);
i++;
}
}
@@ -178,15 +175,14 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { median_dev_from_median = QuickSelect.median(ei);
}
- if(logger.isVerbose()) {
- logger.verbose("Normalizing scores.");
+ if(LOG.isVerbose()) {
+ LOG.verbose("Normalizing scores.");
}
// calculate score
DoubleMinMax minmax = new DoubleMinMax();
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- double score = Math.abs(errors.doubleValue(id)) * 0.6745 / median_dev_from_median;
- scores.putDouble(id, score);
+ double score = Math.abs(errors.doubleValue(iditer)) * 0.6745 / median_dev_from_median;
+ scores.putDouble(iditer, score);
minmax.put(score);
}
//
@@ -199,17 +195,17 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
// Get one dimensional attribute for analysis.
- return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), VectorFieldTypeInformation.get(NumberVector.class, 1));
+ return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
}
/**
- * Parameterizer
+ * Parameterizer.
*
* @author Ahmed Hettab
*
@@ -219,19 +215,21 @@ public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> { */
public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
/**
- * Parameter for the percentile value p
+ * Parameter for the percentile value p.
*/
- public static final OptionID P_ID = OptionID.getOrCreateOptionID("tma.p", "the percentile parameter");
+ public static final OptionID P_ID = new OptionID("tma.p", "the percentile parameter");
/**
- * Percentile parameter p
+ * Percentile parameter p.
*/
protected double p = 0.2;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter pP = new DoubleParameter(P_ID, new IntervalConstraint(0.0, IntervalBoundary.OPEN, 0.5, IntervalBoundary.OPEN));
+ DoubleParameter pP = new DoubleParameter(P_ID);
+ pP.addConstraint(new GreaterConstraint(0.0)); + pP.addConstraint(new LessConstraint(0.5));
if(config.grab(pP)) {
p = pP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java index 5898b053..2c706ce0 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/AbstractPrecomputedNeighborhood.java @@ -24,7 +24,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; */ import de.lmu.ifi.dbs.elki.database.datastore.DataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.logging.Logging; @@ -50,7 +51,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred } @Override - public DBIDs getNeighborDBIDs(DBID reference) { + public DBIDs getNeighborDBIDs(DBIDRef reference) { DBIDs neighbors = store.get(reference); if(neighbors != null) { return neighbors; @@ -60,7 +61,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred if(getLogger().isDebugging()) { getLogger().warning("No neighbors for object " + reference); } - return reference; + return DBIDUtil.deref(reference); } } @@ -69,7 +70,7 @@ public abstract class AbstractPrecomputedNeighborhood implements NeighborSetPred * * @return Logger */ - abstract protected Logging getLogger(); + protected abstract Logging getLogger(); /** * Factory class. 
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java index 7a2fda52..4aa96b25 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExtendedNeighborhood.java @@ -28,7 +28,6 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStore; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; @@ -54,7 +53,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { /** * The logger to use. */ - static final Logging logger = Logging.getLogger(ExtendedNeighborhood.class); + private static final Logging LOG = Logging.getLogger(ExtendedNeighborhood.class); /** * Constructor. @@ -67,7 +66,7 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -132,23 +131,22 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class); // Expand multiple steps - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Expanding neighborhoods", database.size(), logger) : null; + FiniteProgress progress = LOG.isVerbose() ? 
new FiniteProgress("Expanding neighborhoods", database.size(), LOG) : null; for(DBIDIter iter = database.iterDBIDs(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); - HashSetModifiableDBIDs res = DBIDUtil.newHashSet(id); - DBIDs todo = id; + HashSetModifiableDBIDs res = DBIDUtil.newHashSet(); + res.add(iter); + DBIDs todo = DBIDUtil.deref(iter); for(int i = 0; i < steps; i++) { ModifiableDBIDs ntodo = DBIDUtil.newHashSet(); for(DBIDIter iter2 = todo.iter(); iter2.valid(); iter2.advance()) { - DBIDs add = innerinst.getNeighborDBIDs(iter2.getDBID()); + DBIDs add = innerinst.getNeighborDBIDs(iter2); if(add != null) { - for(DBIDIter iter3 = add.iter(); iter.valid(); iter.advance()) { - DBID nid = iter3.getDBID(); - if(res.contains(nid)) { + for(DBIDIter iter3 = add.iter(); iter3.valid(); iter3.advance()) { + if(res.contains(iter3)) { continue; } - ntodo.add(nid); - res.add(nid); + ntodo.add(iter3); + res.add(iter3); } } } @@ -157,13 +155,13 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { } todo = ntodo; } - store.put(id, res); + store.put(iter, res); if(progress != null) { - progress.incrementProcessed(logger); + progress.incrementProcessed(LOG); } } if(progress != null) { - progress.ensureCompleted(logger); + progress.ensureCompleted(LOG); } return store; @@ -180,12 +178,12 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { /** * Parameter to specify the neighborhood predicate to use. 
*/ - public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use."); + public static final OptionID NEIGHBORHOOD_ID = new OptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use."); /** * Parameter to specify the number of steps allowed */ - public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph."); + public static final OptionID STEPS_ID = new OptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph."); /** * The number of steps to do. @@ -225,7 +223,8 @@ public class ExtendedNeighborhood extends AbstractPrecomputedNeighborhood { * @return number of steps, default 1 */ public static int getParameterSteps(Parameterization config) { - final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1)); + final IntParameter param = new IntParameter(STEPS_ID); + param.addConstraint(new GreaterEqualConstraint(1)); if(config.grab(param)) { return param.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java index 74e5bbcf..01052c1f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/ExternalNeighborhood.java @@ -63,12 +63,12 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { /** * Logger */ - static final Logging logger = Logging.getLogger(ExternalNeighborhood.class); + private static final Logging LOG = Logging.getLogger(ExternalNeighborhood.class); /** * Parameter to specify the neighborhood file */ - public static final OptionID NEIGHBORHOOD_FILE_ID = OptionID.getOrCreateOptionID("externalneighbors.file", "The 
file listing the neighbors."); + public static final OptionID NEIGHBORHOOD_FILE_ID = new OptionID("externalneighbors.file", "The file listing the neighbors."); /** * Constructor. @@ -91,7 +91,7 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -136,33 +136,32 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { private DataStore<DBIDs> loadNeighbors(Relation<?> database) { final WritableDataStore<DBIDs> store = DataStoreUtil.makeStorage(database.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_TEMP, DBIDs.class); - if(logger.isVerbose()) { - logger.verbose("Loading external neighborhoods."); + if(LOG.isVerbose()) { + LOG.verbose("Loading external neighborhoods."); } - if(logger.isDebugging()) { - logger.verbose("Building reverse label index..."); + if(LOG.isDebugging()) { + LOG.verbose("Building reverse label index..."); } // Build a map label/ExternalId -> DBID // (i.e. a reverse index!) // TODO: move this into the database layer to share? 
- Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() * 2); + Map<String, DBID> lblmap = new HashMap<String, DBID>(database.size() << 1); { Relation<LabelList> olq = database.getDatabase().getRelation(TypeUtil.LABELLIST); Relation<ExternalID> eidq = database.getDatabase().getRelation(TypeUtil.EXTERNALID); for(DBIDIter iditer = database.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID(); if(eidq != null) { - ExternalID eid = eidq.get(id); + ExternalID eid = eidq.get(iditer); if(eid != null) { - lblmap.put(eid.toString(), id); + lblmap.put(eid.toString(), DBIDUtil.deref(iditer)); } } if(olq != null) { - LabelList label = olq.get(id); + LabelList label = olq.get(iditer); if(label != null) { for(String lbl : label) { - lblmap.put(lbl, id); + lblmap.put(lbl, DBIDUtil.deref(iditer)); } } } @@ -170,8 +169,8 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { } try { - if(logger.isDebugging()) { - logger.verbose("Loading neighborhood file."); + if(LOG.isDebugging()) { + LOG.verbose("Loading neighborhood file."); } InputStream in = new FileInputStream(file); in = FileUtil.tryGzipInput(in); @@ -187,16 +186,16 @@ public class ExternalNeighborhood extends AbstractPrecomputedNeighborhood { neighbours.add(neigh); } else { - if(logger.isDebugging()) { - logger.debug("No object found for label " + entries[i]); + if(LOG.isDebugging()) { + LOG.debug("No object found for label " + entries[i]); } } } store.put(id, neighbours); } else { - if(logger.isDebugging()) { - logger.warning("No object found for label " + entries[0]); + if(LOG.isDebugging()) { + LOG.warning("No object found for label " + entries[0]); } } } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java index 3a6d0e28..b52f8e91 100644 --- 
a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/NeighborSetPredicate.java @@ -24,7 +24,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood; */ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; -import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.result.Result; @@ -42,7 +42,7 @@ public interface NeighborSetPredicate extends Result { * @param reference Reference object * @return Neighborhood */ - public DBIDs getNeighborDBIDs(DBID reference); + public DBIDs getNeighborDBIDs(DBIDRef reference); /** * Factory interface to produce instances. diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java index 9dd2dee1..f6000ef0 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/PrecomputedKNearestNeighborNeighborhood.java @@ -29,15 +29,13 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
-import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.KNNResult;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -57,7 +55,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte /**
* Logger
*/
- private static final Logging logger = Logging.getLogger(PrecomputedKNearestNeighborNeighborhood.class);
+ private static final Logging LOG = Logging.getLogger(PrecomputedKNearestNeighborNeighborhood.class);
/**
* Constructor.
@@ -80,7 +78,7 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte @Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -121,13 +119,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte // TODO: use bulk?
WritableDataStore<DBIDs> s = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBIDs.class);
for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { - DBID id = iditer.getDBID();
- KNNResult<D> neighbors = knnQuery.getKNNForDBID(id, k);
+ KNNResult<D> neighbors = knnQuery.getKNNForDBID(iditer, k);
ArrayModifiableDBIDs neighbours = DBIDUtil.newArray(neighbors.size());
- for(DistanceResultPair<D> dpair : neighbors) {
- neighbours.add(dpair.getDBID());
+ for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
+ neighbours.add(neighbor);
}
- s.put(id, neighbours);
+ s.put(iditer, neighbours);
}
return new PrecomputedKNearestNeighborNeighborhood<D>(s);
}
@@ -151,12 +148,12 @@ public class PrecomputedKNearestNeighborNeighborhood<D extends Distance<D>> exte /**
* Parameter k
*/
- public static final OptionID K_ID = OptionID.getOrCreateOptionID("neighborhood.k", "the number of neighbors");
+ public static final OptionID K_ID = new OptionID("neighborhood.k", "the number of neighbors");
/**
* Parameter to specify the distance function to use
*/
- public static final OptionID DISTANCEFUNCTION_ID = OptionID.getOrCreateOptionID("neighborhood.distancefunction", "the distance function to use");
+ public static final OptionID DISTANCEFUNCTION_ID = new OptionID("neighborhood.distancefunction", "the distance function to use");
/**
* Parameter k
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java index d170571f..f1c68577 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/LinearWeightedExtendedNeighborhood.java @@ -29,10 +29,11 @@ import java.util.List; import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; @@ -41,7 +42,6 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; /** * Neighborhood obtained by computing the k-fold closure of an existing @@ -87,29 +87,27 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr } @Override - public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) { + public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) { ModifiableDBIDs seen = DBIDUtil.newHashSet(); - 
List<DoubleObjPair<DBID>> result = new ArrayList<DoubleObjPair<DBID>>(); + List<DoubleDBIDPair> result = new ArrayList<DoubleDBIDPair>(); // Add starting object - result.add(new DoubleObjPair<DBID>(computeWeight(0), reference)); + result.add(DBIDUtil.newPair(computeWeight(0), reference)); seen.add(reference); // Extend. - DBIDs cur = reference; + DBIDs cur = DBIDUtil.deref(reference); for(int i = 1; i <= steps; i++) { final double weight = computeWeight(i); // Collect newly discovered IDs ModifiableDBIDs add = DBIDUtil.newHashSet(); for(DBIDIter iter = cur.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); - for(DBIDIter iter2 = inner.getNeighborDBIDs(id).iter(); iter2.valid(); iter2.advance()) { - DBID nid = iter2.getDBID(); + for(DBIDIter iter2 = inner.getNeighborDBIDs(iter).iter(); iter2.valid(); iter2.advance()) { // Seen before? - if(seen.contains(nid)) { + if(seen.contains(iter2)) { continue; } - add.add(nid); - result.add(new DoubleObjPair<DBID>(weight, nid)); + add.add(iter2); + result.add(DBIDUtil.newPair(weight, iter2)); } } if(add.size() == 0) { @@ -172,12 +170,12 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr /** * Parameter to specify the neighborhood predicate to use. */ - public static final OptionID NEIGHBORHOOD_ID = OptionID.getOrCreateOptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use."); + public static final OptionID NEIGHBORHOOD_ID = new OptionID("extendedneighbors.neighborhood", "The inner neighborhood predicate to use."); /** * Parameter to specify the number of steps allowed */ - public static final OptionID STEPS_ID = OptionID.getOrCreateOptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph."); + public static final OptionID STEPS_ID = new OptionID("extendedneighbors.steps", "The number of steps allowed in the neighborhood graph."); /** * The number of steps to do. 
@@ -217,7 +215,8 @@ public class LinearWeightedExtendedNeighborhood implements WeightedNeighborSetPr * @return number of steps, default 1 */ public static int getParameterSteps(Parameterization config) { - final IntParameter param = new IntParameter(STEPS_ID, new GreaterEqualConstraint(1)); + final IntParameter param = new IntParameter(STEPS_ID); + param.addConstraint(new GreaterEqualConstraint(1)); if(config.grab(param)) { return param.getValue(); } diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java index ce0666df..c179d81f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/UnweightedNeighborhoodAdapter.java @@ -28,15 +28,16 @@ import java.util.Collection; import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; /** * Adapter to use unweighted neighborhoods in an algorithm that requires @@ -61,12 +62,11 @@ public class UnweightedNeighborhoodAdapter 
implements WeightedNeighborSetPredica } @Override - public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference) { + public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference) { DBIDs neighbors = inner.getNeighborDBIDs(reference); - ArrayList<DoubleObjPair<DBID>> adapted = new ArrayList<DoubleObjPair<DBID>>(neighbors.size()); + ArrayList<DoubleDBIDPair> adapted = new ArrayList<DoubleDBIDPair>(neighbors.size()); for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) { - DBID id = iter.getDBID(); - adapted.add(new DoubleObjPair<DBID>(1.0, id)); + adapted.add(DBIDUtil.newPair(1.0, iter)); } return adapted; } @@ -120,7 +120,7 @@ public class UnweightedNeighborhoodAdapter implements WeightedNeighborSetPredica /** * The parameter to give the non-weighted neighborhood to use. */ - public static final OptionID INNER_ID = OptionID.getOrCreateOptionID("neighborhood.inner", "Parameter for the non-weighted neighborhood to use."); + public static final OptionID INNER_ID = new OptionID("neighborhood.inner", "Parameter for the non-weighted neighborhood to use."); /** * The actual predicate. 
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java index b147935a..16d37587 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/spatial/neighborhood/weighted/WeightedNeighborSetPredicate.java @@ -26,10 +26,10 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.weighted; import java.util.Collection; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; -import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; /** * Neighbor predicate with weight support. @@ -43,7 +43,7 @@ public interface WeightedNeighborSetPredicate { * @param reference Reference object * @return Weighted Neighborhood */ - public Collection<DoubleObjPair<DBID>> getWeightedNeighbors(DBID reference); + public Collection<DoubleDBIDPair> getWeightedNeighbors(DBIDRef reference); /** * Factory interface to produce instances. diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java index 573233a7..1965914d 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OUTRES.java @@ -23,10 +23,8 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; -import java.util.List; import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; @@ -37,16 +35,20 @@ import de.lmu.ifi.dbs.elki.database.QueryUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; -import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair; -import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair; +import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDistanceDBIDPair; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultIter; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDList; +import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DoubleDistanceDBIDResultIter; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; @@ -58,7 +60,6 @@ import de.lmu.ifi.dbs.elki.math.statistics.distribution.GammaDistribution; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import 
de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; @@ -89,11 +90,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; * @param <V> vector type */ @Reference(authors = "E. Müller, M. Schiffer, T. Seidl", title = "Adaptive outlierness for subspace outlier ranking", booktitle = "Proc. 19th ACM International Conference on Information and knowledge management") -public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +public class OUTRES<V extends NumberVector<?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(OUTRES.class); + private static final Logging LOG = Logging.getLogger(OUTRES.class); /** * The epsilon (in 2d) parameter @@ -128,7 +129,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl KernelDensityEstimator kernel = new KernelDensityEstimator(relation); BitSet subspace = new BitSet(kernel.dim); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("OutRank scores", relation.size(), logger) : null; + FiniteProgress progress = LOG.isVerbose() ? 
new FiniteProgress("OUTRES scores", relation.size(), LOG) : null; for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { subspace.clear(); @@ -136,11 +137,11 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl ranks.putDouble(iditer, score); minmax.put(score); if(progress != null) { - progress.incrementProcessed(logger); + progress.incrementProcessed(LOG); } } if(progress != null) { - progress.ensureCompleted(logger); + progress.ensureCompleted(LOG); } OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., 1., 1.); @@ -159,33 +160,34 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl */ public double outresScore(final int s, BitSet subspace, DBIDRef id, KernelDensityEstimator kernel) { double score = 1.0; // Initial score is 1.0 + final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace); + MeanVariance meanv = new MeanVariance(); for(int i = s; i < kernel.dim; i++) { if(subspace.get(i)) { // TODO: needed? Or should we always start with i=0? continue; } subspace.set(i); - final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace); + df.setSelectedDimensions(subspace); final double adjustedEps = kernel.adjustedEps(kernel.dim); // Query with a larger window, to also get neighbors of neighbors // Subspace euclidean is metric! 
- final DoubleDistance range = new DoubleDistance(adjustedEps * 2); + final DoubleDistance range = new DoubleDistance(adjustedEps * 2.); RangeQuery<V, DoubleDistance> rq = QueryUtil.getRangeQuery(kernel.relation, df, range); - List<DistanceResultPair<DoubleDistance>> neighc = rq.getRangeForDBID(id, range); - List<DoubleDistanceResultPair> neigh = refineRange(neighc, adjustedEps); + DistanceDBIDResult<DoubleDistance> neighc = rq.getRangeForDBID(id, range); + DoubleDistanceDBIDList neigh = refineRange(neighc, adjustedEps); if(neigh.size() > 2) { // Relevance test if(relevantSubspace(subspace, neigh, kernel)) { final double density = kernel.subspaceDensity(subspace, neigh); - final double deviation; // Compute mean and standard deviation for densities of neighbors. - MeanVariance meanv = new MeanVariance(); - for(DoubleDistanceResultPair pair : neigh) { - List<DoubleDistanceResultPair> n2 = subsetNeighborhoodQuery(neighc, pair.getDBID(), df, adjustedEps, kernel); + meanv.reset(); + for (DoubleDistanceDBIDResultIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { + DoubleDistanceDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel); meanv.put(kernel.subspaceDensity(subspace, n2)); } - deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev()); + final double deviation = (meanv.getMean() - density) / (2. 
* meanv.getSampleStddev()); // High deviation: if(deviation >= 1) { score *= (density / deviation); @@ -206,19 +208,20 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl * @param adjustedEps New epsilon * @return refined list */ - private List<DoubleDistanceResultPair> refineRange(List<DistanceResultPair<DoubleDistance>> neighc, double adjustedEps) { - List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size()); + private DoubleDistanceDBIDList refineRange(DistanceDBIDResult<DoubleDistance> neighc, double adjustedEps) { + DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size()); // We don't have a guarantee for this list to be sorted - for(DistanceResultPair<DoubleDistance> p : neighc) { - if(p instanceof DoubleDistanceResultPair) { - if(((DoubleDistanceResultPair) p).getDoubleDistance() <= adjustedEps) { - n.add((DoubleDistanceResultPair) p); + for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); + if(p instanceof DoubleDistanceDBIDPair) { + if(((DoubleDistanceDBIDPair) p).doubleDistance() <= adjustedEps) { + n.add((DoubleDistanceDBIDPair) p); } } else { double dist = p.getDistance().doubleValue(); if(dist <= adjustedEps) { - n.add(new DoubleDistanceResultPair(dist, p.getDBID())); + n.add(dist, p); } } } @@ -235,13 +238,14 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl * @param kernel Kernel * @return Neighbors of neighbor object */ - private List<DoubleDistanceResultPair> subsetNeighborhoodQuery(List<DistanceResultPair<DoubleDistance>> neighc, DBID dbid, PrimitiveDoubleDistanceFunction<? 
super V> df, double adjustedEps, KernelDensityEstimator kernel) { - List<DoubleDistanceResultPair> n = new ArrayList<DoubleDistanceResultPair>(neighc.size()); + private DoubleDistanceDBIDList subsetNeighborhoodQuery(DistanceDBIDResult<DoubleDistance> neighc, DBIDRef dbid, PrimitiveDoubleDistanceFunction<? super V> df, double adjustedEps, KernelDensityEstimator kernel) { + DoubleDistanceDBIDList n = new DoubleDistanceDBIDList(neighc.size()); V query = kernel.relation.get(dbid); - for(DistanceResultPair<DoubleDistance> p : neighc) { + for (DistanceDBIDResultIter<DoubleDistance> neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) { + DistanceDBIDPair<DoubleDistance> p = neighbor.getDistancePair(); double dist = df.doubleDistance(query, kernel.relation.get(p)); if(dist <= adjustedEps) { - n.add(new DoubleDistanceResultPair(dist, p.getDBID())); + n.add(dist, p); } } return n; @@ -255,7 +259,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl * @param kernel Kernel density estimator * @return relevance test result */ - protected boolean relevantSubspace(BitSet subspace, List<DoubleDistanceResultPair> neigh, KernelDensityEstimator kernel) { + protected boolean relevantSubspace(BitSet subspace, DoubleDistanceDBIDList neigh, KernelDensityEstimator kernel) { Relation<V> relation = kernel.relation; final double crit = K_S_CRITICAL001 / Math.sqrt(neigh.size()); @@ -264,9 +268,9 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl double[] data = new double[neigh.size()]; { int count = 0; - for(DoubleDistanceResultPair object : neigh) { - V vector = relation.get(object.getDBID()); - data[count] = vector.doubleValue(dim + 1); + for (DBIDIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) { + V vector = relation.get(neighbor); + data[count] = vector.doubleValue(dim); count++; } assert (count == neigh.size()); @@ -278,7 +282,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends 
AbstractAlgorithm<Outl // Kolmogorow-Smirnow-Test against uniform distribution: for(int j = 1; j < data.length - 2; j++) { - double delta = (j / (data.length - 1)) - ((data[j] - min) / norm); + double delta = (j / (data.length - 1.)) - ((data[j] - min) / norm); if(Math.abs(delta) > crit) { return false; } @@ -326,7 +330,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl public KernelDensityEstimator(Relation<V> relation) { super(); this.relation = relation; - dim = DatabaseUtil.dimensionality(relation); + dim = RelationUtil.dimensionality(relation); hopttwo = optimalBandwidth(2); epsilons = new double[dim + 1]; Arrays.fill(epsilons, Double.NEGATIVE_INFINITY); @@ -337,15 +341,15 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl * Compute density in the given subspace. * * @param subspace Subspace - * @param neighbours Neighbor distance list + * @param neighbors Neighbor distance list * @return Density */ - protected double subspaceDensity(BitSet subspace, List<DoubleDistanceResultPair> neighbours) { + protected double subspaceDensity(BitSet subspace, DoubleDistanceDBIDList neighbors) { final double bandwidth = optimalBandwidth(subspace.cardinality()); double density = 0; - for(DoubleDistanceResultPair pair : neighbours) { - double v = pair.getDoubleDistance() / bandwidth; + for (DoubleDistanceDBIDResultIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) { + double v = neighbor.doubleDistance() / bandwidth; if(v < 1) { density += 1 - (v * v); } @@ -363,7 +367,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl protected double optimalBandwidth(int dim) { // Pi in the publication is redundant and cancels out! double hopt = 8 * GammaDistribution.gamma(dim / 2.0 + 1) * (dim + 4) * Math.pow(2, dim); - return hopt * Math.pow(relation.size(), (-1 / (dim + 4))); + return hopt * Math.pow(relation.size(), (-1. 
/ (dim + 4))); } /** @@ -385,7 +389,7 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl @Override protected Logging getLogger() { - return logger; + return LOG; } @Override @@ -400,11 +404,11 @@ public class OUTRES<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Outl * * @apiviz.exclude */ - public static class Parameterizer<O extends NumberVector<O, ?>> extends AbstractParameterizer { + public static class Parameterizer<O extends NumberVector<?>> extends AbstractParameterizer { /** * Option ID for Epsilon parameter */ - public static final OptionID D_ID = OptionID.getOrCreateOptionID("outres.epsilon", "Range value for OUTRES in 2 dimensions."); + public static final OptionID D_ID = new OptionID("outres.epsilon", "Range value for OUTRES in 2 dimensions."); /** * Query radius diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java index e370d2bf..79243213 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/OutRankS1.java @@ -78,7 +78,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(OutRankS1.class); + private static final Logging LOG = Logging.getLogger(OutRankS1.class); /** * Clustering algorithm to run. @@ -110,23 +110,23 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli Clustering<? 
extends SubspaceModel<?>> clustering = clusteralg.run(database); WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT); - for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { + for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { score.putDouble(iter, 0); } int maxdim = 0, maxsize = 0; // Find maximum dimensionality and cluster size - for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) { + for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) { maxsize = Math.max(maxsize, cluster.size()); maxdim = Math.max(maxdim, cluster.getModel().getDimensions().cardinality()); } // Iterate over all clusters: DoubleMinMax minmax = new DoubleMinMax(); - for(Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) { + for (Cluster<? extends SubspaceModel<?>> cluster : clustering.getAllClusters()) { double relsize = cluster.size() / (double) maxsize; double reldim = cluster.getModel().getDimensions().cardinality() / (double) maxdim; // Process objects in the cluster - for(DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { + for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) { double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim; score.putDouble(iter, newscore); minmax.put(newscore); @@ -147,7 +147,7 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -161,12 +161,12 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli /** * Clustering algorithm to use. 
*/ - public static final OptionID ALGORITHM_ID = OptionID.getOrCreateOptionID("outrank.algorithm", "Subspace clustering algorithm to use."); + public static final OptionID ALGORITHM_ID = new OptionID("outrank.algorithm", "Subspace clustering algorithm to use."); /** * Alpha parameter for S1 */ - public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("outrank.s1.alpha", "Alpha parameter for S1 score."); + public static final OptionID ALPHA_ID = new OptionID("outrank.s1.alpha", "Alpha parameter for S1 score."); /** * Clustering algorithm to run. @@ -182,12 +182,13 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli protected void makeOptions(Parameterization config) { super.makeOptions(config); ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>> algP = new ObjectParameter<SubspaceClusteringAlgorithm<? extends SubspaceModel<?>>>(ALGORITHM_ID, SubspaceClusteringAlgorithm.class); - if(config.grab(algP)) { + if (config.grab(algP)) { algorithm = algP.instantiateClass(config); } - DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 0.25); - if(config.grab(alphaP)) { - alpha = alphaP.getValue(); + DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 0.25); + alphaP.addConstraint(new GreaterConstraint(0)); + if (config.grab(alphaP)) { + alpha = alphaP.doubleValue(); } } @@ -196,4 +197,4 @@ public class OutRankS1 extends AbstractAlgorithm<OutlierResult> implements Outli return new OutRankS1(algorithm, alpha); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java index 7fef95e0..35a780cd 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/subspace/SOD.java @@ -36,14 +36,15 @@ import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; -import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair; import de.lmu.ifi.dbs.elki.database.query.similarity.SimilarityQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.distance.similarityfunction.SharedNearestNeighborSimilarityFunction; @@ -57,7 +58,6 @@ import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable; import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream; -import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; @@ -70,10 +70,10 @@ import 
de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; -import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; /** - * Subspace Outlier Degree. Outlier detection method for axis-parallel subspaces. + * Subspace Outlier Degree. Outlier detection method for axis-parallel + * subspaces. * * Reference: * <p> @@ -89,34 +89,35 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair; * @apiviz.has SharedNearestNeighborSimilarityFunction * * @param <V> the type of NumberVector handled by this Algorithm + * @param <D> distance type */ // todo arthur comment @Title("SOD: Subspace outlier degree") @Description("Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data") @Reference(authors = "H.-P. Kriegel, P. Kröger, E. Schubert, A. Zimek", title = "Outlier Detection in Axis-Parallel Subspaces of High Dimensional Data", booktitle = "Proceedings of the 13th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Bangkok, Thailand, 2009", url = "http://dx.doi.org/10.1007/978-3-642-01307-2") -public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { +public class SOD<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { /** * The logger for this class. */ - private static final Logging logger = Logging.getLogger(SOD.class); + private static final Logging LOG = Logging.getLogger(SOD.class); /** * Parameter to specify the number of shared nearest neighbors to be * considered for learning the subspace properties., must be an integer * greater than 0. 
*/ - public static final OptionID KNN_ID = OptionID.getOrCreateOptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties."); + public static final OptionID KNN_ID = new OptionID("sod.knn", "The number of most snn-similar objects to use as reference set for learning the subspace properties."); /** * Parameter to indicate the multiplier for the discriminance value for * discerning small from large variances. */ - public static final OptionID ALPHA_ID = OptionID.getOrCreateOptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances."); + public static final OptionID ALPHA_ID = new OptionID("sod.alpha", "The multiplier for the discriminance value for discerning small from large variances."); /** * Parameter for the similarity function. */ - public static final OptionID SIM_ID = OptionID.getOrCreateOptionID("sod.similarity", "The similarity function used for the neighborhood set."); + public static final OptionID SIM_ID = new OptionID("sod.similarity", "The similarity function used for the neighborhood set."); /** * Holds the value of {@link #KNN_ID}. @@ -155,20 +156,20 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e */ public OutlierResult run(Relation<V> relation) { SimilarityQuery<V, D> snnInstance = similarityFunction.instantiate(relation); - FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), logger) : null; + FiniteProgress progress = LOG.isVerbose() ? 
new FiniteProgress("Assigning Subspace Outlier Degree", relation.size(), LOG) : null; WritableDataStore<SODModel<?>> sod_models = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC, SODModel.class); DoubleMinMax minmax = new DoubleMinMax(); - for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { - if(progress != null) { - progress.incrementProcessed(logger); + for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + if (progress != null) { + progress.incrementProcessed(LOG); } DBIDs knnList = getNearestNeighbors(relation, snnInstance, iter); SODModel<V> model = new SODModel<V>(relation, knnList, alpha, relation.get(iter)); sod_models.put(iter, model); minmax.put(model.getSod()); } - if(progress != null) { - progress.ensureCompleted(logger); + if (progress != null) { + progress.ensureCompleted(LOG); } // combine results. Relation<SODModel<?>> models = new MaterializedRelation<SODModel<?>>("Subspace Outlier Model", "sod-outlier", new SimpleTypeInformation<SODModel<?>>(SODModel.class), sod_models, relation.getDBIDs()); @@ -193,20 +194,19 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e */ private DBIDs getNearestNeighbors(Relation<V> relation, SimilarityQuery<V, D> simQ, DBIDRef queryObject) { // similarityFunction.getPreprocessor().getParameters(); - Heap<DoubleObjPair<DBID>> nearestNeighbors = new TiedTopBoundedHeap<DoubleObjPair<DBID>>(knn); - for(DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { - if(!iter.sameDBID(queryObject)) { + Heap<DoubleDBIDPair> nearestNeighbors = new TiedTopBoundedHeap<DoubleDBIDPair>(knn); + for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) { + if (!DBIDUtil.equal(iter, queryObject)) { double sim = simQ.similarity(queryObject, iter).doubleValue(); - if(sim > 0) { - nearestNeighbors.add(new DoubleObjPair<DBID>(sim, iter.getDBID())); + if (sim > 0) { + nearestNeighbors.add(DBIDUtil.newPair(sim, 
iter)); } } } // Collect DBIDs ArrayModifiableDBIDs dbids = DBIDUtil.newArray(nearestNeighbors.size()); - while(nearestNeighbors.size() > 0) { - final DoubleObjPair<DBID> next = nearestNeighbors.poll(); - dbids.add(next.second); + while (nearestNeighbors.size() > 0) { + dbids.add(nearestNeighbors.poll()); } return dbids; } @@ -218,17 +218,17 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e @Override protected Logging getLogger() { - return logger; + return LOG; } /** - * + * SOD Model class * * @author Arthur Zimek * @param <V> the type of DatabaseObjects handled by this Result */ // TODO: arthur comment - public static class SODModel<V extends NumberVector<V, ?>> implements TextWriteable, Comparable<SODModel<?>> { + public static class SODModel<V extends NumberVector<?>> implements TextWriteable, Comparable<SODModel<?>> { private double[] centerValues; private V center; @@ -250,61 +250,60 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e * @param queryObject Query object */ public SODModel(Relation<V> relation, DBIDs neighborhood, double alpha, V queryObject) { - if(neighborhood.size() > 0) { + if (neighborhood.size() > 0) { // TODO: store database link? 
- centerValues = new double[DatabaseUtil.dimensionality(relation)]; + centerValues = new double[RelationUtil.dimensionality(relation)]; variances = new double[centerValues.length]; - for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { + for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { V databaseObject = relation.get(iter); - for(int d = 0; d < centerValues.length; d++) { - centerValues[d] += databaseObject.doubleValue(d + 1); + for (int d = 0; d < centerValues.length; d++) { + centerValues[d] += databaseObject.doubleValue(d); } } - for(int d = 0; d < centerValues.length; d++) { + for (int d = 0; d < centerValues.length; d++) { centerValues[d] /= neighborhood.size(); } - for(DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { + for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) { V databaseObject = relation.get(iter); - for(int d = 0; d < centerValues.length; d++) { + for (int d = 0; d < centerValues.length; d++) { // distance - double distance = centerValues[d] - databaseObject.doubleValue(d + 1); + double distance = centerValues[d] - databaseObject.doubleValue(d); // variance variances[d] += distance * distance; } } expectationOfVariance = 0; - for(int d = 0; d < variances.length; d++) { + for (int d = 0; d < variances.length; d++) { variances[d] /= neighborhood.size(); expectationOfVariance += variances[d]; } expectationOfVariance /= variances.length; weightVector = new BitSet(variances.length); - for(int d = 0; d < variances.length; d++) { - if(variances[d] < alpha * expectationOfVariance) { + for (int d = 0; d < variances.length; d++) { + if (variances[d] < alpha * expectationOfVariance) { weightVector.set(d, true); } } - center = DatabaseUtil.assumeVectorField(relation).getFactory().newNumberVector(centerValues); + center = RelationUtil.getNumberVectorFactory(relation).newNumberVector(centerValues); sod = subspaceOutlierDegree(queryObject, center, weightVector); - } - else { + 
} else { center = queryObject; sod = 0.0; } } /** - * Compute SOD score + * Compute SOD score. * - * @param queryObject - * @param center - * @param weightVector - * @return sod value + * @param queryObject Query object + * @param center Center vector + * @param weightVector Weight vector + * @return sod score */ private double subspaceOutlierDegree(V queryObject, V center, BitSet weightVector) { final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(weightVector); final int card = weightVector.cardinality(); - if(card == 0) { + if (card == 0) { return 0; } double distance = df.distance(queryObject, center).doubleValue(); @@ -352,7 +351,7 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e Relation<SODModel<?>> models; /** - * The IDs we are defined for + * The IDs we are defined for. */ DBIDs dbids; @@ -436,7 +435,7 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector<?>, D extends NumberDistance<D, ?>> extends AbstractParameterizer { /** * Holds the value of {@link #KNN_ID}. 
*/ @@ -456,18 +455,20 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e protected void makeOptions(Parameterization config) { super.makeOptions(config); final ObjectParameter<SimilarityFunction<V, D>> simP = new ObjectParameter<SimilarityFunction<V, D>>(SIM_ID, SimilarityFunction.class, SharedNearestNeighborSimilarityFunction.class); - if(config.grab(simP)) { + if (config.grab(simP)) { similarityFunction = simP.instantiateClass(config); } - final IntParameter knnP = new IntParameter(KNN_ID, new GreaterConstraint(0)); - if(config.grab(knnP)) { + final IntParameter knnP = new IntParameter(KNN_ID); + knnP.addConstraint(new GreaterConstraint(0)); + if (config.grab(knnP)) { knn = knnP.getValue(); } - final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, new GreaterConstraint(0), 1.1); - if(config.grab(alphaP)) { - alpha = alphaP.getValue(); + final DoubleParameter alphaP = new DoubleParameter(ALPHA_ID, 1.1); + alphaP.addConstraint(new GreaterConstraint(0)); + if (config.grab(alphaP)) { + alpha = alphaP.doubleValue(); } } @@ -476,4 +477,4 @@ public class SOD<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> e return new SOD<V, D>(knn, alpha, similarityFunction); } } -}
\ No newline at end of file +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java index 66a89cf5..ae95abfa 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java @@ -57,7 +57,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements /** * Our logger. */ - private static final Logging logger = Logging.getLogger(ByLabelOutlier.class); + private static final Logging LOG = Logging.getLogger(ByLabelOutlier.class); /** * The default pattern to use. @@ -124,7 +124,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements @Override protected Logging getLogger() { - return logger; + return LOG; } /** @@ -145,7 +145,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements * Key: {@code -outlier.pattern} * </p> */ - public static final OptionID OUTLIER_PATTERN_ID = OptionID.getOrCreateOptionID("outlier.pattern", "Label pattern to match outliers."); + public static final OptionID OUTLIER_PATTERN_ID = new OptionID("outlier.pattern", "Label pattern to match outliers."); /** * Stores the "outlier" class. */ diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java index b50226f1..35a85d51 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java @@ -48,7 +48,7 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen /** * Our logger. */ - private static final Logging logger = Logging.getLogger(TrivialAllOutlier.class); + private static final Logging LOG = Logging.getLogger(TrivialAllOutlier.class); /** * Constructor. 
@@ -80,6 +80,6 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java index d1c2e076..e4c3861f 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java @@ -65,12 +65,12 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im /** * Class logger */ - private static final Logging logger = Logging.getLogger(TrivialGeneratedOutlier.class); + private static final Logging LOG = Logging.getLogger(TrivialGeneratedOutlier.class); /** * Expected share of outliers */ - public static final OptionID EXPECT_ID = OptionID.getOrCreateOptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive."); + public static final OptionID EXPECT_ID = new OptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive."); /** * Expected share of outliers. 
@@ -101,7 +101,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im @Override public OutlierResult run(Database database) { - Relation<NumberVector<?, ?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); + Relation<NumberVector<?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class)); // Prefer a true class label try { @@ -122,7 +122,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im * @param labels Label relation * @return Outlier result */ - public OutlierResult run(Relation<Model> models, Relation<NumberVector<?, ?>> vecs, Relation<?> labels) { + public OutlierResult run(Relation<Model> models, Relation<NumberVector<?>> vecs, Relation<?> labels) { WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT); // Adjustment constant @@ -136,7 +136,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im } } if(generators.size() == 0) { - logger.warning("No generator models found for dataset - all points will be considered outliers."); + LOG.warning("No generator models found for dataset - all points will be considered outliers."); } for(DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) { @@ -179,7 +179,7 @@ public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> im @Override protected Logging getLogger() { - return logger; + return LOG; } /** diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java index 6d8e9f46..695ff112 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java @@ -48,7 +48,7 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> 
implement /** * Our logger. */ - private static final Logging logger = Logging.getLogger(TrivialNoOutlier.class); + private static final Logging LOG = Logging.getLogger(TrivialNoOutlier.class); /** * Constructor. @@ -80,6 +80,6 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement @Override protected Logging getLogger() { - return logger; + return LOG; } }
\ No newline at end of file |