package de.lmu.ifi.dbs.elki.algorithm.outlier; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.Set; import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.DoubleDistanceResultPair; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.LPNormDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.spacefillingcurves.HilbertSpatialSorter; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.Heap; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; import de.lmu.ifi.dbs.elki.utilities.pairs.Pair; /** * Fast Outlier Detection in High Dimensional Spaces * * Outlier Detection using Hilbert space filling curves * * Reference: *

* F. Angiulli, C. Pizzuti:
* Fast Outlier Detection in High Dimensional Spaces.
* In: Proc. European Conference on Principles of Knowledge Discovery and Data * Mining (PKDD'02), Helsinki, Finland, 2002. *

* * @author Jonathan von Brünken * @author Erich Schubert * * @apiviz.composedOf HilbertFeatures * @apiviz.uses HilFeature * * @param Object type */ @Title("Fast Outlier Detection in High Dimensional Spaces") @Description("Algorithm to compute outliers using Hilbert space filling curves") @Reference(authors = "F. Angiulli, C. Pizzuti", title = "Fast Outlier Detection in High Dimensional Spaces", booktitle = "Proc. European Conference on Principles of Knowledge Discovery and Data Mining (PKDD'02)", url = "http://dx.doi.org/10.1145/375663.375668") public class HilOut> extends AbstractDistanceBasedAlgorithm implements OutlierAlgorithm { /** * The logger for this class. */ private static final Logging logger = Logging.getLogger(HilOut.class); /** * Number of nearest neighbors */ private int k; /** * Number of outliers to compute exactly */ private int n; /** * Hilbert precision */ private int h; /** * LPNorm p parameter */ private double t; /** * Reporting mode: exact (top n) only, or all */ private Enum tn; /** * Distance query */ private DistanceQuery distq; /** * Set sizes, total and current iteration */ private int capital_n, n_star, capital_n_star, d; /** * Outlier threshold */ private double omega_star; /** * Type of output: all scores (upper bounds) or top n only * * @author Jonathan von Brünken * * @apiviz.exclude */ public static enum ScoreType { All, TopN } /** * Constructor. * * @param k Number of Next Neighbors * @param n Number of Outlier * @param h Number of Bits for precision to use - max 32 * @param tn TopN or All Outlier Rank to return */ protected HilOut(LPNormDistanceFunction distfunc, int k, int n, int h, Enum tn) { super(distfunc); this.n = n; // HilOut does not count the object itself. We do in KNNWeightOutlier. this.k = k - 1; this.h = h; this.tn = tn; this.t = distfunc.getP(); this.n_star = 0; this.omega_star = 0.0; } public OutlierResult run(Database database, Relation relation) { distq = database.getDistanceQuery(relation, getDistanceFunction()); d = DatabaseUtil.dimensionality(relation); WritableDoubleDataStore hilout_weight = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); // Compute extend of dataset. double[] min; double diameter = 0; // Actually "length of edge" { Pair hbbs = DatabaseUtil.computeMinMax(relation); min = new double[d]; double[] max = new double[d]; for(int i = 0; i < d; i++) { min[i] = hbbs.first.doubleValue(i + 1); max[i] = hbbs.second.doubleValue(i + 1); diameter = Math.max(diameter, max[i] - min[i]); } // Enlarge bounding box to have equal lengths. for(int i = 0; i < d; i++) { double diff = (diameter - (max[i] - min[i])) / 2; min[i] -= diff; max[i] += diff; } if(logger.isVerbose()) { logger.verbose("Rescaling dataset by " + (1 / diameter) + " to fit the unit cube."); } } // Initialization part capital_n_star = capital_n = relation.size(); HilbertFeatures h = new HilbertFeatures(relation, min, diameter); FiniteProgress progressHilOut = logger.isVerbose() ? new FiniteProgress("HilOut iterations", d + 1, logger) : null; FiniteProgress progressTrueOut = logger.isVerbose() ? new FiniteProgress("True outliers found", n, logger) : null; // Main part: 1. Phase max. d+1 loops for(int j = 0; j <= d && n_star < n; j++) { // initialize (clear) out and wlb - not 100% clear in the paper h.out.clear(); h.wlb.clear(); // Initialize Hilbert values in pf according to current shift h.initialize(.5 * j / (d + 1)); // scan the Data according to the current shift; build out and wlb scan(h, (int) (k * capital_n / (double) capital_n_star)); // determine the true outliers (n_star) trueOutliers(h); if(progressTrueOut != null) { progressTrueOut.setProcessed(n_star, logger); } // Build the top Set as out + wlb h.top.clear(); HashSetModifiableDBIDs top_keys = DBIDUtil.newHashSet(h.out.size()); for(HilFeature entry : h.out) { top_keys.add(entry.id); h.top.add(entry); } for(HilFeature entry : h.wlb) { if(!top_keys.contains(entry.id)) { // No need to update top_keys - discarded h.top.add(entry); } } if(progressHilOut != null) { progressHilOut.incrementProcessed(logger); } } // 2. Phase: Additional Scan if less than n true outliers determined if(n_star < n) { h.out.clear(); h.wlb.clear(); // TODO: reinitialize shift to 0? scan(h, capital_n); } if(progressHilOut != null) { progressHilOut.setProcessed(d, logger); progressHilOut.ensureCompleted(logger); } if(progressTrueOut != null) { progressTrueOut.setProcessed(n, logger); progressTrueOut.ensureCompleted(logger); } DoubleMinMax minmax = new DoubleMinMax(); // Return weights in out if(tn == ScoreType.TopN) { minmax.put(0.0); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { hilout_weight.putDouble(iditer, 0.0); } for(HilFeature ent : h.out) { minmax.put(ent.ubound); hilout_weight.putDouble(ent.id, ent.ubound); } } // Return all weights in pf else { for(HilFeature ent : h.pf) { minmax.put(ent.ubound); hilout_weight.putDouble(ent.id, ent.ubound); } } Relation scoreResult = new MaterializedRelation("HilOut weight", "hilout-weight", TypeUtil.DOUBLE, hilout_weight, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY); OutlierResult result = new OutlierResult(scoreMeta, scoreResult); return result; } /** * Scan function performs a squential scan over the data. * * @param hf the hilbert features * @param k0 */ private void scan(HilbertFeatures hf, int k0) { final int mink0 = Math.min(2 * k0, capital_n - 1); if(logger.isDebuggingFine()) { logger.debugFine("Scanning with k0=" + k0 + " (" + mink0 + ")" + " N*=" + capital_n_star); } for(int i = 0; i < hf.pf.length; i++) { if(hf.pf[i].ubound < omega_star) { continue; } if(hf.pf[i].lbound < hf.pf[i].ubound) { double omega = hf.fastUpperBound(i); if(omega < omega_star) { hf.pf[i].ubound = omega; } else { int maxcount; // capital_n-1 instead of capital_n: all, except self if(hf.top.contains(hf.pf[i])) { maxcount = capital_n - 1; } else { maxcount = mink0; } innerScan(hf, i, maxcount); } } if(hf.pf[i].ubound > 0) { hf.updateOUT(i); } if(hf.pf[i].lbound > 0) { hf.updateWLB(i); } if(hf.wlb.size() >= n) { omega_star = Math.max(omega_star, hf.wlb.peek().lbound); } } } /** * innerScan function calculates new upper and lower bounds and inserts the * points of the neighborhood the bounds are based on in the NN Set * * @param i position in pf of the feature for which the bounds should be * calculated * @param maxcount maximal size of the neighborhood */ private void innerScan(HilbertFeatures hf, final int i, final int maxcount) { final O p = hf.relation.get(hf.pf[i].id); // Get only once for performance int a = i, b = i; int level = h, levela = h, levelb = h; // Explore up to "maxcount" neighbors in this pass for(int count = 0; count < maxcount; count++) { final int c; // Neighbor to explore if(a == 0) { // At left end, explore right // assert (b < capital_n - 1); levelb = Math.min(levelb, hf.pf[b].level); b++; c = b; } else if(b >= capital_n - 1) { // At right end, explore left // assert (a > 0); a--; levela = Math.min(levela, hf.pf[a].level); c = a; } else if(hf.pf[a - 1].level >= hf.pf[b].level) { // Prefer higher level a--; levela = Math.min(levela, hf.pf[a].level); c = a; } else { // assert (b < capital_n - 1); levelb = Math.min(levelb, hf.pf[b].level); b++; c = b; } if(!hf.pf[i].nn_keys.contains(hf.pf[c].id)) { // hf.distcomp ++; hf.pf[i].insert(hf.pf[c].id, distq.distance(p, hf.pf[c].id).doubleValue(), k); if(hf.pf[i].nn.size() == k) { if(hf.pf[i].sum_nn < omega_star) { break; // stop = true } final int mlevel = Math.max(levela, levelb); if(mlevel < level) { level = mlevel; final double delta = hf.minDistLevel(hf.pf[i].id, level); if(delta >= hf.pf[i].nn.peek().getDoubleDistance()) { break; // stop = true } } } } } double br = hf.boxRadius(i, a - 1, b + 1); double newlb = 0.0; double newub = 0.0; for(DoubleDistanceResultPair entry : hf.pf[i].nn) { newub += entry.getDoubleDistance(); if(entry.getDoubleDistance() <= br) { newlb += entry.getDoubleDistance(); } } if(newlb > hf.pf[i].lbound) { hf.pf[i].lbound = newlb; } if(newub < hf.pf[i].ubound) { hf.pf[i].ubound = newub; } } /** * trueOutliers function updates n_star * * @param h the HilberFeatures * */ private void trueOutliers(HilbertFeatures h) { n_star = 0; for(HilFeature entry : h.out) { if(entry.ubound >= omega_star && (entry.ubound - entry.lbound < 1E-10)) { n_star++; } } } @Override protected Logging getLogger() { return logger; } @Override public TypeInformation[] getInputTypeRestriction() { return TypeUtil.array(new LPNormDistanceFunction(t).getInputTypeRestriction()); } /** * Class organizing the data points along a hilbert curve. * * @author Jonathan von Brünken * * @apiviz.composedOf HilFeature */ class HilbertFeatures { // public int distcomp = 1; /** * Relation indexed */ Relation relation; /** * Hilbert representation ("point features") */ HilFeature[] pf; /** * Data space minimums */ double[] min; /** * Data space diameter */ double diameter; /** * Current curve shift */ double shift; /** * Top candidates */ private Set top; /** * "OUT" */ private Heap out; /** * "WLB" */ private Heap wlb; /** * Constructor. * * @param relation Relation to index * @param min Minimums for data space * @param diameter Diameter of data space */ public HilbertFeatures(Relation relation, double[] min, double diameter) { super(); this.relation = relation; this.min = min; this.diameter = diameter; this.pf = new HilFeature[relation.size()]; int pos = 0; for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { pf[pos++] = new HilFeature(iditer.getDBID(), new Heap(k, Collections.reverseOrder())); } this.out = new Heap(n, new Comparator() { @Override public int compare(HilFeature o1, HilFeature o2) { return Double.compare(o1.ubound, o2.ubound); } }); this.wlb = new Heap(n, new Comparator() { @Override public int compare(HilFeature o1, HilFeature o2) { return Double.compare(o1.lbound, o2.lbound); } }); this.top = new HashSet(2 * n); } /** * Hilbert function to fill pf with shifted Hilbert values. Also calculates * the number current Outlier candidates capital_n_star * * @param shift the new shift factor */ private void initialize(double shift) { this.shift = shift; // FIXME: 64 bit mode untested - sign bit is tricky to handle correctly // with the rescaling. 63 bit should be fine. The sign bit probably needs // to be handled differently, or at least needs careful testing of the API if(h >= 32) { // 32 to 63 bit final long scale = Long.MAX_VALUE; // = 63 bits for(int i = 0; i < pf.length; i++) { NumberVector obj = relation.get(pf[i].id); long[] coord = new long[d]; for(int dim = 0; dim < d; dim++) { coord[dim] = (long) (getDimForObject(obj, dim) * .5 * scale); } pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1); } } else if(h >= 16) { // 16-31 bit final int scale = ~1 >>> 1; for(int i = 0; i < pf.length; i++) { NumberVector obj = relation.get(pf[i].id); int[] coord = new int[d]; for(int dim = 0; dim < d; dim++) { coord[dim] = (int) (getDimForObject(obj, dim) * .5 * scale); } pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 1); } } else if(h >= 8) { // 8-15 bit final int scale = ~1 >>> 16; for(int i = 0; i < pf.length; i++) { NumberVector obj = relation.get(pf[i].id); short[] coord = new short[d]; for(int dim = 0; dim < d; dim++) { coord[dim] = (short) (getDimForObject(obj, dim) * .5 * scale); } pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 16); } } else { // 1-7 bit final int scale = ~1 >>> 8; for(int i = 0; i < pf.length; i++) { NumberVector obj = relation.get(pf[i].id); byte[] coord = new byte[d]; for(int dim = 0; dim < d; dim++) { coord[dim] = (byte) (getDimForObject(obj, dim) * .5 * scale); } pf[i].hilbert = HilbertSpatialSorter.coordinatesToHilbert(coord, h, 24); } } java.util.Arrays.sort(pf); // Update levels for(int i = 0; i < pf.length - 1; i++) { pf[i].level = minRegLevel(i, i + 1); } // Count candidates capital_n_star = 0; for(int i = 0; i < pf.length; i++) { if(pf[i].ubound >= omega_star) { capital_n_star++; } } } /** * updateOUT function inserts pf[i] in out. * * @param i position in pf of the feature to be inserted */ private void updateOUT(int i) { if(out.size() < n) { out.offer(pf[i]); } else { HilFeature head = out.peek(); if(pf[i].ubound > head.ubound) { // replace smallest out.poll(); // assert (out.peek().ubound >= head.ubound); out.offer(pf[i]); } } } /** * updateWLB function inserts pf[i] in wlb. * * @param i position in pf of the feature to be inserted */ private void updateWLB(int i) { if(wlb.size() < n) { wlb.offer(pf[i]); } else { HilFeature head = wlb.peek(); if(pf[i].lbound > head.lbound) { // replace smallest wlb.poll(); // assert (wlb.peek().lbound >= head.lbound); wlb.offer(pf[i]); } } } /** * fastUpperBound function calculates an upper Bound as k*maxDist(pf[i], * smallest neighborhood) * * @param i position in pf of the feature for which the bound should be * calculated */ private double fastUpperBound(int i) { int pre = i; int post = i; while(post - pre < k) { int pre_level = (pre - 1 >= 0) ? pf[pre - 1].level : -2; int post_level = (post < capital_n - 1) ? pf[post].level : -2; if(post_level >= pre_level) { post++; } else { pre--; } } return k * maxDistLevel(pf[i].id, minRegLevel(pre, post)); } /** * minDist function calculate the minimal Distance from Vector p to the * border of the corresponding r-region at the given level * * @param id Object ID * @param level Level of the corresponding r-region */ private double minDistLevel(DBID id, int level) { final NumberVector obj = relation.get(id); // level 1 is supposed to have r=1 as in the original publication // 2 ^ - (level - 1) final double r = 1.0 / (1 << (level - 1)); double dist = Double.POSITIVE_INFINITY; for(int dim = 0; dim < d; dim++) { final double p_m_r = getDimForObject(obj, dim) % r; dist = Math.min(dist, Math.min(p_m_r, r - p_m_r)); } return dist * diameter; } /** * maxDist function calculate the maximal Distance from Vector p to the * border of the corresponding r-region at the given level * * @param id Object ID * @param level Level of the corresponding r-region */ private double maxDistLevel(DBID id, int level) { final NumberVector obj = relation.get(id); // level 1 is supposed to have r=1 as in the original publication final double r = 1.0 / (1 << (level - 1)); double dist; if(t == 1.0) { dist = 0.0; for(int dim = 0; dim < d; dim++) { final double p_m_r = getDimForObject(obj, dim) % r; // assert (p_m_r >= 0); dist += Math.max(p_m_r, r - p_m_r); } } else if(t == 2.0) { dist = 0.0; for(int dim = 0; dim < d; dim++) { final double p_m_r = getDimForObject(obj, dim) % r; // assert (p_m_r >= 0); double a = Math.max(p_m_r, r - p_m_r); dist += a * a; } dist = Math.sqrt(dist); } else if(!Double.isInfinite(t)) { dist = 0.0; for(int dim = 0; dim < d; dim++) { final double p_m_r = getDimForObject(obj, dim) % r; dist += Math.pow(Math.max(p_m_r, r - p_m_r), t); } dist = Math.pow(dist, 1.0 / t); } else { dist = Double.NEGATIVE_INFINITY; for(int dim = 0; dim < d; dim++) { final double p_m_r = getDimForObject(obj, dim) % r; dist = Math.max(dist, Math.max(p_m_r, r - p_m_r)); } } return dist * diameter; } /** * Number of levels shared * * @param a First bitset * @param b Second bitset * @return Number of level shared */ private int numberSharedLevels(long[] a, long[] b) { for(int i = 0, j = a.length - 1; i < a.length; i++, j--) { final long diff = a[j] ^ b[j]; if(diff != 0) { // expected unused = available - used final int expected = (a.length * Long.SIZE) - (d * h); return ((BitsUtil.numberOfLeadingZeros(diff) + i * Long.SIZE) - expected) / d; } } return h - 1; } /** * minReg function calculate the minimal r-region level containing two * points * * @param a index of first point in pf * @param b index of second point in pf * * @return Level of the r-region */ private int minRegLevel(int a, int b) { // Sanity test: first level different -> region of level 0, r=2 // all same: level h - 1 return numberSharedLevels(pf[a].hilbert, pf[b].hilbert); } /** * Level of the maximum region containing ref but not q * * @param ref Reference point * @param q Query point * @return Number of bits shared across all dimensions */ private int maxRegLevel(int ref, int q) { // Sanity test: first level different -> region of level 1, r=1 // all same: level h return numberSharedLevels(pf[ref].hilbert, pf[q].hilbert) + 1; } /** * boxRadius function calculate the Boxradius * * @param i index of first point * @param a index of second point * @param b index of third point * * @return box radius */ private double boxRadius(int i, int a, int b) { // level are inversely ordered to box sizes. min -> max final int level; if(a < 0) { if(b >= pf.length) { return Double.POSITIVE_INFINITY; } level = maxRegLevel(i, b); } else if(b >= pf.length) { level = maxRegLevel(i, a); } else { level = Math.max(maxRegLevel(i, a), maxRegLevel(i, b)); } return minDistLevel(pf[i].id, level); } /** * Get the (projected) position of the object in dimension dim. * * @param obj Object * @param dim Dimension * @return Projected and shifted position */ private double getDimForObject(NumberVector obj, int dim) { return (obj.doubleValue(dim + 1) - min[dim]) / diameter + shift; } } /** * Hilbert representation of a single object. * * Details of this representation are discussed in the main HilOut * publication, see "point features". * * @author Jonathan von Brünken */ final static class HilFeature implements Comparable { /** * Object ID */ public DBID id; /** * Hilbert representation * * TODO: use byte[] to save some memory, but slower? */ public long[] hilbert = null; /** * Object level */ public int level = 0; /** * Upper bound for object */ public double ubound = Double.POSITIVE_INFINITY; /** * Lower bound of object */ public double lbound = 0.0; /** * Heap with the nearest known neighbors */ public Heap nn; /** * Set representation of the nearest neighbors for faster lookups */ public HashSetModifiableDBIDs nn_keys = DBIDUtil.newHashSet(); /** * Current weight (sum of nn distances) */ public double sum_nn = 0.0; /** * Constructor. * * @param id Object ID * @param nn Heap for neighbors */ public HilFeature(DBID id, Heap nn) { super(); this.id = id; this.nn = nn; } @Override public int compareTo(HilFeature o) { return BitsUtil.compare(this.hilbert, o.hilbert); } /** * insert function inserts a nearest neighbor into a features nn list and * its distance * * @param id DBID of the nearest neighbor * @param dt distance or the neighbor to the features position * @param k K */ protected void insert(DBID id, double dt, int k) { // assert (!nn_keys.contains(id)); if(nn.size() < k) { DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id); nn.offer(entry); nn_keys.add(id); sum_nn += dt; } else { DoubleDistanceResultPair head = nn.peek(); if(dt < head.getDoubleDistance()) { head = nn.poll(); // Remove worst sum_nn -= head.getDoubleDistance(); nn_keys.remove(head.getDBID()); // assert (nn.peek().getDoubleDistance() <= head.getDoubleDistance()); DoubleDistanceResultPair entry = new DoubleDistanceResultPair(dt, id); nn.offer(entry); nn_keys.add(id); sum_nn += dt; } } } } /** * Parameterization class * * @author Jonathan von Brünken * * @apiviz.exclude * * @param Vector type */ public static class Parameterizer> extends AbstractParameterizer { /** * Parameter to specify how many next neighbors should be used in the * computation */ public static final OptionID K_ID = OptionID.getOrCreateOptionID("HilOut.k", "Compute up to k next neighbors"); /** * Parameter to specify how many outliers should be computed */ public static final OptionID N_ID = OptionID.getOrCreateOptionID("HilOut.n", "Compute n outliers"); /** * Parameter to specify the maximum Hilbert-Level */ public static final OptionID H_ID = OptionID.getOrCreateOptionID("HilOut.h", "Max. Hilbert-Level"); /** * Parameter to specify p of LP-NormDistance */ public static final OptionID T_ID = OptionID.getOrCreateOptionID("HilOut.t", "t of Lt Metric"); /** * Parameter to specify if only the Top n, or also approximations for the * other elements, should be returned */ public static final OptionID TN_ID = OptionID.getOrCreateOptionID("HilOut.tn", "output of Top n or all elements"); /** * Neighborhood size */ protected int k = 5; /** * Top-n candidates to compute exactly */ protected int n = 10; /** * Hilbert curve precision */ protected int h = 32; /** * LPNorm distance function */ protected LPNormDistanceFunction distfunc; /** * Scores to report: all or top-n only */ protected Enum tn; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); final IntParameter kP = new IntParameter(K_ID, 5); if(config.grab(kP)) { k = kP.getValue(); } final IntParameter nP = new IntParameter(N_ID, 10); if(config.grab(nP)) { n = nP.getValue(); } final IntParameter hP = new IntParameter(H_ID, 32); if(config.grab(hP)) { h = hP.getValue(); } ObjectParameter distP = AbstractDistanceBasedAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, LPNormDistanceFunction.class); if (config.grab(distP)) { distfunc = distP.instantiateClass(config); } final EnumParameter tnP = new EnumParameter(TN_ID, ScoreType.class, ScoreType.TopN); if(config.grab(tnP)) { tn = tnP.getValue(); } } @Override protected HilOut makeInstance() { return new HilOut(distfunc, k, n, h, tn); } } }