diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java new file mode 100644 index 00000000..40639ec5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java @@ -0,0 +1,187 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; +import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Parallel implementation of KNN Weight Outlier detection. + * + * Reference: + * <p> + * F. Angiulli, C. Pizzuti:<br /> + * Fast Outlier Detection in High Dimensional Spaces.<br /> + * In: Proc. European Conference on Principles of Knowledge Discovery and Data + * Mining (PKDD'02), Helsinki, Finland, 2002. + * </p> + * + * This parallelized implementation is based on the easy-to-parallelize + * generalized pattern discussed in + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.composedOf KNNWeightProcessor + * + * @param <O> Object type + */ +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +public class ParallelKNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Parameter k + */ + private int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k K parameter + */ + public ParallelKNNWeightOutlier(DistanceFunction<? super O> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Class logger + */ + private static final Logging LOG = Logging.getLogger(ParallelKNNWeightOutlier.class); + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + /** + * Run the parallel kNN weight outlier detector. + * + * @param database Database to process + * @param relation Relation to analyze + * @return Outlier detection result + */ + public OutlierResult run(Database database, Relation<O> relation) { + DBIDs ids = relation.getDBIDs(); + WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1); + + // Find kNN + KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq); + SharedObject<KNNList> knnv = new SharedObject<>(); + knnm.connectKNNOutput(knnv); + // Extract outlier score + KNNWeightProcessor kdistm = new KNNWeightProcessor(k + 1); + SharedDouble kdistv = new SharedDouble(); + kdistm.connectKNNInput(knnv); + kdistm.connectOutput(kdistv); + // Store in output result + WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store); + storem.connectInput(kdistv); + // And gather statistics for metadata + DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor(); + mmm.connectInput(kdistv); + + ParallelExecutor.run(ids, knnm, kdistm, storem, mmm); + + DoubleMinMax minmax = mmm.getMinMax(); + DoubleRelation scoreres = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", store, ids); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * K parameter + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter kP = new IntParameter(KNNWeightOutlier.Parameterizer.K_ID); + if(config.grab(kP)) { + k = kP.getValue(); + } + } + + @Override + protected ParallelKNNWeightOutlier<O> makeInstance() { + return new ParallelKNNWeightOutlier<>(distanceFunction, k); + } + } +}
\ No newline at end of file |