summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java187
1 files changed, 187 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java
new file mode 100644
index 00000000..40639ec5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java
@@ -0,0 +1,187 @@
+package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.KNNList;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor;
+import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor;
+import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble;
+import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject;
+import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Parallel implementation of KNN Weight Outlier detection.
+ *
+ * Reference:
+ * <p>
+ * F. Angiulli, C. Pizzuti:<br />
+ * Fast Outlier Detection in High Dimensional Spaces.<br />
+ * In: Proc. European Conference on Principles of Knowledge Discovery and Data
+ * Mining (PKDD'02), Helsinki, Finland, 2002.
+ * </p>
+ *
+ * This parallelized implementation is based on the easy-to-parallelize
+ * generalized pattern discussed in
+ * <p>
+ * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br />
+ * Local Outlier Detection Reconsidered: a Generalized View on Locality with
+ * Applications to Spatial, Video, and Network Outlier Detection<br />
+ * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf KNNWeightProcessor
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", //
+title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", //
+booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", //
+url = "http://dx.doi.org/10.1007/s10618-012-0300-z")
+public class ParallelKNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm {
+ /**
+ * Parameter k
+ */
+ private int k;
+
+ /**
+ * Constructor.
+ *
+ * @param distanceFunction Distance function
+ * @param k K parameter
+ */
+ public ParallelKNNWeightOutlier(DistanceFunction<? super O> distanceFunction, int k) {
+ super(distanceFunction);
+ this.k = k;
+ }
+
+ /**
+ * Class logger
+ */
+ private static final Logging LOG = Logging.getLogger(ParallelKNNWeightOutlier.class);
+
+ @Override
+ public TypeInformation[] getInputTypeRestriction() {
+ return TypeUtil.array(getDistanceFunction().getInputTypeRestriction());
+ }
+
+ /**
+ * Run the parallel kNN weight outlier detector.
+ *
+ * @param database Database to process
+ * @param relation Relation to analyze
+ * @return Outlier detection result
+ */
+ public OutlierResult run(Database database, Relation<O> relation) {
+ DBIDs ids = relation.getDBIDs();
+ WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
+ DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
+ KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
+
+ // Find kNN
+ KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
+ SharedObject<KNNList> knnv = new SharedObject<>();
+ knnm.connectKNNOutput(knnv);
+ // Extract outlier score
+ KNNWeightProcessor kdistm = new KNNWeightProcessor(k + 1);
+ SharedDouble kdistv = new SharedDouble();
+ kdistm.connectKNNInput(knnv);
+ kdistm.connectOutput(kdistv);
+ // Store in output result
+ WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store);
+ storem.connectInput(kdistv);
+ // And gather statistics for metadata
+ DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
+ mmm.connectInput(kdistv);
+
+ ParallelExecutor.run(ids, knnm, kdistm, storem, mmm);
+
+ DoubleMinMax minmax = mmm.getMinMax();
+ DoubleRelation scoreres = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", store, ids);
+ OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
+ return new OutlierResult(meta, scoreres);
+ }
+
+ @Override
+ protected Logging getLogger() {
+ return LOG;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ *
+ * @param <O> Object type
+ */
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
+ /**
+ * K parameter
+ */
+ int k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ IntParameter kP = new IntParameter(KNNWeightOutlier.Parameterizer.K_ID);
+ if(config.grab(kP)) {
+ k = kP.getValue();
+ }
+ }
+
+ @Override
+ protected ParallelKNNWeightOutlier<O> makeInstance() {
+ return new ParallelKNNWeightOutlier<>(distanceFunction, k);
+ }
+ }
+} \ No newline at end of file