diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel')
4 files changed, 513 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java new file mode 100644 index 00000000..a26a7505 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/KNNWeightProcessor.java @@ -0,0 +1,118 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.parallel.Executor; +import de.lmu.ifi.dbs.elki.parallel.processor.AbstractDoubleProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; + +/** + * Compute the kNN weight score, used by {@link ParallelKNNWeightOutlier}. + * + * Needs the k nearest neighbors as input, for example from {@link KNNProcessor} + * + * @author Erich Schubert + */ +public class KNNWeightProcessor extends AbstractDoubleProcessor { + /** + * K parameter + */ + int k; + + /** + * Constructor. + * + * @param k K parameter + */ + public KNNWeightProcessor(int k) { + super(); + this.k = k; + } + + /** + * KNN query object + */ + SharedObject<? extends KNNList> input; + + /** + * Connect the input channel. + * + * @param input Input channel + */ + public void connectKNNInput(SharedObject<? extends KNNList> input) { + this.input = input; + } + + @Override + public Instance instantiate(Executor executor) { + return new Instance(k, executor.getInstance(input), executor.getInstance(output)); + } + + /** + * Instance for precomputing the kNN. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + private static class Instance extends AbstractDoubleProcessor.Instance { + /** + * k Parameter + */ + int k; + + /** + * kNN query + */ + SharedObject.Instance<? extends KNNList> input; + + /** + * Constructor. + * + * @param k K parameter + * @param input kNN list input + * @param store Datastore to write to + */ + protected Instance(int k, SharedObject.Instance<? extends KNNList> input, SharedDouble.Instance store) { + super(store); + this.k = k; + this.input = input; + } + + @Override + public void map(DBIDRef id) { + final KNNList list = input.get(); + int i = 0; + double sum = 0; + for(DoubleDBIDListIter iter = list.iter(); iter.valid() && i < k; iter.advance(), ++i) { + sum += iter.doubleValue(); + } + output.set(sum); + } + } +} diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java new file mode 100644 index 00000000..b7b43765 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNOutlier.java @@ -0,0 +1,181 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNOutlier; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; +import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KDistanceProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Parallel implementation of KNN Outlier detection. + * + * Reference: + * <p> + * S. Ramaswamy, R. Rastogi, K. Shim:<br /> + * Efficient Algorithms for Mining Outliers from Large Data Sets.<br /> + * In: Proc. of the Int. Conf. on Management of Data, Dallas, Texas, 2000. + * </p> + * + * This parallelized implementation is based on the easy-to-parallelize + * generalized pattern discussed in + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.composedOf KNNProcessor + * @apiviz.composedOf KDistanceProcessor + * + * @param <O> Object type + */ +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +public class ParallelKNNOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Parameter k + */ + private int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k K parameter + */ + public ParallelKNNOutlier(DistanceFunction<? super O> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Class logger + */ + private static final Logging LOG = Logging.getLogger(ParallelKNNOutlier.class); + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + public OutlierResult run(Database database, Relation<O> relation) { + DBIDs ids = relation.getDBIDs(); + WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1); + + // Compute the kNN + KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq); + SharedObject<KNNList> knnv = new SharedObject<>(); + knnm.connectKNNOutput(knnv); + // Extract the k-distance + KDistanceProcessor kdistm = new KDistanceProcessor(k + 1); + SharedDouble kdistv = new SharedDouble(); + kdistm.connectKNNInput(knnv); + kdistm.connectOutput(kdistv); + // Store in outlier scores + WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store); + storem.connectInput(kdistv); + // Gather statistics + DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor(); + mmm.connectInput(kdistv); + + ParallelExecutor.run(ids, knnm, kdistm, storem, mmm); + + DoubleMinMax minmax = mmm.getMinMax(); + DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", store, ids); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * K parameter + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter kP = new IntParameter(KNNOutlier.Parameterizer.K_ID); + if(config.grab(kP)) { + k = kP.getValue(); + } + } + + @Override + protected ParallelKNNOutlier<O> makeInstance() { + return new ParallelKNNOutlier<>(distanceFunction, k); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java new file mode 100644 index 00000000..40639ec5 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/ParallelKNNWeightOutlier.java @@ -0,0 +1,187 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.distance.KNNWeightOutlier; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.KNNList; +import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; +import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery; +import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.parallel.ParallelExecutor; +import de.lmu.ifi.dbs.elki.parallel.processor.DoubleMinMaxProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.KNNProcessor; +import de.lmu.ifi.dbs.elki.parallel.processor.WriteDoubleDataStoreProcessor; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedDouble; +import de.lmu.ifi.dbs.elki.parallel.variables.SharedObject; +import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; + +/** + * Parallel implementation of KNN Weight Outlier detection. + * + * Reference: + * <p> + * F. Angiulli, C. Pizzuti:<br /> + * Fast Outlier Detection in High Dimensional Spaces.<br /> + * In: Proc. European Conference on Principles of Knowledge Discovery and Data + * Mining (PKDD'02), Helsinki, Finland, 2002. + * </p> + * + * This parallelized implementation is based on the easy-to-parallelize + * generalized pattern discussed in + * <p> + * Erich Schubert, Arthur Zimek, Hans-Peter Kriegel<br /> + * Local Outlier Detection Reconsidered: a Generalized View on Locality with + * Applications to Spatial, Video, and Network Outlier Detection<br /> + * Data Mining and Knowledge Discovery, 28(1): 190–237, 2014. + * </p> + * + * @author Erich Schubert + * + * @apiviz.composedOf KNNWeightProcessor + * + * @param <O> Object type + */ +@Reference(authors = "E. Schubert, A. Zimek, H.-P. Kriegel", // +title = "Local Outlier Detection Reconsidered: a Generalized View on Locality with Applications to Spatial, Video, and Network Outlier Detection", // +booktitle = "Data Mining and Knowledge Discovery, 28(1): 190–237, 2014.", // +url = "http://dx.doi.org/10.1007/s10618-012-0300-z") +public class ParallelKNNWeightOutlier<O> extends AbstractDistanceBasedAlgorithm<O, OutlierResult> implements OutlierAlgorithm { + /** + * Parameter k + */ + private int k; + + /** + * Constructor. + * + * @param distanceFunction Distance function + * @param k K parameter + */ + public ParallelKNNWeightOutlier(DistanceFunction<? super O> distanceFunction, int k) { + super(distanceFunction); + this.k = k; + } + + /** + * Class logger + */ + private static final Logging LOG = Logging.getLogger(ParallelKNNWeightOutlier.class); + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(getDistanceFunction().getInputTypeRestriction()); + } + + /** + * Run the parallel kNN weight outlier detector. + * + * @param database Database to process + * @param relation Relation to analyze + * @return Outlier detection result + */ + public OutlierResult run(Database database, Relation<O> relation) { + DBIDs ids = relation.getDBIDs(); + WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB); + DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction()); + KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1); + + // Find kNN + KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq); + SharedObject<KNNList> knnv = new SharedObject<>(); + knnm.connectKNNOutput(knnv); + // Extract outlier score + KNNWeightProcessor kdistm = new KNNWeightProcessor(k + 1); + SharedDouble kdistv = new SharedDouble(); + kdistm.connectKNNInput(knnv); + kdistm.connectOutput(kdistv); + // Store in output result + WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store); + storem.connectInput(kdistv); + // And gather statistics for metadata + DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor(); + mmm.connectInput(kdistv); + + ParallelExecutor.run(ids, knnm, kdistm, storem, mmm); + + DoubleMinMax minmax = mmm.getMinMax(); + DoubleRelation scoreres = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", store, ids); + OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return LOG; + } + + /** + * Parameterization class + * + * @author Erich Schubert + * + * @apiviz.exclude + * + * @param <O> Object type + */ + public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> { + /** + * K parameter + */ + int k; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + + IntParameter kP = new IntParameter(KNNWeightOutlier.Parameterizer.K_ID); + if(config.grab(kP)) { + k = kP.getValue(); + } + } + + @Override + protected ParallelKNNWeightOutlier<O> makeInstance() { + return new ParallelKNNWeightOutlier<>(distanceFunction, k); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java new file mode 100644 index 00000000..58090507 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/distance/parallel/package-info.java @@ -0,0 +1,27 @@ +/** + * Parallel implementations of distance-based outlier detectors. + */ + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2014 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package de.lmu.ifi.dbs.elki.algorithm.outlier.distance.parallel;
\ No newline at end of file |