diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial')
5 files changed, 225 insertions, 13 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java index 62f083fb..86730404 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/ByLabelOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -34,7 +34,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -111,7 +111,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements * @return Result */ public OutlierResult run(Relation<?> relation) { - WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Double.class); + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT); for(DBID id : relation.iterDBIDs()) { String label = relation.get(id).toString(); final double score; @@ -120,7 +120,7 @@ public class ByLabelOutlier extends AbstractAlgorithm<OutlierResult> implements } else { score = 0.0; } - scores.put(id, score); + scores.putDouble(id, score); } Relation<Double> scoreres = new MaterializedRelation<Double>("By label outlier scores", "label-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java index ff93e0ed..509e35e9 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialAllOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -69,9 +69,9 @@ public class TrivialAllOutlier extends AbstractAlgorithm<OutlierResult> implemen * @return Result */ public OutlierResult run(Relation<?> relation) { - WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Double.class); + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT); for(DBID id : relation.iterDBIDs()) { - scores.put(id, 1.0); + scores.putDouble(id, 1.0); } Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial all-outlier score", "all-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java new file mode 100644 index 00000000..db40ff30 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialGeneratedOutlier.java @@ -0,0 +1,212 @@ +package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.util.HashSet; + +import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster; +import de.lmu.ifi.dbs.elki.data.type.NoSupportedDataTypeException; +import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeInformation; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; +import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution; +import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; +import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; +import de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; + +/** + * Extract outlier score from the model the objects were generated by. + * + * This algorithm can only be applied to data that was freshly generated, to the + * generator model information is still available. + * + * @author Erich Schubert + */ +public class TrivialGeneratedOutlier extends AbstractAlgorithm<OutlierResult> implements OutlierAlgorithm { + /** + * Class logger + */ + private static final Logging logger = Logging.getLogger(TrivialGeneratedOutlier.class); + + /** + * Expected share of outliers + */ + public static final OptionID EXPECT_ID = OptionID.getOrCreateOptionID("modeloutlier.expect", "Expected amount of outliers, for making the scores more intuitive."); + + /** + * Expected share of outliers. + */ + double expect = 0.01; + + /** + * Constructor. + * + * @param expect Expected share of outliers + */ + public TrivialGeneratedOutlier(double expect) { + super(); + this.expect = expect; + } + + /** + * Constructor. + */ + public TrivialGeneratedOutlier() { + this(0.01); + } + + @Override + public TypeInformation[] getInputTypeRestriction() { + return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD, new SimpleTypeInformation<Model>(Model.class), TypeUtil.GUESSED_LABEL); + } + + @Override + public OutlierResult run(Database database) throws IllegalStateException { + Relation<NumberVector<?, ?>> vecs = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); + Relation<Model> models = database.getRelation(new SimpleTypeInformation<Model>(Model.class)); + // Prefer a true class label + try { + Relation<?> relation = database.getRelation(TypeUtil.CLASSLABEL); + return run(models, vecs, relation); + } + catch(NoSupportedDataTypeException e) { + // Otherwise, try any labellike. + return run(models, vecs, database.getRelation(TypeUtil.GUESSED_LABEL)); + } + } + + /** + * Run the algorithm + * + * @param models Model relation + * @param vecs Vector relation + * @param labels Label relation + * @return Outlier result + */ + public OutlierResult run(Relation<Model> models, Relation<NumberVector<?, ?>> vecs, Relation<?> labels) { + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT); + + // Adjustment constant + final double minscore = expect / (expect + 1); + + HashSet<GeneratorSingleCluster> generators = new HashSet<GeneratorSingleCluster>(); + for(DBID id : models.iterDBIDs()) { + Model model = models.get(id); + if(model instanceof GeneratorSingleCluster) { + generators.add((GeneratorSingleCluster) model); + } + } + if(generators.size() == 0) { + logger.warning("No generator models found for dataset - all points will be considered outliers."); + } + + for(DBID id : models.iterDBIDs()) { + double score = 0.0; + // Convert to a math vector + Vector v = vecs.get(id).getColumnVector(); + for(GeneratorSingleCluster gen : generators) { + Vector tv = v; + // Transform backwards + if(gen.getTransformation() != null) { + tv = gen.getTransformation().applyInverse(v); + } + final int dim = tv.getDimensionality(); + double lensq = 0.0; + int norm = 0; + for(int i = 0; i < dim; i++) { + Distribution dist = gen.getDistribution(i); + if(dist instanceof NormalDistribution) { + NormalDistribution d = (NormalDistribution) dist; + double delta = (tv.get(i) - d.getMean()) / d.getStddev(); + lensq += delta * delta; + norm += 1; + } + } + if(norm > 0) { + // The squared distances are ChiSquared distributed + score = Math.max(score, 1 - ChiSquaredDistribution.cdf(lensq, norm)); + } + } + // score inversion. + score = expect / (expect + score); + // adjust to 0 to 1 range: + score = (score - minscore) / (1 - minscore); + scores.putDouble(id, score); + } + Relation<Double> scoreres = new MaterializedRelation<Double>("Model outlier scores", "model-outlier", TypeUtil.DOUBLE, scores, models.getDBIDs()); + OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.); + return new OutlierResult(meta, scoreres); + } + + @Override + protected Logging getLogger() { + return logger; + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + /** + * Expected share of outliers + */ + double expect; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + DoubleParameter expectP = new DoubleParameter(EXPECT_ID, 0.01); + if(config.grab(expectP)) { + expect = expectP.getValue(); + } + } + + @Override + protected TrivialGeneratedOutlier makeInstance() { + return new TrivialGeneratedOutlier(expect); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java index f3ae7e72..cff2ad2c 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/TrivialNoOutlier.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.outlier.trivial; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2011 + Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -29,7 +29,7 @@ import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; -import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; +import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -69,9 +69,9 @@ public class TrivialNoOutlier extends AbstractAlgorithm<OutlierResult> implement * @return Result */ public OutlierResult run(Relation<?> relation) throws IllegalStateException { - WritableDataStore<Double> scores = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT, Double.class); + WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT); for(DBID id : relation.iterDBIDs()) { - scores.put(id, 0.0); + scores.putDouble(id, 0.0); } Relation<Double> scoreres = new MaterializedRelation<Double>("Trivial no-outlier score", "no-outlier", TypeUtil.DOUBLE, scores, relation.getDBIDs()); OutlierScoreMeta meta = new ProbabilisticOutlierScore(); diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java index fbae60dc..d49d3565 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/outlier/trivial/package-info.java @@ -7,7 +7,7 @@ This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures -Copyright (C) 2011 +Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team |