package de.lmu.ifi.dbs.elki.evaluation.outlier;

/*
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures

 Copyright (C) 2014
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.List;
import java.util.regex.Pattern;

import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.evaluation.scores.AveragePrecisionEvaluation;
import de.lmu.ifi.dbs.elki.evaluation.scores.MaximumF1Evaluation;
import de.lmu.ifi.dbs.elki.evaluation.scores.PrecisionAtKEvaluation;
import de.lmu.ifi.dbs.elki.evaluation.scores.ROCEvaluation;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter;
import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
import de.lmu.ifi.dbs.elki.result.EvaluationResult;
import de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.OrderingResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;

/**
 * Evaluate outlier scores by their ranking: objects matching the "positive"
 * class label pattern are treated as true outliers, and the ranking induced by
 * the outlier scores is evaluated via ROC AUC, average precision, R-precision
 * and the maximum F1 measure.
 * 
 * @author Erich Schubert
 * 
 * @apiviz.landmark
 * 
 * @apiviz.uses OutlierResult
 * @apiviz.has EvaluationResult oneway - - «create»
 */
public class OutlierRankingEvaluation implements Evaluator {
  /**
   * The logger.
   */
  private static final Logging LOG = Logging.getLogger(OutlierRankingEvaluation.class);

  /**
   * The pattern to identify positive classes.
   * 
   * <p>
   * Key: {@code -outliereval.positive}
   * </p>
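   * <p>
   * Example (the label value {@code Outlier} below is only an illustration;
   * use whatever label pattern marks the outlier class in your data set):
   * {@code -outliereval.positive Outlier}
   * </p>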
   */
  public static final OptionID POSITIVE_CLASS_NAME_ID = new OptionID("outliereval.positive", "Class label for the 'positive' class.");

  /**
   * Stores the "positive" class.
   */
  private Pattern positiveClassName;

  /**
   * Key prefix for statistics logging.
   */
  private String key = OutlierRankingEvaluation.class.getName();

  /**
   * Constructor.
   * 
   * @param positive_class_name Positive class name pattern
   */
  public OutlierRankingEvaluation(Pattern positive_class_name) {
    super();
    this.positiveClassName = positive_class_name;
  }

  /**
   * Evaluate an outlier score result, ranking objects by their scores.
   * 
   * @param size Database size
   * @param positiveids Positive (outlier) object ids
   * @param or Outlier result to evaluate
   * @return Evaluation result
   */
  private EvaluationResult evaluateOutlierResult(int size, SetDBIDs positiveids, OutlierResult or) {
    EvaluationResult res = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
    DBIDsTest test = new DBIDsTest(positiveids);
    MeasurementGroup g = res.newGroup("Evaluation measures");
    double rocauc = ROCEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("ROC AUC", rocauc, 0., 1., .5, false);
    double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("Average Precision", avep, 0., 1., 0., false);
    double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("R-Precision", rprec, 0., 1., 0., false);
    double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
    g.addMeasure("Maximum F1", maxf1, 0., 1., 0., false);
    if(LOG.isStatistics()) {
      // Log each measure under its own statistics key.
      LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
      LOG.statistics(new DoubleStatistic(key + ".precision.average", avep));
      LOG.statistics(new DoubleStatistic(key + ".precision.r", rprec));
      LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maxf1));
    }
    return res;
  }
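  /**
   * Evaluate a plain object ordering (no numeric outlier scores available).
   * 
   * @param size Expected database size
   * @param positiveids Positive (outlier) object ids
   * @param order Object ids in ranking order
   * @return Evaluation result
   */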
  private EvaluationResult evaluateOrderingResult(int size, SetDBIDs positiveids, DBIDs order) {
    if(order.size() != size) {
      throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
    }
    EvaluationResult res = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
    DBIDsTest test = new DBIDsTest(positiveids);
    MeasurementGroup g = res.newGroup("Evaluation measures");
    double rocauc = ROCEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("ROC AUC", rocauc, 0., 1., .5, false);
    double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("Average Precision", avep, 0., 1., 0., false);
    double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("R-Precision", rprec, 0., 1., 0., false);
    double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
    g.addMeasure("Maximum F1", maxf1, 0., 1., 0., false);
    if(LOG.isStatistics()) {
      // Log each measure under its own statistics key.
      LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
      LOG.statistics(new DoubleStatistic(key + ".precision.average", avep));
      LOG.statistics(new DoubleStatistic(key + ".precision.r", rprec));
      LOG.statistics(new DoubleStatistic(key + ".f1.maximum", maxf1));
    }
    return res;
  }

  @Override
  public void processNewResult(HierarchicalResult baseResult, Result result) {
    Database db = ResultUtil.findDatabase(baseResult);
    SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));

    if(positiveids.size() == 0) {
      LOG.warning("Cannot evaluate outlier results - no objects matched the given pattern.");
      return;
    }

    boolean nonefound = true;
    List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
    List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
    // Outlier results are the main use case.
    for(OutlierResult o : oresults) {
      db.getHierarchy().add(o, evaluateOutlierResult(o.getScores().size(), positiveids, o));
      // Process them only once.
      orderings.remove(o.getOrdering());
      nonefound = false;
    }
    // FIXME: find appropriate place to add the derived result
    // otherwise apply an ordering to the database IDs.
    for(OrderingResult or : orderings) {
      DBIDs sorted = or.iter(or.getDBIDs());
      db.getHierarchy().add(or, evaluateOrderingResult(or.getDBIDs().size(), positiveids, sorted));
      nonefound = false;
    }
    if(nonefound) {
      return;
      // logger.warning("No results found to process with ROC curve analyzer. Got "+iterables.size()+" iterables, "+orderings.size()+" orderings.");
    }
  }

  /**
   * Parameterization class.
   * 
   * @author Erich Schubert
   * 
   * @apiviz.exclude
   */
  public static class Parameterizer extends AbstractParameterizer {
    /**
     * Pattern for positive class.
     */
    protected Pattern positiveClassName = null;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
      if(config.grab(positiveClassNameP)) {
        positiveClassName = positiveClassNameP.getValue();
      }
    }

    @Override
    protected OutlierRankingEvaluation makeInstance() {
      return new OutlierRankingEvaluation(positiveClassName);
    }
  }
}