package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.Arrays; import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.Mean; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import 
de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; /** * A Trimmed Mean Approach to Finding Spatial Outliers. * * Outliers are defined by their value deviation from a trimmed mean of the neighbors. * *

* Reference:
* Tianming Hu and Sam Yuan Sung
* A Trimmed Mean Approach to finding Spatial Outliers
* in Intelligent Data Analysis, Volume 8, 2004. *

* *

* The contiguity matrix is defined as
* w_ij = 1/k if j is a neighbor of i, where k is the number of neighbors of i.
*

*
* @author Ahmed Hettab
* @param <N> Neighborhood object type
*/
@Title("A Trimmed Mean Approach to Finding Spatial Outliers")
@Description("A local trimmed mean approach to evaluating the spatial outlier factor which is the degree that a site is outlying compared to its neighbors")
@Reference(authors = "Tianming Hu and Sam Yuan Sung", title = "A trimmed mean approach to finding spatial outliers", booktitle = "Intelligent Data Analysis, Volume 8, 2004", url = "http://iospress.metapress.com/content/PLVLT6431DVNJXNK")
public class TrimmedMeanApproach<N> extends AbstractNeighborhoodOutlier<N> {
  /**
   * The logger for this class.
   */
  private static final Logging LOG = Logging.getLogger(TrimmedMeanApproach.class);

  /**
   * The trimming parameter p: controls which slice of the sorted neighbor
   * values is averaged (see {@link #trimmedMean(double[], int)}).
   */
  private final double p;

  /**
   * Constructor.
   * 
   * @param npredf Neighborhood factory.
   * @param p Parameter p
   */
  protected TrimmedMeanApproach(NeighborSetPredicate.Factory<N> npredf, double p) {
    super(npredf);
    this.p = p;
  }

  /**
   * Run the algorithm.
   * 
   * For every object, the trimmed mean of the attribute values of its
   * neighbors is computed and the signed deviation of the object's own value
   * from that trimmed mean is stored as its error. The final score of an
   * object is the absolute error, multiplied by 0.6745 and divided by the
   * median absolute deviation of all errors from their median.
   * 
   * @param database Database
   * @param nrel Neighborhood relation
   * @param relation Data Relation (1 dimensional!)
   * @return Outlier detection result
   */
  public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector<?>> relation) {
    assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(nrel);

    WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);

    FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      DBIDs neighbors = npred.getNeighborDBIDs(iditer);
      // Collect the attribute values of all neighbors.
      int num = 0;
      double[] values = new double[neighbors.size()];
      for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
        values[num] = relation.get(iter).doubleValue(0);
        num++;
      }

      // Local trimmed mean; without neighbors, fall back to the object's own
      // value, which makes its error exactly zero.
      final double own = relation.get(iditer).doubleValue(0);
      final double tm = (num > 0) ? trimmedMean(values, num) : own;

      // Error: deviation from trimmed mean
      errors.putDouble(iditer, own - tm);

      if(progress != null) {
        progress.incrementProcessed(LOG);
      }
    }
    if(progress != null) {
      progress.ensureCompleted(LOG);
    }

    if(LOG.isVerbose()) {
      LOG.verbose("Computing median error.");
    }
    // Copy all errors into an array to compute their median absolute deviation.
    double[] ei = new double[relation.size()];
    {
      int i = 0;
      for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
        ei[i] = errors.doubleValue(iditer);
        i++;
      }
    }
    final double median_dev_from_median = medianAbsoluteDeviation(ei);

    if(LOG.isVerbose()) {
      LOG.verbose("Normalizing scores.");
    }
    // calculate score
    // NOTE(review): 0.6745 is presumably the usual factor relating the median
    // absolute deviation to the standard deviation of normal data - confirm
    // against the referenced paper.
    // NOTE(review): if the median absolute deviation is zero, the division
    // below yields infinite or NaN scores; behavior is kept as is.
    DoubleMinMax minmax = new DoubleMinMax();
    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      double score = Math.abs(errors.doubleValue(iditer)) * 0.6745 / median_dev_from_median;
      scores.putDouble(iditer, score);
      minmax.put(score);
    }

    // Wrap the scores into a relation and assemble the outlier result.
    Relation<Double> scoreResult = new MaterializedRelation<Double>("TrimmedMean", "Trimmed Mean Score", TypeUtil.DOUBLE, scores, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
  }

  /**
   * Compute the trimmed mean of the first {@code num} entries of
   * {@code values}.
   * 
   * The entries are sorted in place, then the entries at the sorted positions
   * {@code floor(p * (num - 1))} to {@code floor((1 - p) * (num - 1))}, both
   * inclusive, are averaged.
   * 
   * @param values Value buffer; its first {@code num} entries get sorted
   * @param num Number of valid entries, must be positive
   * @return Mean of the retained entries
   */
  private double trimmedMean(double[] values, int num) {
    final int left = (int) Math.floor(p * (num - 1));
    final int right = (int) Math.floor((1 - p) * (num - 1));
    Arrays.sort(values, 0, num);
    Mean mean = new Mean();
    for(int i = left; i <= right; i++) {
      mean.put(values[i]);
    }
    return mean.getMean();
  }

  /**
   * Compute the median of the absolute deviations from the median.
   * 
   * The array is overwritten with the absolute deviations from its median.
   * 
   * @param data Input values, modified by this call
   * @return Median of {@code |data[i] - median(data)|}
   */
  private static double medianAbsoluteDeviation(double[] data) {
    final double median = QuickSelect.median(data);
    // Update to deviation from median
    for(int i = 0; i < data.length; i++) {
      data[i] = Math.abs(data[i] - median);
    }
    // Again, extract median
    return QuickSelect.median(data);
  }

  @Override
  protected Logging getLogger() {
    return LOG;
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    // Get one dimensional attribute for analysis.
    return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), new VectorFieldTypeInformation<NumberVector<?>>(NumberVector.class, 1));
  }

  /**
   * Parameterizer.
   * 
   * @author Ahmed Hettab
   * 
   * @apiviz.exclude
   * 
   * @param <N> Neighborhood object type
   */
  public static class Parameterizer<N> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
    /**
     * Parameter for the percentile value p.
     */
    public static final OptionID P_ID = new OptionID("tma.p", "the percentile parameter");

    /**
     * Percentile parameter p. Starts at 0.2 and is replaced by the configured
     * value when the option is successfully read.
     */
    protected double p = 0.2;

    @Override
    protected void makeOptions(Parameterization config) {
      super.makeOptions(config);
      DoubleParameter pP = new DoubleParameter(P_ID);
      // Restrict p to be greater than 0.0 and less than 0.5.
      pP.addConstraint(new GreaterConstraint(0.0));
      pP.addConstraint(new LessConstraint(0.5));
      if(config.grab(pP)) {
        p = pP.getValue();
      }
    }

    @Override
    protected TrimmedMeanApproach<N> makeInstance() {
      return new TrimmedMeanApproach<N>(npredf, p);
    }
  }
}