package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.math.MathUtil; import de.lmu.ifi.dbs.elki.math.linearalgebra.CovarianceMatrix; import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix; import 
de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.datastructures.QuickSelect; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; /** * Median Approach is used to discover spatial outliers with multiple * attributes. * *

 * <p>
 * Reference:<br>
 * Chang-Tien Lu and Dechang Chen and Yufeng Kou:<br>
 * Detecting Spatial Outliers with Multiple Attributes<br>
 * in 15th IEEE International Conference on Tools with Artificial Intelligence,
 * 2003
 * </p>

* *

 * <p>
 * Implementation note: attribute standardization is not performed by this
 * class; that step is equivalent to applying the
 * {@link de.lmu.ifi.dbs.elki.datasource.filter.normalization.columnwise.AttributeWiseVarianceNormalization
 * AttributeWiseVarianceNormalization} filter to the data.
 * </p>

*
 * @author Ahmed Hettab
 *
 * @param <N> Spatial Vector
 * @param <O> Non Spatial Vector
 */
@Reference(authors = "Chang-Tien Lu and Dechang Chen and Yufeng Kou",
    title = "Detecting Spatial Outliers with Multiple Attributes",
    booktitle = "Proc. 15th IEEE International Conference on Tools with Artificial Intelligence, 2003",
    url = "http://dx.doi.org/10.1109/TAI.2003.1250179")
public class CTLuMedianMultipleAttributes<N, O extends NumberVector> extends AbstractNeighborhoodOutlier<N> {
  /**
   * Class logger.
   */
  private static final Logging LOG = Logging.getLogger(CTLuMedianMultipleAttributes.class);

  /**
   * Constructor.
   *
   * @param npredf Neighborhood predicate factory, handed to the superclass
   */
  public CTLuMedianMultipleAttributes(NeighborSetPredicate.Factory<N> npredf) {
    super(npredf);
  }

  @Override
  protected Logging getLogger() {
    return LOG;
  }

  /**
   * Run the algorithm.
   *
   * <p>
   * For every object, the attribute-wise median over its neighborhood is
   * subtracted from the object's own attribute vector. The resulting
   * difference vectors are collected in a covariance matrix builder, and each
   * object is then scored by the Mahalanobis distance of its difference
   * vector to the mean difference vector, using the inverted covariance
   * matrix.
   * </p>
   *
   * @param spatial Spatial relation, used to instantiate the neighborhood
   *        predicate
   * @param attributes Attributes relation, supplying the non-spatial values
   *        and the set of object ids that is scored
   * @return Outlier detection result, with the neighborhood predicate
   *         attached as child result
   */
  public OutlierResult run(Relation<N> spatial, Relation<O> attributes) {
    final int dim = RelationUtil.dimensionality(attributes);
    if(LOG.isDebugging()) {
      LOG.debug("Dimensionality: " + dim);
    }
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);

    // First pass: difference of each object to its neighborhood median.
    CovarianceMatrix covmaker = new CovarianceMatrix(dim);
    WritableDataStore<Vector> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, Vector.class);
    for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
      final O obj = attributes.get(iditer);
      final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
      final Vector median = neighborhoodMedian(attributes, neighbors, dim);

      // Delta vector "h"
      // NOTE(review): minusEquals presumably modifies the vector returned by
      // getColumnVector() in place; this assumes that vector is a private
      // copy of the object's data - confirm.
      Vector delta = obj.getColumnVector().minusEquals(median);
      deltas.put(iditer, delta);
      covmaker.put(delta);
    }

    // Finalize covariance matrix. The mean has to be fetched before the
    // covariance builder is consumed by destroyToSampleMatrix().
    Vector mean = covmaker.getMeanVector();
    Matrix cmati = covmaker.destroyToSampleMatrix().inverse();

    // Second pass: Mahalanobis distance of each delta to the mean delta.
    DoubleMinMax minmax = new DoubleMinMax();
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
    for(DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
      final double score = MathUtil.mahalanobisDistance(cmati, deltas.get(iditer), mean);
      minmax.put(score);
      scores.putDouble(iditer, score);
    }

    DoubleRelation scoreResult = new MaterializedDoubleRelation("Median multiple attributes outlier", "median-outlier", scores, attributes.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
  }

  /**
   * Compute the attribute-wise median over a set of neighbors.
   *
   * @param attributes Attributes relation to read the neighbor values from
   * @param neighbors Ids of the neighbors to aggregate
   * @param dim Number of attribute dimensions
   * @return Vector holding, per dimension, the median of the neighbor values
   */
  private Vector neighborhoodMedian(Relation<O> attributes, DBIDs neighbors, int dim) {
    // One row per dimension, so that each row can be handed to QuickSelect.
    double[][] data = new double[dim][neighbors.size()];
    int i = 0;
    // Load data
    for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
      // TODO: skip object itself within neighbors?
      O nobj = attributes.get(iter);
      for(int d = 0; d < dim; d++) {
        data[d][i] = nobj.doubleValue(d);
      }
      i++;
    }
    // NOTE(review): an empty neighbor set yields zero-length rows here; the
    // behavior of QuickSelect.median on an empty array is not visible from
    // this file - confirm.
    double[] md = new double[dim];
    for(int d = 0; d < dim; d++) {
      md[d] = QuickSelect.median(data[d]);
    }
    return new Vector(md);
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD);
  }

  /**
   * Parameterization class.
   *
   * @author Ahmed Hettab
   *
   * @apiviz.exclude
   *
   * @param <N> Neighborhood type
   * @param <O> Attributes vector type
   */
  public static class Parameterizer<N, O extends NumberVector> extends AbstractNeighborhoodOutlier.Parameterizer<N> {
    @Override
    protected CTLuMedianMultipleAttributes<N, O> makeInstance() {
      return new CTLuMedianMultipleAttributes<>(npredf);
    }
  }
}