package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta; import 
de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; /** * SLOM: a new measure for local spatial outliers * *

 * <p>
 * Reference:<br>
 * Sanjay Chawla and Pei Sun<br>
 * SLOM: a new measure for local spatial outliers<br>
 * in Knowledge and Information Systems 9(4), 412-429, 2006
 * </p>
*
 * This implementation works around some corner cases in SLOM, in particular
 * when an object has no neighbor or only a single neighbor (although the
 * results will still not be very useful then), which would otherwise result in
 * divisions by zero.
 * 
 * @author Ahmed Hettab
 * 
 * @param <N> the type the spatial neighborhood is defined over
 * @param <O> the type of objects handled by the algorithm
 * @param <D> the type of Distance used for non spatial attributes
 */
@Title("SLOM: a new measure for local spatial outliers")
@Description("Spatial local outlier measure (SLOM), which captures the local behaviour of datum in their spatial neighbourhood")
@Reference(authors = "Sanjay Chawla and Pei Sun", title = "SLOM: a new measure for local spatial outliers", booktitle = "Knowledge and Information Systems 9(4), 412-429, 2006", url = "http://dx.doi.org/10.1007/s10115-005-0200-2")
public class SLOM<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier<N, O, D> {
  // NOTE(review): the generic type arguments in this class were restored after
  // they had been stripped from the file; confirm them against the declarations
  // of AbstractDistanceBasedSpatialOutlier and its Parameterizer.

  /**
   * The logger for this class.
   */
  private static final Logging logger = Logging.getLogger(SLOM.class);

  /**
   * Constructor.
   * 
   * @param npred Neighborhood predicate
   * @param nonSpatialDistanceFunction Distance function to use on the
   *        non-spatial attributes
   */
  public SLOM(NeighborSetPredicate.Factory<N> npred, PrimitiveDistanceFunction<O, D> nonSpatialDistanceFunction) {
    super(npred, nonSpatialDistanceFunction);
  }

  /**
   * Compute the SLOM score of every object in the relation.
   * 
   * The computation makes two passes over the relation: the first stores a
   * modified distance ("d-tilde") per object, the second combines the modified
   * distances of an object and of its neighbors into the final score.
   * 
   * @param database Database to process (not referenced by this method)
   * @param spatial Spatial Relation to use.
   * @param relation Relation to use.
   * @return Outlier detection result
   */
  public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
    final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
    DistanceQuery<O, D> distFunc = getNonSpatialDistanceFunction().instantiate(relation);

    WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    // First step - calculate D-Tilde: the average non-spatial distance to the
    // neighbors, leaving out the largest of these distances.
    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      DBID id = iditer.getDBID();
      double sum = 0;
      double maxDist = 0;
      int cnt = 0;

      final DBIDs neighbors = npred.getNeighborDBIDs(id);
      for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
        DBID neighbor = iter.getDBID();
        // The object itself is not counted as one of its neighbors.
        if(id.equals(neighbor)) {
          continue;
        }
        double dist = distFunc.distance(id, neighbor).doubleValue();
        sum += dist;
        cnt++;
        maxDist = Math.max(maxDist, dist);
      }
      if(cnt > 1) {
        modifiedDistance.putDouble(id, ((sum - maxDist) / (cnt - 1)));
      }
      else {
        // Use regular distance when the d-tilde trick is undefined.
        // Note: this can be 0 when there were no neighbors.
        modifiedDistance.putDouble(id, maxDist);
      }
    }

    // Second step - compute actual SLOM values
    DoubleMinMax slomminmax = new DoubleMinMax();
    WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);

    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      DBID id = iditer.getDBID();
      double sum = 0;
      int cnt = 0;

      // Sum up the modified distances of the neighbors (object excluded).
      final DBIDs neighbors = npred.getNeighborDBIDs(id);
      for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
        DBID neighbor = iter.getDBID();
        if(neighbor.equals(id)) {
          continue;
        }
        sum += modifiedDistance.doubleValue(neighbor);
        cnt++;
      }
      double slom;
      if(cnt > 0) {
        // With and without the object itself:
        double avgPlus = (sum + modifiedDistance.doubleValue(id)) / (cnt + 1);
        double avg = sum / cnt;

        // beta counts values above avgPlus minus values below avgPlus.
        double beta = 0;
        for(DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
          DBID neighbor = iter.getDBID();
          beta += compareToAverage(modifiedDistance.doubleValue(neighbor), avgPlus);
        }
        // Include object itself, unless the loop above already visited it.
        if(!neighbors.contains(id)) {
          beta += compareToAverage(modifiedDistance.doubleValue(id), avgPlus);
        }
        beta = Math.abs(beta);
        // note: cnt == size of N(x), not N+(x)
        if(cnt > 1) {
          beta = Math.max(beta, 1.0) / (cnt - 1);
        }
        else {
          // Workaround insufficiency in SLOM paper - div by zero
          beta = 1.0;
        }
        beta = beta / (1 + avg);

        slom = beta * modifiedDistance.doubleValue(id);
      }
      else {
        // No neighbors to compare to - no score.
        slom = 0.0;
      }
      sloms.putDouble(id, slom);
      slomminmax.put(slom);
    }

    Relation<Double> scoreResult = new MaterializedRelation<Double>("SLOM", "slom-outlier", TypeUtil.DOUBLE, sloms, relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(slomminmax.getMin(), slomminmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
  }

  /**
   * Vote of a single value when counting values above and below an average.
   * 
   * @param value Value to compare
   * @param average Average to compare against
   * @return +1 if the value is larger than the average, -1 if it is smaller,
   *         0 otherwise
   */
  private static int compareToAverage(double value, double average) {
    if(value > average) {
      return 1;
    }
    if(value < average) {
      return -1;
    }
    return 0;
  }

  @Override
  protected Logging getLogger() {
    return logger;
  }

  @Override
  public TypeInformation[] getInputTypeRestriction() {
    return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD);
  }

  /**
   * Parameterization class.
   * 
   * @author Ahmed Hettab
   * 
   * @apiviz.exclude
   * 
   * @param <N> Neighborhood type
   * @param <O> Data Object type
   * @param <D> Distance type
   */
  public static class Parameterizer<N, O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedSpatialOutlier.Parameterizer<N, O, D> {
    @Override
    protected SLOM<N, O, D> makeInstance() {
      return new SLOM<N, O, D>(npredf, distanceFunction);
    }
  }
}