package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2012 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.Database; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBID; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery; import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; /** * The Spatial Outlier Factor (SOF) is a spatial * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.LOF LOF} variation. * * Since the "reachability distance" of LOF cannot be used canonically in the * bichromatic case, this part of LOF is dropped and the exact distance is used * instead. * *

* Huang, T., Qin, X.
* Detecting outliers in spatial database.
* In: Proc. 3rd International Conference on Image and Graphics, * Hong Kong, China. *

* * A LOF variation simplified with reachDist(o,p) == dist(o,p). * * @author Ahmed Hettab * * @param Neighborhood object type * @param Attribute object type * @param Distance type */ @Title("Spatial Outlier Factor") @Reference(authors = "Huang, T., Qin, X.", title = "Detecting outliers in spatial database", booktitle = "Proc. 3rd International Conference on Image and Graphics", url = "http://dx.doi.org/10.1109/ICIG.2004.53") public class SOF> extends AbstractDistanceBasedSpatialOutlier { /** * The logger for this class. */ private static final Logging logger = Logging.getLogger(SOF.class); /** * Constructor. * * @param npred Neighborhood predicate * @param nonSpatialDistanceFunction Distance function on non-spatial * attributes */ public SOF(NeighborSetPredicate.Factory npred, PrimitiveDistanceFunction nonSpatialDistanceFunction) { super(npred, nonSpatialDistanceFunction); } @Override protected Logging getLogger() { return logger; } /** * The main run method * * @param database Database to use (actually unused) * @param spatial Relation for neighborhood * @param relation Attributes to evaluate * @return Outlier result */ public OutlierResult run(Database database, Relation spatial, Relation relation) { final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial); DistanceQuery distFunc = getNonSpatialDistanceFunction().instantiate(relation); WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT); WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC); DoubleMinMax lofminmax = new DoubleMinMax(); // Compute densities for(DBID id : relation.iterDBIDs()) { DBIDs neighbors = npred.getNeighborDBIDs(id); double avg = 0; for(DBID n : neighbors) { avg += distFunc.distance(id, n).doubleValue(); } double lrd = 1 / (avg / neighbors.size()); if (Double.isNaN(lrd)) { lrd = 0; } lrds.putDouble(id, lrd); } // Compute density quotients for(DBID id : relation.iterDBIDs()) { DBIDs neighbors = npred.getNeighborDBIDs(id); double avg = 0; for(DBID n : neighbors) { avg += lrds.doubleValue(n); } final double lrd = (avg / neighbors.size()) / lrds.doubleValue(id); if (!Double.isNaN(lrd)) { lofs.putDouble(id, lrd); lofminmax.put(lrd); } else { lofs.putDouble(id, 0.0); } } // Build result representation. Relation scoreResult = new MaterializedRelation("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs()); OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0); OutlierResult or = new OutlierResult(scoreMeta, scoreResult); or.addChildResult(npred); return or; } @Override public TypeInformation[] getInputTypeRestriction() { return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD); } /** * Parameterization class * * @author Ahmed Hettab * * @apiviz.exclude * * @param Neighborhood type * @param Attribute object type * @param Distance type */ public static class Parameterizer> extends AbstractDistanceBasedSpatialOutlier.Parameterizer { @Override protected SOF makeInstance() { return new SOF(npredf, distanceFunction); } } }