package de.lmu.ifi.dbs.elki.algorithm.outlier.spatial;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
/**
* The Spatial Outlier Factor (SOF) is a spatial
* {@link de.lmu.ifi.dbs.elki.algorithm.outlier.LOF LOF} variation.
*
* Since the "reachability distance" of LOF cannot be used canonically in the
* bichromatic case, this part of LOF is dropped and the exact distance is used
* instead.
*
*
* Huang, T., Qin, X.
* Detecting outliers in spatial database.
* In: Proc. 3rd International Conference on Image and Graphics,
* Hong Kong, China.
*
*
* A LOF variation simplified with reachDist(o,p) == dist(o,p).
*
* @author Ahmed Hettab
*
* @param Neighborhood object type
* @param Attribute object type
* @param Distance type
*/
@Title("Spatial Outlier Factor")
@Reference(authors = "Huang, T., Qin, X.", title = "Detecting outliers in spatial database", booktitle = "Proc. 3rd International Conference on Image and Graphics", url = "http://dx.doi.org/10.1109/ICIG.2004.53")
public class SOF> extends AbstractDistanceBasedSpatialOutlier {
/**
* The logger for this class.
*/
private static final Logging logger = Logging.getLogger(SOF.class);
/**
* Constructor.
*
* @param npred Neighborhood predicate
* @param nonSpatialDistanceFunction Distance function on non-spatial
* attributes
*/
public SOF(NeighborSetPredicate.Factory npred, PrimitiveDistanceFunction nonSpatialDistanceFunction) {
super(npred, nonSpatialDistanceFunction);
}
@Override
protected Logging getLogger() {
return logger;
}
/**
* The main run method
*
* @param database Database to use (actually unused)
* @param spatial Relation for neighborhood
* @param relation Attributes to evaluate
* @return Outlier result
*/
public OutlierResult run(Database database, Relation spatial, Relation relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(spatial);
DistanceQuery distFunc = getNonSpatialDistanceFunction().instantiate(relation);
WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax lofminmax = new DoubleMinMax();
// Compute densities
for(DBID id : relation.iterDBIDs()) {
DBIDs neighbors = npred.getNeighborDBIDs(id);
double avg = 0;
for(DBID n : neighbors) {
avg += distFunc.distance(id, n).doubleValue();
}
double lrd = 1 / (avg / neighbors.size());
if (Double.isNaN(lrd)) {
lrd = 0;
}
lrds.putDouble(id, lrd);
}
// Compute density quotients
for(DBID id : relation.iterDBIDs()) {
DBIDs neighbors = npred.getNeighborDBIDs(id);
double avg = 0;
for(DBID n : neighbors) {
avg += lrds.doubleValue(n);
}
final double lrd = (avg / neighbors.size()) / lrds.doubleValue(id);
if (!Double.isNaN(lrd)) {
lofs.putDouble(id, lrd);
lofminmax.put(lrd);
} else {
lofs.putDouble(id, 0.0);
}
}
// Build result representation.
Relation scoreResult = new MaterializedRelation("Spatial Outlier Factor", "sof-outlier", TypeUtil.DOUBLE, lofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
return TypeUtil.array(getNeighborSetPredicateFactory().getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD);
}
/**
* Parameterization class
*
* @author Ahmed Hettab
*
* @apiviz.exclude
*
* @param Neighborhood type
* @param Attribute object type
* @param Distance type
*/
public static class Parameterizer> extends AbstractDistanceBasedSpatialOutlier.Parameterizer {
@Override
protected SOF makeInstance() {
return new SOF(npredf, distanceFunction);
}
}
}