package de.lmu.ifi.dbs.elki.algorithm.outlier.subspace; /* This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures Copyright (C) 2015 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ import java.util.ArrayList; import de.lmu.ifi.dbs.elki.data.NumberVector; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory; import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil; import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDs; import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation; import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.math.DoubleMinMax; import de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta; import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult; import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; /** * BruteForce variant of the high-dimensional outlier detection algorithm by * Aggarwal and Yu. * * The evolutionary approach is implemented as * {@link de.lmu.ifi.dbs.elki.algorithm.outlier.subspace.AggarwalYuEvolutionary} * . * *

* Reference:
* Outlier detection for high dimensional data
* C.C. Aggarwal, P. S. Yu
* International Conference on Management of Data Proceedings of the 2001 ACM * SIGMOD international conference on Management of data 2001, Santa Barbara, * California, United States *

* * @author Ahmed Hettab * @author Erich Schubert * * @param Vector type */ // TODO: progress logging! @Title("BruteForce: Outlier detection for high dimensional data") @Description("Examines all possible sets of k dimensional projections") @Reference(authors = "C.C. Aggarwal, P. S. Yu", title = "Outlier detection for high dimensional data", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD 2001), Santa Barbara, CA, 2001", url = "http://dx.doi.org/10.1145/375663.375668") public class AggarwalYuNaive extends AbstractAggarwalYuOutlier { /** * The logger for this class. */ private static final Logging LOG = Logging.getLogger(AggarwalYuNaive.class); /** * Constructor. * * @param k K * @param phi Phi */ public AggarwalYuNaive(int k, int phi) { super(k, phi); } /** * Run the algorithm on the given relation. * * @param relation Relation * @return Outlier detection result */ public OutlierResult run(Relation relation) { final int dimensionality = RelationUtil.dimensionality(relation); final int size = relation.size(); ArrayList> ranges = buildRanges(relation); ArrayList> Rk; // Build a list of all subspaces { // R1 initial one-dimensional subspaces. Rk = new ArrayList<>(); // Set of all dim*phi ranges ArrayList q = new ArrayList<>(); for(int i = 0; i < dimensionality; i++) { for(int j = 0; j < phi; j++) { IntIntPair s = new IntIntPair(i, j); q.add(s); // Add to first Rk ArrayList v = new ArrayList<>(); v.add(s); Rk.add(v); } } // build Ri for(int i = 2; i <= k; i++) { ArrayList> Rnew = new ArrayList<>(); for(int j = 0; j < Rk.size(); j++) { ArrayList c = Rk.get(j); for(IntIntPair pair : q) { boolean invalid = false; for(int t = 0; t < c.size(); t++) { if(c.get(t).first == pair.first) { invalid = true; break; } } if(!invalid) { ArrayList neu = new ArrayList<>(c); neu.add(pair); Rnew.add(neu); } } } Rk = Rnew; } } WritableDoubleDataStore sparsity = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC); // calculate the sparsity coefficient for(ArrayList sub : Rk) { DBIDs ids = computeSubspace(sub, ranges); final double sparsityC = sparsity(ids.size(), size, k, phi); if(sparsityC < 0) { for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) { double prev = sparsity.doubleValue(iter); if(Double.isNaN(prev) || sparsityC < prev) { sparsity.putDouble(iter, sparsityC); } } } } DoubleMinMax minmax = new DoubleMinMax(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { double val = sparsity.doubleValue(iditer); if(Double.isNaN(val)) { sparsity.putDouble(iditer, 0.0); val = 0.0; } minmax.put(val); } DoubleRelation scoreResult = new MaterializedDoubleRelation("AggarwalYuNaive", "aggarwal-yu-outlier", sparsity, relation.getDBIDs()); OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, 0.0); return new OutlierResult(meta, scoreResult); } @Override protected Logging getLogger() { return LOG; } /** * Parameterization class. * * @author Erich Schubert * * @apiviz.exclude */ public static class Parameterizer extends AbstractAggarwalYuOutlier.Parameterizer { @Override protected AggarwalYuNaive makeInstance() { return new AggarwalYuNaive<>(k, phi); } } }