package de.lmu.ifi.dbs.elki.algorithm.outlier.meta;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.Algorithm;
import de.lmu.ifi.dbs.elki.algorithm.outlier.OutlierAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
import de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;
import de.lmu.ifi.dbs.elki.utilities.ensemble.EnsembleVoting;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectListParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import de.lmu.ifi.dbs.elki.workflow.AlgorithmStep;
/**
* Simple outlier ensemble method.
*
* @author Erich Schubert
*
* @apiviz.composedOf EnsembleVoting
* @apiviz.uses OutlierResult oneway - - reads
* @apiviz.uses OutlierResult oneway - - «create»
*/
public class SimpleOutlierEnsemble extends AbstractAlgorithm implements OutlierAlgorithm {
/**
* The logger for this class.
*/
private static final Logging LOG = Logging.getLogger(SimpleOutlierEnsemble.class);
/**
* The algorithms to run.
*/
private List algorithms;
/**
* The voting in use.
*/
private EnsembleVoting voting;
/**
* Constructor.
*
* @param algorithms Algorithms to run
* @param voting Voting method
*/
public SimpleOutlierEnsemble(List algorithms, EnsembleVoting voting) {
this.algorithms = algorithms;
this.voting = voting;
}
@Override
public OutlierResult run(Database database) throws IllegalStateException {
int num = algorithms.size();
// Run inner outlier algorithms
ModifiableDBIDs ids = DBIDUtil.newHashSet();
ArrayList results = new ArrayList<>(num);
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
for (Algorithm alg : algorithms) {
Result res = alg.run(database);
List ors = ResultUtil.getOutlierResults(res);
for (OutlierResult or : ors) {
results.add(or);
ids.addDBIDs(or.getScores().getDBIDs());
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
}
// Combine
WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
{
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", ids.size(), LOG) : null;
for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
double[] scores = new double[num];
int i = 0;
for (OutlierResult r : results) {
double score = r.getScores().doubleValue(id);
if (!Double.isNaN(score)) {
scores[i] = score;
i++;
} else {
LOG.warning("DBID " + id + " was not given a score by result " + r);
}
}
if (i > 0) {
// Shrink array if necessary.
if (i < scores.length) {
scores = Arrays.copyOf(scores, i);
}
double combined = voting.combine(scores);
sumscore.putDouble(id, combined);
minmax.put(combined);
} else {
LOG.warning("DBID " + id + " was not given any score at all.");
}
LOG.incrementProcessed(cprog);
}
LOG.ensureCompleted(cprog);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
DoubleRelation scores = new MaterializedDoubleRelation("Simple Outlier Ensemble", "ensemble-outlier", sumscore, ids);
return new OutlierResult(meta, scores);
}
@Override
protected Logging getLogger() {
return LOG;
}
@Override
public TypeInformation[] getInputTypeRestriction() {
TypeInformation[] trs = new TypeInformation[algorithms.size()];
for (int i = 0; i < trs.length; i++) {
// FIXME: what if an algorithm needs more than one input data source?
trs[i] = algorithms.get(i).getInputTypeRestriction()[0];
}
return TypeUtil.array(new CombinedTypeInformation(trs));
}
/**
* Parameterization class.
*
* @author Erich Schubert
*
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
/**
* Voting strategy to use in the ensemble.
*/
public static final OptionID VOTING_ID = new OptionID("ensemble.voting", "Voting strategy to use in the ensemble.");
/**
* The algorithms to run.
*/
private List algorithms;
/**
* The voting in use.
*/
private EnsembleVoting voting;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
ObjectListParameter algP = new ObjectListParameter<>(AlgorithmStep.Parameterizer.ALGORITHM_ID, OutlierAlgorithm.class);
if (config.grab(algP)) {
ListParameterization subconfig = new ListParameterization();
ChainedParameterization chain = new ChainedParameterization(subconfig, config);
chain.errorsTo(config);
algorithms = algP.instantiateClasses(chain);
subconfig.logAndClearReportedErrors();
}
ObjectParameter votingP = new ObjectParameter<>(VOTING_ID, EnsembleVoting.class);
if (config.grab(votingP)) {
voting = votingP.instantiateClass(config);
}
}
@Override
protected SimpleOutlierEnsemble makeInstance() {
return new SimpleOutlierEnsemble(algorithms, voting);
}
}
}