diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java | 307 |
1 files changed, 307 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java new file mode 100644 index 00000000..2c728878 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java @@ -0,0 +1,307 @@ +package de.lmu.ifi.dbs.elki.application.greedyensemble; + +/* + This file is part of ELKI: + Environment for Developing KDD-Applications Supported by Index-Structures + + Copyright (C) 2012 + Ludwig-Maximilians-Universität München + Lehr- und Forschungseinheit für Datenbanksysteme + ELKI Development Team + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +import java.awt.image.BufferedImage; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.batik.util.SVGConstants; + +import de.lmu.ifi.dbs.elki.application.AbstractApplication; +import de.lmu.ifi.dbs.elki.data.NumberVector; +import de.lmu.ifi.dbs.elki.data.type.TypeUtil; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.relation.Relation; +import de.lmu.ifi.dbs.elki.evaluation.roc.ROC; +import de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage; +import de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.DoubleMinMax; +import de.lmu.ifi.dbs.elki.result.ResultUtil; +import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil; +import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; +import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair; +import de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling; +import de.lmu.ifi.dbs.elki.visualization.VisualizationTask; +import de.lmu.ifi.dbs.elki.visualization.VisualizerContext; +import de.lmu.ifi.dbs.elki.visualization.VisualizerParameterizer; +import de.lmu.ifi.dbs.elki.visualization.gui.SimpleSVGViewer; +import de.lmu.ifi.dbs.elki.visualization.svg.SVGPlot; +import de.lmu.ifi.dbs.elki.visualization.visualizers.Visualization; +import de.lmu.ifi.dbs.elki.visualization.visualizers.visunproj.SimilarityMatrixVisualizer; +import de.lmu.ifi.dbs.elki.workflow.InputStep; + +/** + * Class to load an outlier detection summary file, as produced by + * {@link ComputeKNNOutlierScores}, and compute a matrix with the pairwise + * gains. It will have one column / row obtained for each combination. + * + * The gain is always computed in relation to the better of the two input + * methods. Green colors indicate the result has improved, red indicate it + * became worse. + * + * Reference: + * <p> + * E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br /> + * On Evaluation of Outlier Rankings and Outlier Scores<br/> + * In Proceedings of the 12th SIAM International Conference on Data Mining + * (SDM), Anaheim, CA, 2012. + * </p> + * + * @author Erich Schubert + */ +@Reference(authors = "E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel", title = "On Evaluation of Outlier Rankings and Outlier Scores", booktitle = "Proc. 12th SIAM International Conference on Data Mining (SDM), Anaheim, CA, 2012.") +public class VisualizePairwiseGainMatrix extends AbstractApplication { + /** + * Get static logger + */ + private static final Logging logger = Logging.getLogger(VisualizePairwiseGainMatrix.class); + + /** + * The data input part. + */ + private InputStep inputstep; + + /** + * Parameterizer for visualizers + */ + private VisualizerParameterizer vispar; + + /** + * Constructor. + * + * @param verbose Verbosity + * @param inputstep Input step + * @param vispar Visualizer parameterizer + */ + public VisualizePairwiseGainMatrix(boolean verbose, InputStep inputstep, VisualizerParameterizer vispar) { + super(verbose); + this.inputstep = inputstep; + this.vispar = vispar; + } + + @Override + public void run() { + final Database database = inputstep.getDatabase(); + final Relation<NumberVector<?, ?>> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD); + final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database); + final DBID firstid = labels.iterDBIDs().next(); + final String firstlabel = labels.get(firstid); + if(!firstlabel.matches("bylabel")) { + throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!"); + } + + // Dimensionality and reference vector + final int dim = DatabaseUtil.dimensionality(relation); + final NumberVector<?, ?> refvec = relation.get(firstid); + + // Build the truth vector + Set<Integer> pos = new TreeSet<Integer>(); + final DoubleIntPair[] combined = new DoubleIntPair[dim]; + { + for(int d = 0; d < dim; d++) { + combined[d] = new DoubleIntPair(0, d); + if(refvec.doubleValue(d + 1) > 0) { + pos.add(d); + } + } + } + + ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs()); + ids.remove(firstid); + final int size = ids.size(); + + double[][] data = new double[size][size]; + DoubleMinMax minmax = new DoubleMinMax(); + + for(int a = 0; a < size; a++) { + final NumberVector<?, ?> veca = relation.get(ids.get(a)); + // Direct AUC score: + { + for(int d = 0; d < dim; d++) { + combined[d].first = veca.doubleValue(d + 1); + combined[d].second = d; + } + Arrays.sort(combined, Collections.reverseOrder(DoubleIntPair.BYFIRST_COMPARATOR)); + double auc = ROC.computeAUC(ROC.materializeROC(dim, pos, Arrays.asList(combined).iterator())); + data[a][a] = auc; + // minmax.put(auc); + // logger.verbose(auc + " " + labels.get(ids.get(a))); + } + // Compare to others, exploiting symmetry + for(int b = a + 1; b < size; b++) { + final NumberVector<?, ?> vecb = relation.get(ids.get(b)); + for(int d = 0; d < dim; d++) { + combined[d].first = veca.doubleValue(d + 1) + vecb.doubleValue(d + 1); + combined[d].second = d; + } + Arrays.sort(combined, Collections.reverseOrder(DoubleIntPair.BYFIRST_COMPARATOR)); + double auc = ROC.computeAUC(ROC.materializeROC(dim, pos, Arrays.asList(combined).iterator())); + // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " + + // labels.get(ids.get(b))); + data[a][b] = auc; + data[b][a] = auc; + // minmax.put(auc); + } + } + for(int a = 0; a < size; a++) { + for(int b = a + 1; b < size; b++) { + double ref = Math.max(data[a][a], data[b][b]); + data[a][b] = (data[a][b] - ref) / (1 - ref); + data[b][a] = (data[b][a] - ref) / (1 - ref); + // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " + + // labels.get(ids.get(b))); + minmax.put(data[a][b]); + } + } + for(int a = 0; a < size; a++) { + data[a][a] = 0; + } + + logger.verbose(minmax.toString()); + + boolean hasneg = (minmax.getMin() < -1E-3); + LinearScaling scale; + if(!hasneg) { + scale = new LinearScaling(minmax); + } + else { + scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin())); + } + + BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB); + for(int x = 0; x < size; x++) { + for(int y = x; y < size; y++) { + double val = data[x][y]; + val = scale.getScaled(val); + // Compute color: + final int col; + { + if(!hasneg) { + int ival = 0xFF & (int) (255 * Math.max(0, val)); + col = 0xff000000 | (ival << 16) | (ival << 8) | ival; + } + else { + if(val >= 0) { + int ival = 0xFF & (int) (255 * val); + col = 0xff000000 | (ival << 8); + } + else { + int ival = 0xFF & (int) (255 * -val); + col = 0xff000000 | (ival << 16); + } + } + } + img.setRGB(x, y, col); + img.setRGB(y, x, col); + } + } + SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids); + database.getHierarchy().add(database, smat); + + VisualizerContext context = vispar.newContext(database); + + // Attach visualizers to results + SimilarityMatrixVisualizer.Factory factory = new SimilarityMatrixVisualizer.Factory(); + factory.processNewResult(database, database); + + List<VisualizationTask> tasks = ResultUtil.filterResults(database, VisualizationTask.class); + for(VisualizationTask task : tasks) { + if(task.getFactory() == factory) { + showVisualization(context, factory, task); + } + } + } + + /** + * Show a single visualization. + * + * @param context + * + * @param factory + * @param task + */ + private void showVisualization(VisualizerContext context, SimilarityMatrixVisualizer.Factory factory, VisualizationTask task) { + SVGPlot plot = new SVGPlot(); + Visualization vis = factory.makeVisualization(task.clone(plot, context, null, 1.0, 1.0)); + plot.getRoot().appendChild(vis.getLayer()); + plot.getRoot().setAttribute(SVGConstants.SVG_WIDTH_ATTRIBUTE, "20cm"); + plot.getRoot().setAttribute(SVGConstants.SVG_HEIGHT_ATTRIBUTE, "20cm"); + plot.getRoot().setAttribute(SVGConstants.SVG_VIEW_BOX_ATTRIBUTE, "0 0 1 1"); + plot.updateStyleElement(); + + (new SimpleSVGViewer()).setPlot(plot); + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractApplication.Parameterizer { + /** + * Data source + */ + InputStep inputstep; + + /** + * Parameterizer for visualizers + */ + private VisualizerParameterizer vispar; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + // Data input + inputstep = config.tryInstantiate(InputStep.class); + // Visualization options + vispar = config.tryInstantiate(VisualizerParameterizer.class); + } + + @Override + protected AbstractApplication makeInstance() { + return new VisualizePairwiseGainMatrix(verbose, inputstep, vispar); + } + } + + /** + * Main method. + * + * @param args Command line parameters. + */ + public static void main(String[] args) { + VisualizePairwiseGainMatrix.runCLIApplication(VisualizePairwiseGainMatrix.class, args); + } +}
\ No newline at end of file |