Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/application')
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java | 4
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/ComputeSingleColorHistogram.java | 33
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/GeneratorXMLSpec.java | 92
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/KDDCLIApplication.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java | 8
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java | 8
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/cache/package-info.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java | 468
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/greedyensemble/GreedyEnsembleExperiment.java | 439
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java | 307
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/greedyensemble/package-info.java | 32
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/internal/CheckELKIServices.java (renamed from src/de/lmu/ifi/dbs/elki/application/internal/CheckELKIProperties.java) | 96
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/internal/CheckParameterizables.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/internal/DocumentParameters.java | 13
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/internal/DocumentReferences.java | 176
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/internal/package-info.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/jsmap/JSONBuffer.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/jsmap/JSONWebServer.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/jsmap/package-info.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/package-info.java | 2
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/visualization/KNNExplorer.java | 19
 -rw-r--r--  src/de/lmu/ifi/dbs/elki/application/visualization/package-info.java | 2
23 files changed, 1570 insertions, 145 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java b/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java
index c2f19ef5..2775c928 100644
--- a/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java
+++ b/src/de/lmu/ifi/dbs/elki/application/AbstractApplication.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -75,7 +75,7 @@ public abstract class AbstractApplication implements Parameterizable {
/**
* Information for citation and version.
*/
- public static final String INFORMATION = "ELKI Version 0.4 (2011, August)" + NEWLINE + NEWLINE + "published in:" + NEWLINE + "E. Achtert, A. Hettab, H.-P. Kriegel, E. Schubert, A. Zimek:" + NEWLINE + "Spatial Outlier Detection: Data, Algorithms, Visualizations." + NEWLINE + "In Proceedings of the 12th International Symposium on" + NEWLINE + "Spatial and Temporal Databases (SSTD), Minneapolis, MN, 2011." + NEWLINE;
+ public static final String INFORMATION = "ELKI Version 0.5.0~beta1 (2012, April)" + NEWLINE + NEWLINE + "published in:" + NEWLINE + "E. Achtert, S. Goldhofer, H.-P. Kriegel, E. Schubert, A. Zimek:" + NEWLINE + "Evaluation of Clusterings – Metrics and Visual Support." + NEWLINE + "In Proceedings of the 28th"+NEWLINE+"International Conference on Data Engineering (ICDE), Washington, DC, 2012." + NEWLINE;
/**
* Parameter that specifies the name of the output file.
diff --git a/src/de/lmu/ifi/dbs/elki/application/ComputeSingleColorHistogram.java b/src/de/lmu/ifi/dbs/elki/application/ComputeSingleColorHistogram.java
index 63fc5d63..e6fe5231 100644
--- a/src/de/lmu/ifi/dbs/elki/application/ComputeSingleColorHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/application/ComputeSingleColorHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -58,8 +58,15 @@ public class ComputeSingleColorHistogram extends AbstractApplication {
* Key: {@code -colorhist.in}
* </p>
*/
- public static final OptionID INPUT_ID = OptionID.getOrCreateOptionID("colorhist.in", "Input image for color histograms.");
+ public static final OptionID INPUT_ID = OptionID.getOrCreateOptionID("colorhist.in", "Input image file for color histogram.");
+ /**
+ * Parameter that specifies the name of the mask input file.
+ * <p>
+ * Key: {@code -colorhist.mask}
+ * </p>
+ */
+ public static final OptionID MASK_ID = OptionID.getOrCreateOptionID("colorhist.mask", "Input mask image file.");
/**
* Class that will compute the actual histogram
@@ -72,23 +79,30 @@ public class ComputeSingleColorHistogram extends AbstractApplication {
private File inputFile;
/**
+ * Mask file.
+ */
+ private File maskFile;
+
+ /**
* Constructor.
*
* @param verbose Verbose flag
* @param histogrammaker Class to compute histograms with
* @param inputFile Input file
+ * @param maskFile Mask file
*/
- public ComputeSingleColorHistogram(boolean verbose, ComputeColorHistogram histogrammaker, File inputFile) {
+ public ComputeSingleColorHistogram(boolean verbose, ComputeColorHistogram histogrammaker, File inputFile, File maskFile) {
super(verbose);
this.histogrammaker = histogrammaker;
this.inputFile = inputFile;
+ this.maskFile = maskFile;
}
@Override
public void run() throws UnableToComplyException {
double[] hist;
try {
- hist = histogrammaker.computeColorHistogram(inputFile);
+ hist = histogrammaker.computeColorHistogram(inputFile, maskFile);
}
catch(IOException e) {
throw new UnableToComplyException(e);
@@ -114,6 +128,11 @@ public class ComputeSingleColorHistogram extends AbstractApplication {
*/
private File inputFile;
+ /**
+ * Mask file.
+ */
+ private File maskFile;
+
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
@@ -125,11 +144,15 @@ public class ComputeSingleColorHistogram extends AbstractApplication {
if(config.grab(inputP)) {
inputFile = inputP.getValue();
}
+ final FileParameter maskP = new FileParameter(MASK_ID, FileParameter.FileType.INPUT_FILE, true);
+ if(config.grab(maskP)) {
+ maskFile = maskP.getValue();
+ }
}
@Override
protected ComputeSingleColorHistogram makeInstance() {
- return new ComputeSingleColorHistogram(verbose, histogrammaker, inputFile);
+ return new ComputeSingleColorHistogram(verbose, histogrammaker, inputFile, maskFile);
}
}
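
For illustration only (not part of the commit): a minimal sketch of calling the extended constructor with the new mask argument. The file paths are placeholder assumptions, imports are omitted, and a null mask is assumed to mean "no mask", as the optional -colorhist.mask parameter suggests.

    // Hedged usage sketch; paths are placeholders, not from the diff.
    static void histogramWithMask(ComputeColorHistogram maker) throws UnableToComplyException {
      File image = new File("flower.png");
      File mask = new File("flower-mask.png"); // assumption: may be null when no mask is used
      new ComputeSingleColorHistogram(false /* verbose */, maker, image, mask).run();
    }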
diff --git a/src/de/lmu/ifi/dbs/elki/application/GeneratorXMLSpec.java b/src/de/lmu/ifi/dbs/elki/application/GeneratorXMLSpec.java
index 373960a5..ccee6d39 100644
--- a/src/de/lmu/ifi/dbs/elki/application/GeneratorXMLSpec.java
+++ b/src/de/lmu/ifi/dbs/elki/application/GeneratorXMLSpec.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,22 +23,30 @@ package de.lmu.ifi.dbs.elki.application;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.iterator.TIntIterator;
+import gnu.trove.list.TIntList;
+import gnu.trove.list.array.TIntArrayList;
+
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
-import java.util.List;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
-import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorInterface;
-import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorInterfaceDynamic;
+import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster;
-import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.distribution.Distribution;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.datasource.GeneratorXMLDatabaseConnection;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -124,27 +132,52 @@ public class GeneratorXMLSpec extends AbstractApplication {
* @throws IOException thrown on write errors
*/
public void writeClusters(OutputStreamWriter outStream, MultipleObjectsBundle data) throws IOException {
- List<GeneratorInterface> clusters = generator.gen.getGenerators();
+ int modelcol = -1;
+ { // Find model column
+ for(int i = 0; i < data.metaLength(); i++) {
+ if(TypeUtil.MODEL.isAssignableFromType(data.meta(i))) {
+ modelcol = i;
+ break;
+ }
+ }
+ }
+ if(modelcol < 0) {
+ throw new AbortException("No model column found in bundle.");
+ }
+ ArrayList<Model> models = new ArrayList<Model>();
+ Map<Model, TIntList> modelMap = new HashMap<Model, TIntList>();
+ { // Build a map from model to the actual objects
+ for(int i = 0; i < data.dataLength(); i++) {
+ Model model = (Model) data.data(i, modelcol);
+ TIntList modelids = modelMap.get(model);
+ if(modelids == null) {
+ models.add(model);
+ modelids = new TIntArrayList();
+ modelMap.put(model, modelids);
+ }
+ modelids.add(i);
+ }
+ }
// compute global discard values
int totalsize = 0;
int totaldisc = 0;
- assert (clusters.size() > 0);
- for(GeneratorInterface curclus : clusters) {
- totalsize = totalsize + curclus.getSize();
- if(curclus instanceof GeneratorSingleCluster) {
- totaldisc = totaldisc + ((GeneratorSingleCluster) curclus).getDiscarded();
+ for(Entry<Model, TIntList> ent : modelMap.entrySet()) {
+ totalsize = totalsize + ent.getValue().size();
+ if(ent.getKey() instanceof GeneratorSingleCluster) {
+ totaldisc = totaldisc + ((GeneratorSingleCluster) ent.getKey()).getDiscarded();
}
}
double globdens = (double) (totalsize + totaldisc) / totalsize;
outStream.write("########################################################" + LINE_SEPARATOR);
- outStream.write("## Number of clusters: " + clusters.size() + LINE_SEPARATOR);
- for(GeneratorInterface curclus : clusters) {
+ outStream.write("## Number of clusters: " + models.size() + LINE_SEPARATOR);
+ for(Model model : models) {
+ TIntList ids = modelMap.get(model);
outStream.write("########################################################" + LINE_SEPARATOR);
- outStream.write("## Cluster: " + curclus.getName() + LINE_SEPARATOR);
- outStream.write("########################################################" + LINE_SEPARATOR);
- outStream.write("## Size: " + curclus.getSize() + LINE_SEPARATOR);
- if(curclus instanceof GeneratorSingleCluster) {
- GeneratorSingleCluster cursclus = (GeneratorSingleCluster) curclus;
+ outStream.write("## Size: " + ids.size() + LINE_SEPARATOR);
+ if(model instanceof GeneratorSingleCluster) {
+ GeneratorSingleCluster cursclus = (GeneratorSingleCluster) model;
+ outStream.write("########################################################" + LINE_SEPARATOR);
+ outStream.write("## Cluster: " + cursclus.getName() + LINE_SEPARATOR);
Vector cmin = cursclus.getClipmin();
Vector cmax = cursclus.getClipmax();
if(cmin != null && cmax != null) {
@@ -152,27 +185,30 @@ public class GeneratorXMLSpec extends AbstractApplication {
}
outStream.write("## Density correction factor: " + cursclus.getDensityCorrection() + LINE_SEPARATOR);
outStream.write("## Generators:" + LINE_SEPARATOR);
- for(Distribution gen : cursclus.getAxes()) {
+ for(int i = 0; i < cursclus.getDim(); i++) {
+ Distribution gen = cursclus.getDistribution(i);
outStream.write("## " + gen.toString() + LINE_SEPARATOR);
}
- if(cursclus.getTrans() != null && cursclus.getTrans().getTransformation() != null) {
+ if(cursclus.getTransformation() != null && cursclus.getTransformation().getTransformation() != null) {
outStream.write("## Affine transformation matrix:" + LINE_SEPARATOR);
- outStream.write(FormatUtil.format(cursclus.getTrans().getTransformation(), "## ") + LINE_SEPARATOR);
+ outStream.write(FormatUtil.format(cursclus.getTransformation().getTransformation(), "## ") + LINE_SEPARATOR);
}
- }
- if(curclus instanceof GeneratorInterfaceDynamic) {
- GeneratorSingleCluster cursclus = (GeneratorSingleCluster) curclus;
outStream.write("## Discards: " + cursclus.getDiscarded() + " Retries left: " + cursclus.getRetries() + LINE_SEPARATOR);
double corf = /* cursclus.overweight */(double) (cursclus.getSize() + cursclus.getDiscarded()) / cursclus.getSize() / globdens;
outStream.write("## Density correction factor estimation: " + corf + LINE_SEPARATOR);
}
outStream.write("########################################################" + LINE_SEPARATOR);
- for(Vector p : curclus.getPoints()) {
- for(int i = 0; i < p.getRowDimensionality(); i++) {
- outStream.write(p.get(i) + " ");
+ for(TIntIterator iter = ids.iterator(); iter.hasNext();) {
+ int num = iter.next();
+ for(int c = 0; c < data.metaLength(); c++) {
+ if(c != modelcol) {
+ if (c > 0) {
+ outStream.write(" ");
+ }
+ outStream.write(data.data(num, c).toString());
+ }
}
- outStream.write(curclus.getName());
outStream.write(LINE_SEPARATOR);
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/KDDCLIApplication.java b/src/de/lmu/ifi/dbs/elki/application/KDDCLIApplication.java
index 59f75a7b..9184df58 100644
--- a/src/de/lmu/ifi/dbs/elki/application/KDDCLIApplication.java
+++ b/src/de/lmu/ifi/dbs/elki/application/KDDCLIApplication.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java
index 62080b51..c2e575be 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheDoubleDistanceInOnDiskMatrix.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.cache;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -119,7 +119,7 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance);
int matrixsize = 0;
- for(DBID id : distanceQuery.getRelation().iterDBIDs()) {
+ for(DBID id : relation.iterDBIDs()) {
matrixsize = Math.max(matrixsize, id.getIntegerID() + 1);
if(id.getIntegerID() < 0) {
throw new AbortException("OnDiskMatrixCache does not allow negative DBIDs.");
@@ -134,8 +134,8 @@ public class CacheDoubleDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>
throw new AbortException("Error creating output matrix.", e);
}
- for(DBID id1 : distanceQuery.getRelation().iterDBIDs()) {
- for(DBID id2 : distanceQuery.getRelation().iterDBIDs()) {
+ for(DBID id1 : relation.iterDBIDs()) {
+ for(DBID id2 : relation.iterDBIDs()) {
if(id2.getIntegerID() >= id1.getIntegerID()) {
double d = distanceQuery.distance(id1, id2).doubleValue();
if(debugExtraCheckSymmetry) {
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java b/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java
index 7c99779d..237aedb9 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/CacheFloatDistanceInOnDiskMatrix.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.cache;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -124,7 +124,7 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
DistanceQuery<O, D> distanceQuery = database.getDistanceQuery(relation, distance);
int matrixsize = 0;
- for(DBID id : distanceQuery.getRelation().iterDBIDs()) {
+ for(DBID id : relation.iterDBIDs()) {
matrixsize = Math.max(matrixsize, id.getIntegerID() + 1);
if(id.getIntegerID() < 0) {
throw new AbortException("OnDiskMatrixCache does not allow negative DBIDs.");
@@ -139,8 +139,8 @@ public class CacheFloatDistanceInOnDiskMatrix<O, D extends NumberDistance<D, ?>>
throw new AbortException("Error creating output matrix.", e);
}
- for(DBID id1 : distanceQuery.getRelation().iterDBIDs()) {
- for(DBID id2 : distanceQuery.getRelation().iterDBIDs()) {
+ for(DBID id1 : relation.iterDBIDs()) {
+ for(DBID id2 : relation.iterDBIDs()) {
if(id2.getIntegerID() >= id1.getIntegerID()) {
float d = distanceQuery.distance(id1, id2).floatValue();
if(debugExtraCheckSymmetry) {
diff --git a/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java b/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java
index 17013c55..5ed7ec1c 100644
--- a/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/application/cache/package-info.java
@@ -11,7 +11,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java
new file mode 100644
index 00000000..f6506fcc
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/ComputeKNNOutlierScores.java
@@ -0,0 +1,468 @@
+package de.lmu.ifi.dbs.elki.application.greedyensemble;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.ABOD;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.KNNOutlier;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.KNNWeightOutlier;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.LDOF;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.LOF;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.LoOP;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.ByLabelOutlier;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.TrivialAllOutlier;
+import de.lmu.ifi.dbs.elki.algorithm.outlier.trivial.TrivialNoOutlier;
+import de.lmu.ifi.dbs.elki.application.AbstractApplication;
+import de.lmu.ifi.dbs.elki.data.DoubleVector;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.QueryUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.PreprocessorKNNQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.distance.similarityfunction.kernel.PolynomialKernelFunction;
+import de.lmu.ifi.dbs.elki.index.preprocessed.knn.MaterializeKNNPreprocessor;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultHierarchy;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.Base64;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling;
+import de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction;
+import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.MinusLogStandardDeviationScaling;
+import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.StandardDeviationScaling;
+import de.lmu.ifi.dbs.elki.workflow.InputStep;
+
+/**
+ * Application that runs a series of kNN-based algorithms on a data set, for
+ * building an ensemble in a second step. The output file consists of a label
+ * and one score value for each object.
+ *
+ * Reference:
+ * <p>
+ * E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br />
+ * On Evaluation of Outlier Rankings and Outlier Scores<br/>
+ * In Proceedings of the 12th SIAM International Conference on Data Mining
+ * (SDM), Anaheim, CA, 2012.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel", title = "On Evaluation of Outlier Rankings and Outlier Scores", booktitle = "Proc. 12th SIAM International Conference on Data Mining (SDM), Anaheim, CA, 2012.")
+public class ComputeKNNOutlierScores<O, D extends NumberDistance<D, ?>> extends AbstractApplication {
+ /**
+ * Our logger class.
+ */
+ private static final Logging logger = Logging.getLogger(ComputeKNNOutlierScores.class);
+
+ /**
+ * Input step
+ */
+ final InputStep inputstep;
+
+ /**
+ * Distance function to use
+ */
+ final DistanceFunction<? super O, D> distf;
+
+ /**
+ * Starting value of k.
+ */
+ final int startk;
+
+ /**
+ * k step size
+ */
+ final int stepk;
+
+ /**
+ * Maximum value of k
+ */
+ final int maxk;
+
+ /**
+ * Output file
+ */
+ File outfile;
+
+ /**
+ * By label outlier detection - reference
+ */
+ ByLabelOutlier bylabel;
+
+ /**
+ * Include ABOD in the experiments.
+ */
+ boolean runabod = false;
+
+ /**
+ * Constructor.
+ *
+ * @param verbose Verbose flag
+ * @param inputstep Input step
+ * @param distf Distance function
+ * @param startk Starting value of k
+ * @param stepk K step size
+ * @param maxk Maximum k value
+ * @param bylabel By label outlier (reference)
+ * @param outfile Output file
+ */
+ public ComputeKNNOutlierScores(boolean verbose, InputStep inputstep, DistanceFunction<? super O, D> distf, int startk, int stepk, int maxk, ByLabelOutlier bylabel, File outfile) {
+ super(verbose);
+ this.distf = distf;
+ this.startk = startk;
+ this.stepk = stepk;
+ this.maxk = maxk;
+ this.inputstep = inputstep;
+ this.bylabel = bylabel;
+ this.outfile = outfile;
+ }
+
+ @Override
+ public void run() {
+ final Database database = inputstep.getDatabase();
+ final Relation<O> relation = database.getRelation(distf.getInputTypeRestriction());
+ logger.verbose("Running preprocessor ...");
+ MaterializeKNNPreprocessor<O, D> preproc = new MaterializeKNNPreprocessor<O, D>(relation, distf, maxk + 2);
+ database.addIndex(preproc);
+
+ // Test that we did get a proper index query
+ KNNQuery<O, D> knnq = QueryUtil.getKNNQuery(relation, distf);
+ if(!(knnq instanceof PreprocessorKNNQuery)) {
+ logger.warning("Not using preprocessor knn query -- KNN queries using class: " + knnq.getClass());
+ }
+
+ final DBIDs ids = relation.getDBIDs();
+
+ final PrintStream fout;
+ try {
+ fout = new PrintStream(outfile);
+ }
+ catch(FileNotFoundException e) {
+ throw new AbortException("Cannot create output file.", e);
+ }
+ // Control: print the DBIDs in case we are seeing an odd iteration
+ {
+ try {
+ MessageDigest md = MessageDigest.getInstance("MD5");
+ for(DBID id : ids) {
+ md.update(" ".getBytes());
+ md.update(id.toString().getBytes());
+ }
+ fout.append("# DBID-series MD5:");
+ fout.append(Base64.encodeBase64(md.digest()));
+ fout.append(FormatUtil.NEWLINE);
+ }
+ catch(NoSuchAlgorithmException e) {
+ throw new AbortException("MD5 not found.");
+ }
+ }
+
+ // Label outlier result (reference)
+ {
+ OutlierResult bylabelresult = bylabel.run(database);
+ writeResult(fout, ids, bylabelresult, new IdentityScaling(), "bylabel");
+ }
+ // No/all outliers "results"
+ {
+ OutlierResult noresult = (new TrivialNoOutlier()).run(database);
+ writeResult(fout, ids, noresult, new IdentityScaling(), "no-outliers");
+ OutlierResult allresult = (new TrivialAllOutlier()).run(database);
+ writeResult(fout, ids, allresult, new IdentityScaling(), "all-outliers");
+ }
+
+ // KNN
+ logger.verbose("Running KNN");
+ runForEachK(new AlgRunner() {
+ @Override
+ public void run(int k, String kstr) {
+ KNNOutlier<O, D> knn = new KNNOutlier<O, D>(distf, k);
+ OutlierResult knnresult = knn.run(database, relation);
+ // Setup scaling
+ StandardDeviationScaling scaling = new StandardDeviationScaling();
+ scaling.prepare(knnresult);
+ writeResult(fout, ids, knnresult, scaling, "KNN-" + kstr);
+ detachResult(database, knnresult);
+ }
+ });
+ // KNN Weight
+ logger.verbose("Running KNNweight");
+ runForEachK(new AlgRunner() {
+ @Override
+ public void run(int k, String kstr) {
+ KNNWeightOutlier<O, D> knnw = new KNNWeightOutlier<O, D>(distf, k);
+ OutlierResult knnresult = knnw.run(database, relation);
+ // Setup scaling
+ StandardDeviationScaling scaling = new StandardDeviationScaling();
+ scaling.prepare(knnresult);
+ writeResult(fout, ids, knnresult, scaling, "KNNW-" + kstr);
+ detachResult(database, knnresult);
+ }
+ });
+ // Run LOF
+ logger.verbose("Running LOF");
+ runForEachK(new AlgRunner() {
+ @Override
+ public void run(int k, String kstr) {
+ LOF<O, D> lof = new LOF<O, D>(k, distf, distf);
+ OutlierResult lofresult = lof.run(relation);
+ // Setup scaling
+ StandardDeviationScaling scaling = new StandardDeviationScaling(1.0, 1.0);
+ scaling.prepare(lofresult);
+ writeResult(fout, ids, lofresult, scaling, "LOF-" + kstr);
+ detachResult(database, lofresult);
+ }
+ });
+ // LoOP
+ logger.verbose("Running LoOP");
+ runForEachK(new AlgRunner() {
+ @Override
+ public void run(int k, String kstr) {
+ LoOP<O, D> loop = new LoOP<O, D>(k, k, distf, distf, 1.0);
+ OutlierResult loopresult = loop.run(database, relation);
+ writeResult(fout, ids, loopresult, new IdentityScaling(), "LOOP-" + kstr);
+ detachResult(database, loopresult);
+ }
+ });
+ // LDOF
+ logger.verbose("Running LDOF");
+ runForEachK(new AlgRunner() {
+ @Override
+ public void run(int k, String kstr) {
+ LDOF<O, D> ldof = new LDOF<O, D>(distf, k);
+ OutlierResult ldofresult = ldof.run(database, relation);
+ // Setup scaling
+ StandardDeviationScaling scaling = new StandardDeviationScaling(1.0, 1.0);
+ scaling.prepare(ldofresult);
+ writeResult(fout, ids, ldofresult, scaling, "LDOF-" + kstr);
+ detachResult(database, ldofresult);
+ }
+ });
+ // ABOD
+ if(runabod && relation.size() < 10000) {
+ try {
+ final PolynomialKernelFunction poly = new PolynomialKernelFunction(PolynomialKernelFunction.DEFAULT_DEGREE);
+ @SuppressWarnings("unchecked")
+ final DistanceFunction<DoubleVector, DoubleDistance> df = DistanceFunction.class.cast(distf);
+ logger.verbose("Running ABOD");
+ runForEachK(new AlgRunner() {
+ @Override
+ public void run(int k, String kstr) {
+ ABOD<DoubleVector> abod = new ABOD<DoubleVector>(k, poly, df);
+ OutlierResult abodresult = abod.run(database);
+ // Setup scaling
+ StandardDeviationScaling scaling = new MinusLogStandardDeviationScaling(null, 1.0);
+ scaling.prepare(abodresult);
+ writeResult(fout, ids, abodresult, scaling, "ABOD-" + kstr);
+ detachResult(database, abodresult);
+ }
+ });
+ }
+ catch(ClassCastException e) {
+ // ABOD might just be not appropriate.
+ logger.warning("Running ABOD failed - probably not appropriate to this data type / distance?", e);
+ }
+ }
+ }
+
+ /**
+ * Avoid that (future changes?) keep a reference to the result.
+ *
+ * @param database Database
+ * @param discardresult Result to discard.
+ */
+ void detachResult(Database database, OutlierResult discardresult) {
+ final ResultHierarchy hier = database.getHierarchy();
+ for(Result parent : hier.getParents(discardresult)) {
+ hier.remove(parent, discardresult);
+ }
+ }
+
+ /**
+ * Write a single output line.
+ *
+ * @param out Output stream
+ * @param ids DBIDs
+ * @param result Outlier result
+ * @param scaling Scaling function.
+ * @param label Identification label
+ */
+ void writeResult(PrintStream out, DBIDs ids, OutlierResult result, ScalingFunction scaling, String label) {
+ out.append(label);
+ Relation<Double> scores = result.getScores();
+ for(DBID id : ids) {
+ final double value = scaling.getScaled(scores.get(id));
+ out.append(" ").append(FormatUtil.format(value, FormatUtil.NF8));
+ }
+ out.append(FormatUtil.NEWLINE);
+ }
+
+ /**
+ * Iterate over the k range.
+ *
+ * @param runner Runner to run
+ */
+ private void runForEachK(AlgRunner runner) {
+ final int digits = (int) Math.ceil(Math.log10(maxk));
+ final int startk = (this.startk > 0) ? this.startk : this.stepk;
+ for(int k = startk; k <= maxk; k += stepk) {
+ String kstr = String.format("%0" + digits + "d", k);
+ runner.run(k, kstr);
+ }
+ }
+
+ /**
+ * Run an algorithm for a given k.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ private interface AlgRunner {
+ public void run(int k, String kstr);
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractApplication.Parameterizer {
+ /**
+ * Option ID for k step size.
+ */
+ public static final OptionID STEPK_ID = OptionID.getOrCreateOptionID("stepk", "Step size for k.");
+
+ /**
+ * Option ID for k start size.
+ */
+ public static final OptionID STARTK_ID = OptionID.getOrCreateOptionID("startk", "Minimum value for k.");
+
+ /**
+ * Option ID for k step size.
+ */
+ public static final OptionID MAXK_ID = OptionID.getOrCreateOptionID("maxk", "Maximum value for k.");
+
+ /**
+ * k step size
+ */
+ int stepk;
+
+ /**
+ * starting value of k
+ */
+ int startk;
+
+ /**
+ * Maximum value of k
+ */
+ int maxk;
+
+ /**
+ * Data source
+ */
+ InputStep inputstep;
+
+ /**
+ * Distance function to use
+ */
+ DistanceFunction<? super O, D> distf;
+
+ /**
+ * By label outlier -- reference
+ */
+ ByLabelOutlier bylabel;
+
+ /**
+ * Output destination file
+ */
+ File outfile;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ // Data input
+ inputstep = config.tryInstantiate(InputStep.class);
+ // Distance function
+ ObjectParameter<DistanceFunction<? super O, D>> distP = AbstractAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, DistanceFunction.class);
+ if(config.grab(distP)) {
+ distf = distP.instantiateClass(config);
+ }
+ // k parameters
+ IntParameter stepkP = new IntParameter(STEPK_ID, new GreaterConstraint(0));
+ if(config.grab(stepkP)) {
+ stepk = stepkP.getValue();
+ }
+ IntParameter startkP = new IntParameter(STARTK_ID, true);
+ if(config.grab(startkP)) {
+ startk = startkP.getValue();
+ }
+ else {
+ startk = stepk;
+ }
+ IntParameter maxkP = new IntParameter(MAXK_ID, new GreaterConstraint(0));
+ if(config.grab(maxkP)) {
+ maxk = maxkP.getValue();
+ }
+ bylabel = config.tryInstantiate(ByLabelOutlier.class);
+ // Output
+ outfile = super.getParameterOutputFile(config, "File to output the resulting score vectors to.");
+ }
+
+ @Override
+ protected AbstractApplication makeInstance() {
+ return new ComputeKNNOutlierScores<O, D>(verbose, inputstep, distf, startk, stepk, maxk, bylabel, outfile);
+ }
+ }
+
+ /**
+ * Main method.
+ *
+ * @param args Command line parameters.
+ */
+ public static void main(String[] args) {
+ ComputeKNNOutlierScores.runCLIApplication(ComputeKNNOutlierScores.class, args);
+ }
+}
\ No newline at end of file
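
For reference, the per-k labels written by runForEachK above (e.g. "KNN-05") come from a zero-padded format string. A minimal standalone sketch, using example k values that are assumptions rather than values from the diff:

    // Sketch of the k iteration and zero-padded label used in runForEachK.
    int startk = 5, stepk = 5, maxk = 50;
    int digits = (int) Math.ceil(Math.log10(maxk)); // 2 digits for maxk = 50
    for(int k = startk; k <= maxk; k += stepk) {
      String kstr = String.format("%0" + digits + "d", k);
      System.out.println("KNN-" + kstr); // KNN-05, KNN-10, ..., KNN-50
    }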
diff --git a/src/de/lmu/ifi/dbs/elki/application/greedyensemble/GreedyEnsembleExperiment.java b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/GreedyEnsembleExperiment.java
new file mode 100644
index 00000000..cfd768c1
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/GreedyEnsembleExperiment.java
@@ -0,0 +1,439 @@
+package de.lmu.ifi.dbs.elki.application.greedyensemble;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Set;
+import java.util.TreeSet;
+
+import de.lmu.ifi.dbs.elki.application.AbstractApplication;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.correlation.WeightedPearsonCorrelationDistanceFunction;
+import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TiedTopBoundedHeap;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.heap.TopBoundedHeap;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
+import de.lmu.ifi.dbs.elki.workflow.InputStep;
+
+/**
+ * Class to load an outlier detection summary file, as produced by
+ * {@link ComputeKNNOutlierScores}, and compute a naive ensemble for it. Based
+ * on this initial estimation, an optimized ensemble is built using a greedy
+ * strategy. Starting with the best candidate only as initial ensemble, the most
+ * diverse candidate is investigated at each step. If it improves towards the
+ * (estimated) target vector, it is added, otherwise it is discarded.
+ *
+ * This approach is naive, and it may be surprising that it can improve results.
+ * The reason is probably that diversity will result in a comparable ensemble,
+ * while the reduced ensemble size is actually responsible for the improvements,
+ * by being more decisive and less noisy due to dropping "unhelpful" members.
+ *
+ * This still leaves quite a bit of room for improvement. If you build upon this
+ * basic approach, please acknowledge our proof of concept work.
+ *
+ * Reference:
+ * <p>
+ * E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br />
+ * On Evaluation of Outlier Rankings and Outlier Scores<br/>
+ * In Proceedings of the 12th SIAM International Conference on Data Mining
+ * (SDM), Anaheim, CA, 2012.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel", title = "On Evaluation of Outlier Rankings and Outlier Scores", booktitle = "Proc. 12th SIAM International Conference on Data Mining (SDM), Anaheim, CA, 2012.")
+public class GreedyEnsembleExperiment extends AbstractApplication {
+ /**
+ * Get static logger
+ */
+ private static final Logging logger = Logging.getLogger(GreedyEnsembleExperiment.class);
+
+ /**
+ * The data input part.
+ */
+ private InputStep inputstep;
+
+ /**
+ * Variant, where the truth vector is also updated.
+ */
+ boolean refine_truth = false;
+
+ /**
+ * Constructor.
+ *
+ * @param verbose Verbosity
+ * @param inputstep Input step
+ */
+ public GreedyEnsembleExperiment(boolean verbose, InputStep inputstep) {
+ super(verbose);
+ this.inputstep = inputstep;
+ }
+
+ @Override
+ public void run() {
+ // Note: the database contains the *result vectors*, not the original data
+ // points.
+ final Database database = inputstep.getDatabase();
+ final Relation<NumberVector<?, ?>> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
+ final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
+ final DBID firstid = labels.iterDBIDs().next();
+ final String firstlabel = labels.get(firstid);
+ if(!firstlabel.matches("bylabel")) {
+ throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
+ }
+
+ // Dimensionality and reference vector
+ final int dim = DatabaseUtil.dimensionality(relation);
+ final NumberVector<?, ?> refvec = relation.get(firstid);
+
+ // Build the positive index set for ROC AUC.
+ Set<Integer> positive = new TreeSet<Integer>();
+ for(int d = 0; d < dim; d++) {
+ if(refvec.doubleValue(d + 1) > 0) {
+ positive.add(d);
+ }
+ }
+
+ final int estimated_outliers = (int) (0.005 * dim);
+ int union_outliers = 0;
+ final int[] outliers_seen = new int[dim];
+ // Find the top-k for each ensemble member
+ {
+ for(DBID id : relation.iterDBIDs()) {
+ // Skip "by label", obviously
+ if(firstid.equals(id)) {
+ continue;
+ }
+ final NumberVector<?, ?> vec = relation.get(id);
+ TiedTopBoundedHeap<DoubleIntPair> heap = new TiedTopBoundedHeap<DoubleIntPair>(estimated_outliers, Collections.reverseOrder());
+ for(int i = 0; i < dim; i++) {
+ heap.add(new DoubleIntPair(vec.doubleValue(i + 1), i));
+ }
+ if(heap.size() >= 2 * estimated_outliers) {
+ logger.warning("Too many ties. Expected: " + estimated_outliers + " got: " + heap.size());
+ }
+ for(DoubleIntPair pair : heap) {
+ if(outliers_seen[pair.second] == 0) {
+ outliers_seen[pair.second] = 1;
+ union_outliers += 1;
+ }
+ else {
+ outliers_seen[pair.second] += 1;
+ }
+ }
+ }
+ }
+ logger.verbose("Merged top " + estimated_outliers + " outliers to: " + union_outliers + " outliers");
+ // Build the final weight vector.
+ final double[] estimated_weights = new double[dim];
+ final double[] estimated_truth = new double[dim];
+ updateEstimations(outliers_seen, union_outliers, estimated_weights, estimated_truth);
+ NumberVector<?, ?> estimated_truth_vec = refvec.newNumberVector(estimated_truth);
+
+ PrimitiveDoubleDistanceFunction<NumberVector<?, ?>> wdist = getDistanceFunction(estimated_weights);
+ PrimitiveDoubleDistanceFunction<NumberVector<?, ?>> tdist = wdist;
+
+ // Build the naive ensemble:
+ final double[] naiveensemble = new double[dim];
+ {
+ for(DBID id : relation.iterDBIDs()) {
+ if(firstid.equals(id)) {
+ continue;
+ }
+ final NumberVector<?, ?> vec = relation.get(id);
+ for(int d = 0; d < dim; d++) {
+ naiveensemble[d] += vec.doubleValue(d + 1);
+ }
+ }
+ for(int d = 0; d < dim; d++) {
+ naiveensemble[d] /= (relation.size() - 1);
+ }
+ }
+ NumberVector<?, ?> naivevec = refvec.newNumberVector(naiveensemble);
+
+ // Compute single AUC scores and estimations.
+ // Remember the method most similar to the estimation
+ double bestauc = 0.0;
+ String bestaucstr = "";
+ double bestcost = Double.POSITIVE_INFINITY;
+ String bestcoststr = "";
+ DBID bestid = null;
+ double bestest = Double.POSITIVE_INFINITY;
+ {
+ // Compute individual scores
+ for(DBID id : relation.iterDBIDs()) {
+ if(firstid.equals(id)) {
+ continue;
+ }
+ // fout.append(labels.get(id));
+ final NumberVector<?, ?> vec = relation.get(id);
+ double auc = computeROCAUC(vec, positive, dim);
+ double estimated = wdist.doubleDistance(vec, estimated_truth_vec);
+ double cost = tdist.doubleDistance(vec, refvec);
+ logger.verbose("ROC AUC: " + auc + " estimated " + estimated + " cost " + cost + " " + labels.get(id));
+ if(auc > bestauc) {
+ bestauc = auc;
+ bestaucstr = labels.get(id);
+ }
+ if(cost < bestcost) {
+ bestcost = cost;
+ bestcoststr = labels.get(id);
+ }
+ if(estimated < bestest) {
+ bestest = estimated;
+ bestid = id;
+ }
+ }
+ }
+
+ // Initialize ensemble with "best" method
+ logger.verbose("Distance function: " + wdist);
+ logger.verbose("Initial estimation of outliers: " + union_outliers);
+ logger.verbose("Initializing ensemble with: " + labels.get(bestid));
+ ModifiableDBIDs ensemble = DBIDUtil.newArray(bestid);
+ ModifiableDBIDs enscands = DBIDUtil.newHashSet(relation.getDBIDs());
+ enscands.remove(bestid);
+ enscands.remove(firstid);
+ final double[] greedyensemble = new double[dim];
+ {
+ final NumberVector<?, ?> vec = relation.get(bestid);
+ for(int i = 0; i < dim; i++) {
+ greedyensemble[i] = vec.doubleValue(i + 1);
+ }
+ }
+ // Greedily grow the ensemble
+ final double[] testensemble = new double[dim];
+ while(enscands.size() > 0) {
+ NumberVector<?, ?> greedyvec = refvec.newNumberVector(greedyensemble);
+
+ // Weighting factors for combining:
+ double s1 = ensemble.size() / (ensemble.size() + 1.);
+ double s2 = 1. / (ensemble.size() + 1.);
+
+ final int heapsize = enscands.size();
+ TopBoundedHeap<DoubleObjPair<DBID>> heap = new TopBoundedHeap<DoubleObjPair<DBID>>(heapsize, Collections.reverseOrder());
+ for(DBID id : enscands) {
+ final NumberVector<?, ?> vec = relation.get(id);
+ double diversity = wdist.doubleDistance(vec, greedyvec);
+ heap.add(new DoubleObjPair<DBID>(diversity, id));
+ }
+ while(heap.size() > 0) {
+ DBID bestadd = heap.poll().second;
+ enscands.remove(bestadd);
+ // Update ensemble:
+ final NumberVector<?, ?> vec = relation.get(bestadd);
+ for(int i = 0; i < dim; i++) {
+ testensemble[i] = greedyensemble[i] * s1 + vec.doubleValue(i + 1) * s2;
+ }
+ NumberVector<?, ?> testvec = refvec.newNumberVector(testensemble);
+ double oldd = wdist.doubleDistance(estimated_truth_vec, greedyvec);
+ double newd = wdist.doubleDistance(estimated_truth_vec, testvec);
+ // logger.verbose("Distances: " + oldd + " vs. " + newd);
+ if(newd < oldd) {
+ System.arraycopy(testensemble, 0, greedyensemble, 0, dim);
+ ensemble.add(bestadd);
+ // logger.verbose("Growing ensemble with: " + labels.get(bestadd));
+ break; // Recompute heap
+ }
+ else {
+ // logger.verbose("Discarding: " + labels.get(bestadd));
+ if(refine_truth) {
+ boolean refresh = false;
+ // Update target vectors and weights
+ TiedTopBoundedHeap<DoubleIntPair> oheap = new TiedTopBoundedHeap<DoubleIntPair>(estimated_outliers, Collections.reverseOrder());
+ for(int i = 0; i < dim; i++) {
+ oheap.add(new DoubleIntPair(vec.doubleValue(i + 1), i));
+ }
+ for(DoubleIntPair pair : oheap) {
+ assert (outliers_seen[pair.second] > 0);
+ outliers_seen[pair.second] -= 1;
+ if(outliers_seen[pair.second] == 0) {
+ union_outliers -= 1;
+ refresh = true;
+ }
+ }
+ if(refresh) {
+ updateEstimations(outliers_seen, union_outliers, estimated_weights, estimated_truth);
+ estimated_truth_vec = refvec.newNumberVector(estimated_truth);
+ }
+ }
+ }
+ }
+ }
+ // Build the improved ensemble:
+ StringBuffer greedylbl = new StringBuffer();
+ {
+ for(DBID id : ensemble) {
+ if(greedylbl.length() > 0) {
+ greedylbl.append(" ");
+ }
+ greedylbl.append(labels.get(id));
+ }
+ }
+ NumberVector<?, ?> greedyvec = refvec.newNumberVector(greedyensemble);
+ logger.verbose("Estimated outliers remaining: " + union_outliers);
+ logger.verbose("Greedy ensemble: " + greedylbl.toString());
+
+ logger.verbose("Best single ROC AUC: " + bestauc + " (" + bestaucstr + ")");
+ logger.verbose("Best single cost: " + bestcost + " (" + bestcoststr + ")");
+ // Evaluate the naive ensemble and the "shrunk" ensemble
+ double naiveauc, naivecost;
+ {
+ naiveauc = computeROCAUC(naivevec, positive, dim);
+ naivecost = tdist.doubleDistance(naivevec, refvec);
+ logger.verbose("Naive ensemble AUC: " + naiveauc + " cost: " + naivecost);
+ logger.verbose("Naive ensemble Gain: " + gain(naiveauc, bestauc, 1) + " cost gain: " + gain(naivecost, bestcost, 0));
+ }
+ double greedyauc, greedycost;
+ {
+ greedyauc = computeROCAUC(greedyvec, positive, dim);
+ greedycost = tdist.doubleDistance(greedyvec, refvec);
+ logger.verbose("Greedy ensemble AUC: " + greedyauc + " cost: " + greedycost);
+ logger.verbose("Greedy ensemble Gain to best: " + gain(greedyauc, bestauc, 1) + " cost gain: " + gain(greedycost, bestcost, 0));
+ logger.verbose("Greedy ensemble Gain to naive: " + gain(greedyauc, naiveauc, 1) + " cost gain: " + gain(greedycost, naivecost, 0));
+ }
+ {
+ MeanVariance meanauc = new MeanVariance();
+ MeanVariance meancost = new MeanVariance();
+ HashSetModifiableDBIDs candidates = DBIDUtil.newHashSet(relation.getDBIDs());
+ candidates.remove(firstid);
+ for(int i = 0; i < 5000; i++) {
+ // Build the improved ensemble:
+ final double[] randomensemble = new double[dim];
+ {
+ DBIDs random = DBIDUtil.randomSample(candidates, ensemble.size(), (long)i);
+ for(DBID id : random) {
+ assert (!firstid.equals(id));
+ // logger.verbose("Using: "+labels.get(id));
+ final NumberVector<?, ?> vec = relation.get(id);
+ for(int d = 0; d < dim; d++) {
+ randomensemble[d] += vec.doubleValue(d + 1);
+ }
+ }
+ for(int d = 0; d < dim; d++) {
+ randomensemble[d] /= ensemble.size();
+ }
+ }
+ NumberVector<?, ?> randomvec = refvec.newNumberVector(randomensemble);
+ double auc = computeROCAUC(randomvec, positive, dim);
+ meanauc.put(auc);
+ double cost = tdist.doubleDistance(randomvec, refvec);
+ meancost.put(cost);
+ }
+ logger.verbose("Random ensemble AUC: " + meanauc.getMean() + " + stddev: " + meanauc.getSampleStddev() + " = " + (meanauc.getMean() + meanauc.getSampleStddev()));
+ logger.verbose("Random ensemble Gain: " + gain(meanauc.getMean(), bestauc, 1));
+ logger.verbose("Greedy improvement: " + (greedyauc - meanauc.getMean()) / meanauc.getSampleStddev() + " standard deviations.");
+ logger.verbose("Random ensemble Cost: " + meancost.getMean() + " + stddev: " + meancost.getSampleStddev() + " = " + (meancost.getMean() + meanauc.getSampleStddev()));
+ logger.verbose("Random ensemble Gain: " + gain(meancost.getMean(), bestcost, 0));
+ logger.verbose("Greedy improvement: " + (meancost.getMean() - greedycost) / meancost.getSampleStddev() + " standard deviations.");
+ logger.verbose("Naive ensemble Gain to random: " + gain(naiveauc, meanauc.getMean(), 1) + " cost gain: " + gain(naivecost, meancost.getMean(), 0));
+ logger.verbose("Random ensemble Gain to naive: " + gain(meanauc.getMean(), naiveauc, 1) + " cost gain: " + gain(meancost.getMean(), naivecost, 0));
+ logger.verbose("Greedy ensemble Gain to random: " + gain(greedyauc, meanauc.getMean(), 1) + " cost gain: " + gain(greedycost, meancost.getMean(), 0));
+ }
+ }
+
+ protected void updateEstimations(final int[] outliers_seen, int union_outliers, final double[] estimated_weights, final double[] estimated_truth) {
+ for(int i = 0; i < outliers_seen.length; i++) {
+ if(outliers_seen[i] > 0) {
+ estimated_weights[i] = 0.5 / union_outliers;
+ estimated_truth[i] = 1.0;
+ }
+ else {
+ estimated_weights[i] = 0.5 / (outliers_seen.length - union_outliers);
+ estimated_truth[i] = 0.0;
+ }
+ }
+ }
+
+ private PrimitiveDoubleDistanceFunction<NumberVector<?, ?>> getDistanceFunction(double[] estimated_weights) {
+ // return new WeightedSquaredEuclideanDistanceFunction(estimated_weights);
+ // return new WeightedLPNormDistanceFunction(1.0, estimated_weights);
+ return new WeightedPearsonCorrelationDistanceFunction(estimated_weights);
+ }
+
+ private double computeROCAUC(NumberVector<?, ?> vec, Set<Integer> positive, int dim) {
+ final DoubleIntPair[] scores = new DoubleIntPair[dim];
+ for(int d = 0; d < dim; d++) {
+ scores[d] = new DoubleIntPair(vec.doubleValue(d + 1), d);
+ }
+ Arrays.sort(scores, Collections.reverseOrder(DoubleIntPair.BYFIRST_COMPARATOR));
+ return ROC.computeAUC(ROC.materializeROC(dim, positive, Arrays.asList(scores).iterator()));
+ }
+
+ double gain(double score, double ref, double optimal) {
+ return 1 - ((optimal - score) / (optimal - ref));
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractApplication.Parameterizer {
+ /**
+ * Data source
+ */
+ InputStep inputstep;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ // Data input
+ inputstep = config.tryInstantiate(InputStep.class);
+ }
+
+ @Override
+ protected AbstractApplication makeInstance() {
+ return new GreedyEnsembleExperiment(verbose, inputstep);
+ }
+ }
+
+ /**
+ * Main method.
+ *
+ * @param args Command line parameters.
+ */
+ public static void main(String[] args) {
+ GreedyEnsembleExperiment.runCLIApplication(GreedyEnsembleExperiment.class, args);
+ }
+}
\ No newline at end of file
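
To make the greedy strategy described in the class comment above easier to follow, here is a simplified, self-contained sketch. Plain double[] vectors and an arbitrary distance function stand in for ELKI's relations and the weighted Pearson distance; it illustrates the idea (start from the candidate closest to the estimated target, then repeatedly try the most diverse remaining candidate and keep it only if it moves the ensemble towards the target) rather than the implementation in the diff.

    import java.util.ArrayList;
    import java.util.List;

    class GreedyEnsembleSketch {
      interface Dist { double d(double[] a, double[] b); }

      static List<double[]> greedy(List<double[]> candidates, double[] target, Dist dist) {
        // Start with the candidate closest to the (estimated) target vector.
        double[] best = candidates.get(0);
        for(double[] c : candidates) {
          if(dist.d(c, target) < dist.d(best, target)) { best = c; }
        }
        List<double[]> ensemble = new ArrayList<double[]>();
        ensemble.add(best);
        double[] current = best.clone();
        List<double[]> remaining = new ArrayList<double[]>(candidates);
        remaining.remove(best);
        while(!remaining.isEmpty()) {
          // Most diverse remaining candidate w.r.t. the current ensemble vector.
          double[] cand = remaining.get(0);
          for(double[] c : remaining) {
            if(dist.d(c, current) > dist.d(cand, current)) { cand = c; }
          }
          remaining.remove(cand);
          // Tentatively average it in; keep it only if the ensemble moves towards the target.
          double s1 = ensemble.size() / (ensemble.size() + 1.0);
          double s2 = 1.0 - s1;
          double[] test = new double[current.length];
          for(int i = 0; i < test.length; i++) {
            test[i] = s1 * current[i] + s2 * cand[i];
          }
          if(dist.d(test, target) < dist.d(current, target)) {
            current = test;
            ensemble.add(cand);
          }
        }
        return ensemble;
      }
    }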
diff --git a/src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java
new file mode 100644
index 00000000..2c728878
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/VisualizePairwiseGainMatrix.java
@@ -0,0 +1,307 @@
+package de.lmu.ifi.dbs.elki.application.greedyensemble;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.awt.image.BufferedImage;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.batik.util.SVGConstants;
+
+import de.lmu.ifi.dbs.elki.application.AbstractApplication;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
+import de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage;
+import de.lmu.ifi.dbs.elki.evaluation.similaritymatrix.ComputeSimilarityMatrixImage.SimilarityMatrix;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleIntPair;
+import de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling;
+import de.lmu.ifi.dbs.elki.visualization.VisualizationTask;
+import de.lmu.ifi.dbs.elki.visualization.VisualizerContext;
+import de.lmu.ifi.dbs.elki.visualization.VisualizerParameterizer;
+import de.lmu.ifi.dbs.elki.visualization.gui.SimpleSVGViewer;
+import de.lmu.ifi.dbs.elki.visualization.svg.SVGPlot;
+import de.lmu.ifi.dbs.elki.visualization.visualizers.Visualization;
+import de.lmu.ifi.dbs.elki.visualization.visualizers.visunproj.SimilarityMatrixVisualizer;
+import de.lmu.ifi.dbs.elki.workflow.InputStep;
+
+/**
+ * Class to load an outlier detection summary file, as produced by
+ * {@link ComputeKNNOutlierScores}, and compute a matrix with the pairwise
+ * gains. It will have one column / row obtained for each combination.
+ *
+ * The gain is always computed in relation to the better of the two input
+ * methods. Green colors indicate the result has improved, red indicate it
+ * became worse.
+ *
+ * Reference:
+ * <p>
+ * E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br />
+ * On Evaluation of Outlier Rankings and Outlier Scores<br/>
+ * In Proceedings of the 12th SIAM International Conference on Data Mining
+ * (SDM), Anaheim, CA, 2012.
+ * </p>
+ *
+ * @author Erich Schubert
+ */
+@Reference(authors = "E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel", title = "On Evaluation of Outlier Rankings and Outlier Scores", booktitle = "Proc. 12th SIAM International Conference on Data Mining (SDM), Anaheim, CA, 2012.")
+public class VisualizePairwiseGainMatrix extends AbstractApplication {
+ /**
+ * Get static logger
+ */
+ private static final Logging logger = Logging.getLogger(VisualizePairwiseGainMatrix.class);
+
+ /**
+ * The data input part.
+ */
+ private InputStep inputstep;
+
+ /**
+ * Parameterizer for visualizers
+ */
+ private VisualizerParameterizer vispar;
+
+ /**
+ * Constructor.
+ *
+ * @param verbose Verbosity
+ * @param inputstep Input step
+ * @param vispar Visualizer parameterizer
+ */
+ public VisualizePairwiseGainMatrix(boolean verbose, InputStep inputstep, VisualizerParameterizer vispar) {
+ super(verbose);
+ this.inputstep = inputstep;
+ this.vispar = vispar;
+ }
+
+ @Override
+ public void run() {
+ final Database database = inputstep.getDatabase();
+ final Relation<NumberVector<?, ?>> relation = database.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
+ final Relation<String> labels = DatabaseUtil.guessLabelRepresentation(database);
+ final DBID firstid = labels.iterDBIDs().next();
+ final String firstlabel = labels.get(firstid);
+ if(!firstlabel.matches("bylabel")) {
+ throw new AbortException("No 'by label' reference outlier found, which is needed for weighting!");
+ }
+
+ // Dimensionality and reference vector
+ final int dim = DatabaseUtil.dimensionality(relation);
+ final NumberVector<?, ?> refvec = relation.get(firstid);
+
+ // Build the truth vector
+ Set<Integer> pos = new TreeSet<Integer>();
+ final DoubleIntPair[] combined = new DoubleIntPair[dim];
+ {
+ for(int d = 0; d < dim; d++) {
+ combined[d] = new DoubleIntPair(0, d);
+ if(refvec.doubleValue(d + 1) > 0) {
+ pos.add(d);
+ }
+ }
+ }
+
+ ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
+ ids.remove(firstid);
+ final int size = ids.size();
+
+ double[][] data = new double[size][size];
+ DoubleMinMax minmax = new DoubleMinMax();
+
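+    // data[a][a] holds the ROC AUC of method a alone; data[a][b] holds the
+    // ROC AUC of the summed scores of methods a and b.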
+ for(int a = 0; a < size; a++) {
+ final NumberVector<?, ?> veca = relation.get(ids.get(a));
+ // Direct AUC score:
+ {
+ for(int d = 0; d < dim; d++) {
+ combined[d].first = veca.doubleValue(d + 1);
+ combined[d].second = d;
+ }
+ Arrays.sort(combined, Collections.reverseOrder(DoubleIntPair.BYFIRST_COMPARATOR));
+ double auc = ROC.computeAUC(ROC.materializeROC(dim, pos, Arrays.asList(combined).iterator()));
+ data[a][a] = auc;
+ // minmax.put(auc);
+ // logger.verbose(auc + " " + labels.get(ids.get(a)));
+ }
+ // Compare to others, exploiting symmetry
+ for(int b = a + 1; b < size; b++) {
+ final NumberVector<?, ?> vecb = relation.get(ids.get(b));
+ for(int d = 0; d < dim; d++) {
+ combined[d].first = veca.doubleValue(d + 1) + vecb.doubleValue(d + 1);
+ combined[d].second = d;
+ }
+ Arrays.sort(combined, Collections.reverseOrder(DoubleIntPair.BYFIRST_COMPARATOR));
+ double auc = ROC.computeAUC(ROC.materializeROC(dim, pos, Arrays.asList(combined).iterator()));
+ // logger.verbose(auc + " " + labels.get(ids.get(a)) + " " +
+ // labels.get(ids.get(b)));
+ data[a][b] = auc;
+ data[b][a] = auc;
+ // minmax.put(auc);
+ }
+ }
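+    // Convert raw AUC values into gains relative to the better of the two
+    // methods: gain = (AUC(a,b) - ref) / (1 - ref), ref = max(AUC(a), AUC(b)).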
+ for(int a = 0; a < size; a++) {
+ for(int b = a + 1; b < size; b++) {
+ double ref = Math.max(data[a][a], data[b][b]);
+ data[a][b] = (data[a][b] - ref) / (1 - ref);
+ data[b][a] = (data[b][a] - ref) / (1 - ref);
+ // logger.verbose(data[a][b] + " " + labels.get(ids.get(a)) + " " +
+ // labels.get(ids.get(b)));
+ minmax.put(data[a][b]);
+ }
+ }
+ for(int a = 0; a < size; a++) {
+ data[a][a] = 0;
+ }
+
+ logger.verbose(minmax.toString());
+
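+    // If (significantly) negative gains occur, use a scale symmetric around
+    // zero, so equally large gains and losses get equally intense colors.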
+ boolean hasneg = (minmax.getMin() < -1E-3);
+ LinearScaling scale;
+ if(!hasneg) {
+ scale = new LinearScaling(minmax);
+ }
+ else {
+ scale = LinearScaling.fromMinMax(0.0, Math.max(minmax.getMax(), -minmax.getMin()));
+ }
+
+ BufferedImage img = new BufferedImage(size, size, BufferedImage.TYPE_INT_RGB);
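+    // Render the symmetric gain matrix as an RGB image; only the upper
+    // triangle is computed, the mirrored pixel is set along with it.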
+ for(int x = 0; x < size; x++) {
+ for(int y = x; y < size; y++) {
+ double val = data[x][y];
+ val = scale.getScaled(val);
+ // Compute color:
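+        // Grayscale when no negative gains exist; otherwise green encodes
+        // improvement and red encodes degradation.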
+ final int col;
+ {
+ if(!hasneg) {
+ int ival = 0xFF & (int) (255 * Math.max(0, val));
+ col = 0xff000000 | (ival << 16) | (ival << 8) | ival;
+ }
+ else {
+ if(val >= 0) {
+ int ival = 0xFF & (int) (255 * val);
+ col = 0xff000000 | (ival << 8);
+ }
+ else {
+ int ival = 0xFF & (int) (255 * -val);
+ col = 0xff000000 | (ival << 16);
+ }
+ }
+ }
+ img.setRGB(x, y, col);
+ img.setRGB(y, x, col);
+ }
+ }
+ SimilarityMatrix smat = new ComputeSimilarityMatrixImage.SimilarityMatrix(img, relation, ids);
+ database.getHierarchy().add(database, smat);
+
+ VisualizerContext context = vispar.newContext(database);
+
+ // Attach visualizers to results
+ SimilarityMatrixVisualizer.Factory factory = new SimilarityMatrixVisualizer.Factory();
+ factory.processNewResult(database, database);
+
+ List<VisualizationTask> tasks = ResultUtil.filterResults(database, VisualizationTask.class);
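+    // Only show the visualization tasks created by the factory above.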
+ for(VisualizationTask task : tasks) {
+ if(task.getFactory() == factory) {
+ showVisualization(context, factory, task);
+ }
+ }
+ }
+
+ /**
+ * Show a single visualization.
+ *
+   * @param context Visualizer context
+   * @param factory Visualizer factory
+   * @param task Visualization task
+ */
+ private void showVisualization(VisualizerContext context, SimilarityMatrixVisualizer.Factory factory, VisualizationTask task) {
+ SVGPlot plot = new SVGPlot();
+ Visualization vis = factory.makeVisualization(task.clone(plot, context, null, 1.0, 1.0));
+ plot.getRoot().appendChild(vis.getLayer());
+ plot.getRoot().setAttribute(SVGConstants.SVG_WIDTH_ATTRIBUTE, "20cm");
+ plot.getRoot().setAttribute(SVGConstants.SVG_HEIGHT_ATTRIBUTE, "20cm");
+ plot.getRoot().setAttribute(SVGConstants.SVG_VIEW_BOX_ATTRIBUTE, "0 0 1 1");
+ plot.updateStyleElement();
+
+ (new SimpleSVGViewer()).setPlot(plot);
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractApplication.Parameterizer {
+ /**
+ * Data source
+ */
+ InputStep inputstep;
+
+ /**
+ * Parameterizer for visualizers
+ */
+ private VisualizerParameterizer vispar;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ // Data input
+ inputstep = config.tryInstantiate(InputStep.class);
+ // Visualization options
+ vispar = config.tryInstantiate(VisualizerParameterizer.class);
+ }
+
+ @Override
+ protected AbstractApplication makeInstance() {
+ return new VisualizePairwiseGainMatrix(verbose, inputstep, vispar);
+ }
+ }
+
+ /**
+ * Main method.
+ *
+ * @param args Command line parameters.
+ */
+ public static void main(String[] args) {
+ VisualizePairwiseGainMatrix.runCLIApplication(VisualizePairwiseGainMatrix.class, args);
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/application/greedyensemble/package-info.java b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/package-info.java
new file mode 100644
index 00000000..29b34e96
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/application/greedyensemble/package-info.java
@@ -0,0 +1,32 @@
+/**
+ * <p>Greedy ensembles for outlier detection.</p>
+ *
+ * <p>This package contains code that was used for the greedy ensemble experiment in
+ * <blockquote> E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br />
+ * On Evaluation of Outlier Rankings and Outlier Scores<br />
+ * In Proceedings of the 12th SIAM International Conference on Data Mining (SDM), Anaheim, CA, 2012.</blockquote>
+ * </p>
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.application.greedyensemble; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/application/internal/CheckELKIProperties.java b/src/de/lmu/ifi/dbs/elki/application/internal/CheckELKIServices.java
index fa260564..21ed047b 100644
--- a/src/de/lmu/ifi/dbs/elki/application/internal/CheckELKIProperties.java
+++ b/src/de/lmu/ifi/dbs/elki/application/internal/CheckELKIServices.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.internal;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -23,36 +23,44 @@ package de.lmu.ifi.dbs.elki.application.internal;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URISyntaxException;
+import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
-import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.properties.Properties;
+import de.lmu.ifi.dbs.elki.utilities.ELKIServiceLoader;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.InspectionUtil;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
/**
- * Helper application to test the ELKI properties file for "missing" implementations.
+ * Helper application to test the ELKI service files for "missing"
+ * implementations.
*
* @author Erich Schubert
*
- * @apiviz.uses Properties
+ * @apiviz.uses ELKIServiceLoader
*/
-public class CheckELKIProperties {
+public class CheckELKIServices {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CheckELKIProperties.class);
-
+ private static final Logging logger = Logging.getLogger(CheckELKIServices.class);
+
/**
* Pattern to strip comments, while keeping commented class names.
*/
- private Pattern strip = Pattern.compile("^[\\s#]*(.*?)[\\s,]*$");
+ private Pattern strip = Pattern.compile("^[\\s#]*(.*?)[\\s]*$");
/**
* Package to skip matches in - unreleased code.
@@ -65,25 +73,36 @@ public class CheckELKIProperties {
* @param argv Command line arguments
*/
public static void main(String[] argv) {
- new CheckELKIProperties().checkProperties();
+ new CheckELKIServices().checkServices();
}
-
+
/**
* Retrieve all properties and check them.
*/
- public void checkProperties() {
- Set<String> props = Properties.ELKI_PROPERTIES.getPropertyNames();
- for(String prop : props) {
- checkProperty(prop);
+ public void checkServices() {
+ URL u = getClass().getClassLoader().getResource(ELKIServiceLoader.PREFIX);
+ try {
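+      // Enumerate all service description files in the ELKI service
+      // directory and check each one (ignoring .svn folders).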
+ for(String prop : new File(u.toURI()).list()) {
+ if (".svn".equals(prop)) {
+ continue;
+ }
+ if (logger.isVerbose()) {
+ logger.verbose("Checking property: "+prop);
+ }
+ checkService(prop);
+ }
+ }
+ catch(URISyntaxException e) {
+ throw new AbortException("Cannot check all properties, as some are not in a file: URL.");
}
}
/**
- * Check a single property
+ * Check a single service class
*
- * @param prop Property = Class name.
+ * @param prop Class name.
*/
- private void checkProperty(String prop) {
+ private void checkService(String prop) {
Class<?> cls;
try {
cls = Class.forName(prop);
@@ -108,23 +127,34 @@ public class CheckELKIProperties {
names.add(c2.getName());
}
- String[] known = Properties.ELKI_PROPERTIES.getProperty(cls.getName());
- for(String k : known) {
- Matcher m = strip.matcher(k);
- if(m.matches()) {
- String stripped = m.group(1);
- if(stripped.length() > 0) {
- if(names.contains(stripped)) {
- names.remove(stripped);
- }
- else {
- logger.warning("Name " + stripped + " found for property " + prop + " but no class discovered (or referenced twice?).");
+ try {
+ InputStream is = getClass().getClassLoader().getResource(ELKIServiceLoader.PREFIX + cls.getName()).openStream();
+ BufferedReader r = new BufferedReader(new InputStreamReader(is, "utf-8"));
+ for(String line;;) {
+ line = r.readLine();
+ // End of stream:
+ if(line == null) {
+ break;
+ }
+ Matcher m = strip.matcher(line);
+ if(m.matches()) {
+ String stripped = m.group(1);
+ if(stripped.length() > 0) {
+ if(names.contains(stripped)) {
+ names.remove(stripped);
+ }
+ else {
+ logger.warning("Name " + stripped + " found for property " + prop + " but no class discovered (or referenced twice?).");
+ }
}
}
+ else {
+ logger.warning("Line: " + line + " didn't match regexp.");
+ }
}
- else {
- logger.warning("Line: " + k + " didn't match regexp.");
- }
+ }
+ catch(IOException e) {
+ logger.exception(e);
}
if(names.size() > 0) {
StringBuffer message = new StringBuffer();
@@ -134,7 +164,7 @@ public class CheckELKIProperties {
// TODO: sort by package, then classname
Collections.sort(sorted);
for(String remaining : sorted) {
- message.append("# " + remaining + ",\\" + FormatUtil.NEWLINE);
+ message.append("# " + remaining + FormatUtil.NEWLINE);
}
logger.warning(message.toString());
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/internal/CheckParameterizables.java b/src/de/lmu/ifi/dbs/elki/application/internal/CheckParameterizables.java
index 017f2d12..b94ee47a 100644
--- a/src/de/lmu/ifi/dbs/elki/application/internal/CheckParameterizables.java
+++ b/src/de/lmu/ifi/dbs/elki/application/internal/CheckParameterizables.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.internal;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/internal/DocumentParameters.java b/src/de/lmu/ifi/dbs/elki/application/internal/DocumentParameters.java
index 05af4b4d..c77e9dbb 100644
--- a/src/de/lmu/ifi/dbs/elki/application/internal/DocumentParameters.java
+++ b/src/de/lmu/ifi/dbs/elki/application/internal/DocumentParameters.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.internal;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,6 +33,7 @@ import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -50,11 +51,10 @@ import org.w3c.dom.Document;
import org.w3c.dom.Element;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.properties.Properties;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
+import de.lmu.ifi.dbs.elki.utilities.ELKIServiceLoader;
import de.lmu.ifi.dbs.elki.utilities.InspectionUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.HashMapList;
-import de.lmu.ifi.dbs.elki.utilities.iterator.IterableIterator;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizable;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.Parameterizer;
@@ -635,8 +635,8 @@ public class DocumentParameters {
private static void appendKnownImplementationsIfNonempty(Document htmldoc, ClassParameter<?> opt, Element elemdd) {
if(opt.getRestrictionClass() != Object.class) {
- IterableIterator<Class<?>> iter = opt.getKnownImplementations();
- if(iter.hasNext()) {
+ List<Class<?>> iter = opt.getKnownImplementations();
+ if(!iter.isEmpty()) {
Element p = htmldoc.createElement(HTMLUtil.HTML_P_TAG);
p.appendChild(htmldoc.createTextNode(HEADER_KNOWN_IMPLEMENTATIONS));
elemdd.appendChild(p);
@@ -652,7 +652,8 @@ public class DocumentParameters {
elemdd.appendChild(ul);
}
// Report when not in properties file.
- if(Properties.ELKI_PROPERTIES.getProperty(opt.getRestrictionClass().getName()).length == 0) {
+ Iterator<Class<?>> clss = new ELKIServiceLoader(opt.getRestrictionClass());
+ if (!clss.hasNext()) {
logger.warning(opt.getRestrictionClass().getName() + " not in properties. No autocompletion available in release GUI.");
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/internal/DocumentReferences.java b/src/de/lmu/ifi/dbs/elki/application/internal/DocumentReferences.java
index 949a7874..2e002243 100644
--- a/src/de/lmu/ifi/dbs/elki/application/internal/DocumentReferences.java
+++ b/src/de/lmu/ifi/dbs/elki/application/internal/DocumentReferences.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.internal;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -25,12 +25,12 @@ package de.lmu.ifi.dbs.elki.application.internal;
import java.io.BufferedOutputStream;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.io.PrintStream;
+import java.lang.reflect.Method;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -44,9 +44,9 @@ import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
+import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.utilities.InspectionUtil;
-import de.lmu.ifi.dbs.elki.utilities.documentation.DocumentationUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
import de.lmu.ifi.dbs.elki.utilities.xml.HTMLUtil;
@@ -64,46 +64,56 @@ public class DocumentReferences {
private static final String MODIFICATION_WARNING = "WARNING: THIS DOCUMENT IS AUTOMATICALLY GENERATED. MODIFICATIONS MAY GET LOST.";
/**
+ * Logger
+ */
+ private static final Logging logger = Logging.getLogger(DocumentReferences.class);
+
+ /**
* @param args Command line arguments
*/
public static void main(String[] args) {
- if(args.length != 1) {
- LoggingUtil.warning("I need exactly one file name to operate!");
+ if(args.length < 1 || args.length > 2) {
+      LoggingUtil.warning("I need one or two file names to operate!");
System.exit(1);
}
- if(!args[0].endsWith(".html")) {
- LoggingUtil.warning("File name doesn't end with .html!");
+ if(!args[0].endsWith(".html") || (args.length > 1 && !args[1].endsWith(".wiki"))) {
+ LoggingUtil.warning("File name doesn't end in expected extension!");
System.exit(1);
}
- File references = new File(args[0]);
- {
- FileOutputStream reffo;
- try {
- reffo = new FileOutputStream(references);
- }
- catch(FileNotFoundException e) {
- LoggingUtil.exception("Can't create output stream!", e);
- throw new RuntimeException(e);
- }
+ List<Pair<Reference, List<Class<?>>>> refs = sortedReferences();
+ try {
+ File references = new File(args[0]);
+ FileOutputStream reffo = new FileOutputStream(references);
+ Document refdoc = documentReferences(refs);
OutputStream refstream = new BufferedOutputStream(reffo);
- Document refdoc = documentParameters();
+ HTMLUtil.writeXHTML(refdoc, refstream);
+ refstream.flush();
+ refstream.close();
+ reffo.close();
+ }
+ catch(IOException e) {
+ LoggingUtil.exception("IO Exception writing HTML output.", e);
+ throw new RuntimeException(e);
+ }
+ if(args.length > 1) {
try {
- HTMLUtil.writeXHTML(refdoc, refstream);
- refstream.flush();
- refstream.close();
- reffo.close();
+ File refwiki = new File(args[1]);
+ FileOutputStream reffow = new FileOutputStream(refwiki);
+ PrintStream refstreamW = new PrintStream(reffow);
+ documentReferencesWiki(refs, refstreamW);
+ refstreamW.flush();
+ refstreamW.close();
+ reffow.close();
}
catch(IOException e) {
- LoggingUtil.exception("IO Exception writing output.", e);
+ LoggingUtil.exception("IO Exception writing Wiki output.", e);
throw new RuntimeException(e);
}
}
}
- private static Document documentParameters() {
- List<Pair<Reference, List<Class<?>>>> refs = sortedReferences();
-
+ private static Document documentReferences(List<Pair<Reference, List<Class<?>>>> refs) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder;
try {
@@ -165,7 +175,7 @@ public class DocumentReferences {
{
boolean first = true;
for(Class<?> cls : pair.second) {
- if (!first) {
+ if(!first) {
classdt.appendChild(htmldoc.createTextNode(", "));
}
Element classan = htmldoc.createElement(HTMLUtil.HTML_A_TAG);
@@ -177,7 +187,7 @@ public class DocumentReferences {
classa.setAttribute(HTMLUtil.HTML_HREF_ATTRIBUTE, linkForClassName(cls.getName()));
classa.setTextContent(cls.getName());
classdt.appendChild(classa);
-
+
first = false;
}
}
@@ -222,25 +232,98 @@ public class DocumentReferences {
return htmldoc;
}
- private static List<Pair<Reference, List<Class<?>>>> sortedReferences() {
- ArrayList<Class<?>> classes = findAllClassesWithReferences();
- Collections.sort(classes, new InspectionUtil.ClassSorter());
+ private static void documentReferencesWiki(List<Pair<Reference, List<Class<?>>>> refs, PrintStream refstreamW) {
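+    // Emit wiki markup: [[javadoc(...)]] macros for the classes and [[br]]
+    // line breaks between them.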
+ for(Pair<Reference, List<Class<?>>> pair : refs) {
+ // JavaDoc links for relevant classes.
+ {
+ boolean first = true;
+ for(Class<?> cls : pair.second) {
+ if(!first) {
+ refstreamW.println(",[[br]]");
+ }
+ refstreamW.print("[[javadoc(");
+ refstreamW.print(cls.getName());
+ refstreamW.print(",");
+ refstreamW.print(cls.getName());
+ refstreamW.print(")]]");
+
+ first = false;
+ }
+ }
+ refstreamW.println("");
- List<Pair<Reference, List<Class<?>>>> refs = new ArrayList<Pair<Reference, List<Class<?>>>>(classes.size());
- Map<Reference, List<Class<?>>> map = new HashMap<Reference, List<Class<?>>>(classes.size());
- for(Class<?> cls : classes) {
- Reference ref = DocumentationUtil.getReference(cls);
- List<Class<?>> list = map.get(ref);
- if(list == null) {
- list = new ArrayList<Class<?>>(5);
- map.put(ref, list);
- refs.add(new Pair<Reference, List<Class<?>>>(ref, list));
+ String indent = " ";
+ {
+ Reference ref = pair.first;
+ // Prefix
+ if(ref.prefix().length() > 0) {
+ refstreamW.println(indent + ref.prefix() + " [[br]]");
+ }
+ // Authors
+ refstreamW.println(indent + "By: " + ref.authors() + " [[br]]");
+ // Title
+ refstreamW.println(indent + "'''" + ref.title() + "'''" + " [[br]]");
+ // Booktitle
+ refstreamW.println(indent + "In: " + ref.booktitle() + " [[br]]");
+ // URL
+ if(ref.url().length() > 0) {
+ refstreamW.println(indent + "Online: [" + ref.url() + "][[br]]");
+ }
}
- list.add(cls);
+ refstreamW.println("");
+ refstreamW.println("");
+ }
+ }
+
+ private static List<Pair<Reference, List<Class<?>>>> sortedReferences() {
+ List<Pair<Reference, List<Class<?>>>> refs = new ArrayList<Pair<Reference, List<Class<?>>>>();
+ Map<Reference, List<Class<?>>> map = new HashMap<Reference, List<Class<?>>>();
+
+ for(final Class<?> cls : InspectionUtil.findAllImplementations(Object.class, true)) {
+ inspectClass(cls, refs, map);
}
return refs;
}
+ private static void inspectClass(final Class<?> cls, List<Pair<Reference, List<Class<?>>>> refs, Map<Reference, List<Class<?>>> map) {
+ try {
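+      // Collect @Reference annotations on the class itself, on its inner
+      // classes (recursively), and on its declared methods.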
+ if(cls.isAnnotationPresent(Reference.class)) {
+ Reference ref = cls.getAnnotation(Reference.class);
+ List<Class<?>> list = map.get(ref);
+ if(list == null) {
+ list = new ArrayList<Class<?>>(5);
+ map.put(ref, list);
+ refs.add(new Pair<Reference, List<Class<?>>>(ref, list));
+ }
+ list.add(cls);
+ }
+ // Inner classes
+ for(Class<?> c2 : cls.getDeclaredClasses()) {
+ inspectClass(c2, refs, map);
+ }
+ for(Method m : cls.getDeclaredMethods()) {
+ if(m.isAnnotationPresent(Reference.class)) {
+ Reference ref = m.getAnnotation(Reference.class);
+ List<Class<?>> list = map.get(ref);
+ if(list == null) {
+ list = new ArrayList<Class<?>>(5);
+ map.put(ref, list);
+ refs.add(new Pair<Reference, List<Class<?>>>(ref, list));
+ }
+ list.add(cls);
+ }
+ }
+ }
+ catch(NoClassDefFoundError e) {
+ if(!cls.getCanonicalName().startsWith("experimentalcode.")) {
+ logger.warning("Exception in finding references for class " + cls.getCanonicalName() + " - missing referenced class?");
+ }
+ }
+ catch(Error e) {
+ logger.warning("Exception in finding references for class " + cls.getCanonicalName() + ": " + e, e);
+ }
+ }
+
private static String linkForClassName(String name) {
String link = name.replace(".", "/") + ".html";
return link;
@@ -253,10 +336,17 @@ public class DocumentReferences {
*/
public static ArrayList<Class<?>> findAllClassesWithReferences() {
ArrayList<Class<?>> references = new ArrayList<Class<?>>();
- for(final Class<?> cls : InspectionUtil.findAllImplementations(Object.class, false)) {
+ for(final Class<?> cls : InspectionUtil.findAllImplementations(Object.class, true)) {
if(cls.isAnnotationPresent(Reference.class)) {
references.add(cls);
}
+ else {
+ for(Method m : cls.getDeclaredMethods()) {
+ if(m.isAnnotationPresent(Reference.class)) {
+ references.add(cls);
+ }
+ }
+ }
}
return references;
}
diff --git a/src/de/lmu/ifi/dbs/elki/application/internal/package-info.java b/src/de/lmu/ifi/dbs/elki/application/internal/package-info.java
index dd7746e8..56326655 100644
--- a/src/de/lmu/ifi/dbs/elki/application/internal/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/application/internal/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONBuffer.java b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONBuffer.java
index 9ef3f688..c077e9d3 100644
--- a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONBuffer.java
+++ b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONBuffer.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.jsmap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java
index 96a24dcb..7fc26496 100644
--- a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java
+++ b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONResultHandler.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.jsmap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONWebServer.java b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONWebServer.java
index 873420fd..5b210795 100644
--- a/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONWebServer.java
+++ b/src/de/lmu/ifi/dbs/elki/application/jsmap/JSONWebServer.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.jsmap;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/jsmap/package-info.java b/src/de/lmu/ifi/dbs/elki/application/jsmap/package-info.java
index f359198a..e89e272c 100644
--- a/src/de/lmu/ifi/dbs/elki/application/jsmap/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/application/jsmap/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/package-info.java b/src/de/lmu/ifi/dbs/elki/application/package-info.java
index b81ac8f5..35b3b3d9 100644
--- a/src/de/lmu/ifi/dbs/elki/application/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/application/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/application/visualization/KNNExplorer.java b/src/de/lmu/ifi/dbs/elki/application/visualization/KNNExplorer.java
index 0cb950af..0419f791 100644
--- a/src/de/lmu/ifi/dbs/elki/application/visualization/KNNExplorer.java
+++ b/src/de/lmu/ifi/dbs/elki/application/visualization/KNNExplorer.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.application.visualization;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2011
+ Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,8 +29,6 @@ import java.awt.Component;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.util.HashMap;
-import java.util.List;
-import java.util.ListIterator;
import javax.swing.DefaultListCellRenderer;
import javax.swing.DefaultListModel;
@@ -62,12 +60,14 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.knn.KNNQuery;
+import de.lmu.ifi.dbs.elki.database.query.knn.KNNResult;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
+import de.lmu.ifi.dbs.elki.math.scales.LinearScale;
import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
@@ -79,7 +79,6 @@ import de.lmu.ifi.dbs.elki.visualization.batikutil.LazyCanvasResizer;
import de.lmu.ifi.dbs.elki.visualization.batikutil.NodeReplacer;
import de.lmu.ifi.dbs.elki.visualization.css.CSSClassManager.CSSNamingConflict;
import de.lmu.ifi.dbs.elki.visualization.savedialog.SVGSaveDialog;
-import de.lmu.ifi.dbs.elki.visualization.scales.LinearScale;
import de.lmu.ifi.dbs.elki.visualization.style.PropertiesBasedStyleLibrary;
import de.lmu.ifi.dbs.elki.visualization.style.StyleLibrary;
import de.lmu.ifi.dbs.elki.visualization.svg.SVGPath;
@@ -393,7 +392,7 @@ public class KNNExplorer<O extends NumberVector<?, ?>, D extends NumberDistance<
try {
StyleLibrary style = new PropertiesBasedStyleLibrary();
- SVGSimpleLinearAxis.drawAxis(plot, viewport, this.s, 0.0, StyleLibrary.SCALE, 0.0, 0.0, true, false, style);
+ SVGSimpleLinearAxis.drawAxis(plot, viewport, this.s, 0.0, StyleLibrary.SCALE, 0.0, 0.0, SVGSimpleLinearAxis.LabelStyle.LEFTHAND, style);
}
catch(CSSNamingConflict e) {
LoggingUtil.exception(e);
@@ -436,21 +435,21 @@ public class KNNExplorer<O extends NumberVector<?, ?>, D extends NumberDistance<
for(Object o : sel) {
DBID idx = (DBID) o;
- List<DistanceResultPair<D>> knn = knnQuery.getKNNForDBID(idx, k);
+ KNNResult<D> knn = knnQuery.getKNNForDBID(idx, k);
- double maxdist = knn.get(knn.size() - 1).getDistance().doubleValue();
+ double maxdist = knn.getKNNDistance().doubleValue();
// avoid division by zero.
if(maxdist == 0) {
maxdist = 1;
}
- for(ListIterator<DistanceResultPair<D>> iter = knn.listIterator(knn.size()); iter.hasPrevious();) {
- DistanceResultPair<D> pair = iter.previous();
+ for (int i = knn.size() - 1; i >= 0; i--) {
+ DistanceResultPair<D> pair = knn.get(i);
Element line = plotSeries(pair.getDBID(), MAXRESOLUTION);
double dist = pair.getDistance().doubleValue() / maxdist;
Color color = getColor(dist);
String colstr = "#" + Integer.toHexString(color.getRGB()).substring(2);
- String width = (pair.getDBID() == idx) ? "0.5%" : "0.2%";
+ String width = (pair.getDBID().equals(idx)) ? "0.5%" : "0.2%";
SVGUtil.setStyle(line, "stroke: " + colstr + "; stroke-width: " + width + "; fill: none");
newe.appendChild(line);
// put into cache
diff --git a/src/de/lmu/ifi/dbs/elki/application/visualization/package-info.java b/src/de/lmu/ifi/dbs/elki/application/visualization/package-info.java
index 70870cfd..717a80f9 100644
--- a/src/de/lmu/ifi/dbs/elki/application/visualization/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/application/visualization/package-info.java
@@ -6,7 +6,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2011
+Copyright (C) 2012
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team