summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/evaluation
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation')
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java159
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java56
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java1
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java15
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java22
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java29
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java66
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java10
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java227
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java238
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java241
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java269
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java175
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/roc/ComputeROCCurve.java183
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java198
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java40
19 files changed, 1577 insertions, 406 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java
new file mode 100644
index 00000000..888777a5
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java
@@ -0,0 +1,159 @@
+package de.lmu.ifi.dbs.elki.evaluation;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.evaluation.clustering.EvaluateClustering;
+import de.lmu.ifi.dbs.elki.evaluation.histogram.ComputeOutlierHistogram;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierPrecisionAtKCurve;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierPrecisionRecallCurve;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierROCCurve;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.scaling.LinearScaling;
+
+/**
+ * Evaluator that tries to auto-run a number of evaluation methods.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses OutlierResult
+ * @apiviz.uses Clustering
+ * @apiviz.composedOf OutlierROCCurve
+ * @apiviz.composedOf OutlierPrecisionAtKCurve
+ * @apiviz.composedOf OutlierPrecisionRecallCurve
+ * @apiviz.composedOf ComputeOutlierHistogram
+ * @apiviz.composedOf EvaluateClustering
+ */
+public class AutomaticEvaluation implements Evaluator {
+ /**
+ * Class logger
+ */
+ private static final Logging logger = Logging.getLogger(AutomaticEvaluation.class);
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result newResult) {
+ autoEvaluateClusterings(baseResult, newResult);
+ autoEvaluateOutliers(baseResult, newResult);
+ }
+
+ protected void autoEvaluateOutliers(HierarchicalResult baseResult, Result newResult) {
+ Collection<OutlierResult> outliers = ResultUtil.filterResults(newResult, OutlierResult.class);
+ if(logger.isDebugging()) {
+ logger.debug("Number of new outlier results: " + outliers.size());
+ }
+ if(outliers.size() > 0) {
+ ResultUtil.ensureClusteringResult(ResultUtil.findDatabase(baseResult), baseResult);
+ Collection<Clustering<?>> clusterings = ResultUtil.filterResults(baseResult, Clustering.class);
+ if(clusterings.size() == 0) {
+ logger.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
+ return;
+ }
+ Clustering<?> basec = clusterings.iterator().next();
+ // Find minority class label
+ int min = Integer.MAX_VALUE;
+ int total = 0;
+ String label = null;
+ if(basec.getAllClusters().size() > 1) {
+ for(Cluster<?> c : basec.getAllClusters()) {
+ final int csize = c.getIDs().size();
+ total += csize;
+ if(csize < min) {
+ min = csize;
+ label = c.getName();
+ }
+ }
+ }
+ if(label == null) {
+ logger.warning("Could not evaluate outlier results, as I could not find a minority label.");
+ return;
+ }
+ if(min == 1) {
+ logger.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
+ }
+ if(min > 0.05 * total) {
+ logger.warning("The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!");
+ }
+ logger.verbose("Evaluating using minority class: " + label);
+ Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
+ // Compute ROC curve
+ new OutlierROCCurve(pat).processNewResult(baseResult, newResult);
+ // Compute Precision at k
+ new OutlierPrecisionAtKCurve(pat, min * 2).processNewResult(baseResult, newResult);
+ // Compute ROC curve
+ new OutlierPrecisionRecallCurve(pat).processNewResult(baseResult, newResult);
+ // Compute outlier histogram
+ new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false).processNewResult(baseResult, newResult);
+ }
+ }
+
+ protected void autoEvaluateClusterings(HierarchicalResult baseResult, Result newResult) {
+ Collection<Clustering<?>> clusterings = ResultUtil.filterResults(newResult, Clustering.class);
+ if(logger.isDebugging()) {
+ logger.warning("Number of new clustering results: " + clusterings.size());
+ }
+ for (Iterator<Clustering<?>> c = clusterings.iterator(); c.hasNext();) {
+ Clustering<?> test = c.next();
+ if ("allinone-clustering".equals(test.getShortName())) {
+ c.remove();
+ }
+ else if ("allinnoise-clustering".equals(test.getShortName())) {
+ c.remove();
+ }
+ else if ("bylabel-clustering".equals(test.getShortName())) {
+ c.remove();
+ }
+ else if ("bymodel-clustering".equals(test.getShortName())) {
+ c.remove();
+ }
+ }
+ if (clusterings.size() > 0) {
+ new EvaluateClustering(new ByLabelOrAllInOneClustering(), false, true).processNewResult(baseResult, newResult);
+ }
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected AutomaticEvaluation makeInstance() {
+ return new AutomaticEvaluation();
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java
new file mode 100644
index 00000000..453a337f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java
@@ -0,0 +1,56 @@
+package de.lmu.ifi.dbs.elki.evaluation;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * No-operation evaluator, that only serves the purpose of explicitely disabling
+ * the default value of {@link AutomaticEvaluation}, if you do not want
+ * evaluation to run.
+ *
+ * @author Erich Schubert
+ */
+public class NoAutomaticEvaluation implements Evaluator {
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result newResult) {
+ return;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected NoAutomaticEvaluation makeInstance() {
+ return new NoAutomaticEvaluation();
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java
index d8087cf6..fdfaaaa9 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java
@@ -69,7 +69,6 @@ public class BCubed {
bCubedRecall += (recall * table.contingency[i1][i2]);
}
}
-
bCubedPrecision = bCubedPrecision / table.contingency[table.size1][table.size2];
bCubedRecall = bCubedRecall / table.contingency[table.size1][table.size2];
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java
index ac156a4e..70f6e6ce 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java
@@ -28,7 +28,7 @@ import java.util.List;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
@@ -37,6 +37,15 @@ import de.lmu.ifi.dbs.elki.math.MeanVariance;
* Class storing the contingency table and related data on two clusterings.
*
* @author Erich Schubert
+ *
+ * @apiviz.landmark
+ *
+ * @apiviz.uses Clustering
+ * @apiviz.composedOf PairCounting
+ * @apiviz.composedOf Entropy
+ * @apiviz.composedOf EditDistance
+ * @apiviz.composedOf BCubed
+ * @apiviz.composedOf SetMatchingPurity
*/
public class ClusterContingencyTable {
/**
@@ -156,8 +165,8 @@ public class ClusterContingencyTable {
for(int i2 = 0; it2.hasNext(); i2++) {
final Cluster<?> c2 = it2.next();
int count = 0;
- for(DBID id : c2.getIDs()) {
- if(ids.contains(id)) {
+ for(DBIDIter iter = c2.getIDs().iter(); iter.valid(); iter.advance()) {
+ if(ids.contains(iter.getDBID())) {
count++;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java
index ded70327..b174f3ed 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java
@@ -168,6 +168,9 @@ public class Entropy {
* @return Joint Normalized Mutual information
*/
public double entropyNMIJoint() {
+ if (entropyJoint() == 0) {
+ return 0;
+ }
return (entropyMutualInformation() / entropyJoint());
}
@@ -204,6 +207,9 @@ public class Entropy {
* @return Sqrt Normalized Mutual information
*/
public double entropyNMISqrt() {
+ if (entropyFirst() * entropySecond() <= 0) {
+ return entropyMutualInformation();
+ }
return (entropyMutualInformation() / Math.sqrt(entropyFirst() * entropySecond()));
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
index c82f5cf5..a57fa188 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
@@ -23,11 +23,11 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.Iterator;
+import java.util.Collection;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelOrAllInOneClustering;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
@@ -115,9 +115,8 @@ public class EvaluateClustering implements Evaluator {
Clustering<?> refc = null;
// Try to find an existing reference clustering (globally)
if(refc == null) {
- Iterator<Clustering<?>> cs = ResultUtil.filteredResults(baseResult, Clustering.class);
- while(cs.hasNext()) {
- Clustering<?> test = cs.next();
+ Collection<Clustering<?>> cs = ResultUtil.filterResults(baseResult, Clustering.class);
+ for(Clustering<?> test : cs) {
if(isReferenceResult(test)) {
refc = test;
break;
@@ -126,9 +125,8 @@ public class EvaluateClustering implements Evaluator {
}
// Try to find an existing reference clustering (locally)
if(refc == null) {
- Iterator<Clustering<?>> cs = ResultUtil.filteredResults(result, Clustering.class);
- while(cs.hasNext()) {
- Clustering<?> test = cs.next();
+ Collection<Clustering<?>> cs = ResultUtil.filterResults(result, Clustering.class);
+ for(Clustering<?> test : cs) {
if(isReferenceResult(test)) {
refc = test;
break;
@@ -164,10 +162,16 @@ public class EvaluateClustering implements Evaluator {
private boolean isReferenceResult(Clustering<?> t) {
// FIXME: don't hard-code strings
- if(t.getShortName().startsWith("bylabel-")) {
+ if("bylabel-clustering".equals(t.getShortName())) {
return true;
}
- if(t.getShortName().startsWith("bymodel-")) {
+ if("bymodel-clustering".equals(t.getShortName())) {
+ return true;
+ }
+ if("allinone-clustering".equals(t.getShortName())) {
+ return true;
+ }
+ if("allinnoise-clustering".equals(t.getShortName())) {
return true;
}
return false;
@@ -264,7 +268,7 @@ public class EvaluateClustering implements Evaluator {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class);
+ ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelOrAllInOneClustering.class);
if(config.grab(referencealgP)) {
referencealg = referencealgP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java
index 2471b135..123cd4a1 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java
@@ -1,4 +1,26 @@
package de.lmu.ifi.dbs.elki.evaluation.clustering.pairsegments;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.List;
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java
index 8ce8fd4a..00700385 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java
@@ -1,4 +1,26 @@
package de.lmu.ifi.dbs.elki.evaluation.clustering.pairsegments;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.Arrays;
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java
index 97c79dfb..ca352367 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java
@@ -1,5 +1,28 @@
package de.lmu.ifi.dbs.elki.evaluation.clustering.pairsegments;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
@@ -9,6 +32,7 @@ import java.util.TreeMap;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
@@ -47,6 +71,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
*
* @author Sascha Goldhofer
* @author Erich Schubert
+ *
+ * @apiviz.composedOf Segment
*/
@Reference(title = "Evaluation of Clusterings – Metrics and Visual Support", authors = "Elke Achtert, Sascha Goldhofer, Hans-Peter Kriegel, Erich Schubert, Arthur Zimek", booktitle = "Proc. 28th International Conference on Data Engineering (ICDE) 2012", url = "http://elki.dbs.ifi.lmu.de/wiki/PairSegments")
public class Segments extends BasicResult implements Iterable<Segment> {
@@ -154,7 +180,8 @@ public class Segments extends BasicResult implements Iterable<Segment> {
HashSetModifiableDBIDs ndelta1 = DBIDUtil.newHashSet(first);
HashSetModifiableDBIDs ndelta2 = DBIDUtil.newHashSet();
HashSetModifiableDBIDs nsecond = DBIDUtil.newHashSet(second.size());
- for(DBID id : clust.getIDs()) {
+ for(DBIDIter iter2 = clust.getIDs().iter(); iter2.valid(); iter2.advance()) {
+ DBID id = iter2.getDBID();
if(ndelta1.remove(id)) {
nfirstp.add(id);
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
index 124abf9e..2a2bb069 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
@@ -1,26 +1,27 @@
package de.lmu.ifi.dbs.elki.evaluation.histogram;
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2012
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
import java.util.ArrayList;
import java.util.Collection;
@@ -29,7 +30,7 @@ import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.data.DoubleVector;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -68,6 +69,7 @@ import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction;
* @author Erich Schubert
*
* @apiviz.landmark
+ * @apiviz.uses OutlierResult
* @apiviz.has ScalingFunction
* @apiviz.has HistogramResult oneway - - «create»
*/
@@ -186,13 +188,13 @@ public class ComputeOutlierHistogram implements Evaluator {
}
ids.removeDBIDs(outlierIds);
// fill histogram with values of each object
- for(DBID id : ids) {
- double result = or.getScores().get(id);
+ for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double result = or.getScores().get(iter);
result = scaling.getScaled(result);
hist.aggregate(result, negative);
}
- for(DBID id : outlierIds) {
- double result = or.getScores().get(id);
+ for(DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
+ double result = or.getScores().get(iter);
result = scaling.getScaled(result);
hist.aggregate(result, positive);
}
@@ -223,11 +225,11 @@ public class ComputeOutlierHistogram implements Evaluator {
}
/**
- * Parameterization class.
- *
- * @author Erich Schubert
- *
- * @apiviz.exclude
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
/**
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java
index b92e5bde..49775b1a 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java
@@ -28,7 +28,7 @@ import java.util.List;
import java.util.regex.Pattern;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -147,13 +147,13 @@ public class JudgeOutlierScores implements Evaluator {
double posscore = 0.0;
double negscore = 0.0;
// fill histogram with values of each object
- for(DBID id : ids) {
- double result = or.getScores().get(id);
+ for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
+ double result = or.getScores().get(iter);
result = innerScaling.getScaled(scaling.getScaled(result));
posscore += (1.0 - result);
}
- for(DBID id : outlierIds) {
- double result = or.getScores().get(id);
+ for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
+ double result = or.getScores().get(iter);
result = innerScaling.getScaled(scaling.getScaled(result));
negscore += result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java
new file mode 100644
index 00000000..6d65999e
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java
@@ -0,0 +1,227 @@
+package de.lmu.ifi.dbs.elki.evaluation.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.OrderingResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+
+/**
+ * Compute a curve containing the precision values for an outlier detection
+ * method.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has PrecisionAtKCurve
+ */
+public class OutlierPrecisionAtKCurve implements Evaluator {
+ /**
+ * The logger.
+ */
+ private static final Logging logger = Logging.getLogger(OutlierPrecisionAtKCurve.class);
+
+ /**
+ * The pattern to identify positive classes.
+ *
+ * <p>
+ * Key: {@code -precision.positive}
+ * </p>
+ */
+ public static final OptionID POSITIVE_CLASS_NAME_ID = OptionID.getOrCreateOptionID("precision.positive", "Class label for the 'positive' class.");
+
+ /**
+ * Maximum value for k
+ *
+ * <p>
+ * Key: {@code -precision.k}
+ * </p>
+ */
+ public static final OptionID MAX_K_ID = OptionID.getOrCreateOptionID("precision.maxk", "Maximum value of 'k' to compute the curve up to.");
+
+ /**
+ * Stores the "positive" class.
+ */
+ private Pattern positiveClassName;
+
+ /**
+ * Maximum value for k
+ */
+ int maxk = Integer.MAX_VALUE;
+
+ /**
+ * Constructor.
+ *
+ * @param positiveClassName Pattern to recognize outliers
+ * @param maxk Maximum value for k
+ */
+ public OutlierPrecisionAtKCurve(Pattern positiveClassName, int maxk) {
+ super();
+ this.positiveClassName = positiveClassName;
+ this.maxk = maxk;
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result result) {
+ Database db = ResultUtil.findDatabase(baseResult);
+ // Prepare
+ SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
+
+ if(positiveids.size() == 0) {
+ logger.warning("Computing a ROC curve failed - no objects matched.");
+ return;
+ }
+
+ List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
+ List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
+ // Outlier results are the main use case.
+ for(OutlierResult o : oresults) {
+ DBIDs sorted = o.getOrdering().iter(o.getOrdering().getDBIDs());
+ db.getHierarchy().add(o, computePrecisionResult(o.getScores().size(), positiveids, sorted));
+ // Process them only once.
+ orderings.remove(o.getOrdering());
+ }
+
+ // FIXME: find appropriate place to add the derived result
+ // otherwise apply an ordering to the database IDs.
+ for(OrderingResult or : orderings) {
+ DBIDs sorted = or.iter(or.getDBIDs());
+ db.getHierarchy().add(or, computePrecisionResult(or.getDBIDs().size(), positiveids, sorted));
+ }
+ }
+
+ private XYCurve computePrecisionResult(int size, SetDBIDs positiveids, DBIDs order) {
+ if(order.size() != size) {
+ throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
+ }
+ int lastk = Math.min(size, maxk);
+ XYCurve curve = new PrecisionAtKCurve("k", "Precision", lastk);
+
+ int pos = 0;
+ DBIDIter i = order.iter();
+ for(int k = 1; k <= lastk; k++, i.advance()) {
+ if(positiveids.contains(i.getDBID())) {
+ pos++;
+ }
+ curve.addAndSimplify(k, pos / (double) k);
+ }
+ if(logger.isVerbose()) {
+ logger.verbose("Precision @ " + lastk + " " + ((pos * 1.0) / lastk));
+ }
+ return curve;
+ }
+
+ /**
+ * Precision at K curve.
+ *
+ * @author Erich Schubert
+ */
+ public static class PrecisionAtKCurve extends XYCurve {
+ /**
+ * Constructor.
+ *
+ * @param size Size estimation
+ */
+ public PrecisionAtKCurve(String labelx, String labely, int size) {
+ super("k", "Precision", size);
+ }
+
+ @Override
+ public String getLongName() {
+ return "Precision @ k Curve";
+ }
+
+ @Override
+ public String getShortName() {
+ return "precision-at-k";
+ }
+
+ @Override
+ public void writeToText(TextWriterStream out, String label) {
+ final int last = size() - 1;
+ out.commentPrintLn("Precision @ " + ((int) getX(last)) + ": " + getY(last));
+ out.commentPrintSeparator();
+ out.flush();
+ out.commentPrint(labelx);
+ out.commentPrint(" ");
+ out.commentPrint(labely);
+ out.flush();
+ for(int pos = 0; pos < data.size(); pos+=2) {
+ out.inlinePrint((int)data.get(pos));
+ out.inlinePrint(data.get(pos + 1));
+ out.flush();
+ }
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ protected Pattern positiveClassName = null;
+
+ protected int maxk = Integer.MAX_VALUE;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
+ if(config.grab(positiveClassNameP)) {
+ positiveClassName = positiveClassNameP.getValue();
+ }
+ IntParameter maxkP = new IntParameter(MAX_K_ID, true);
+ if(config.grab(maxkP)) {
+ maxk = maxkP.getValue();
+ }
+ }
+
+ @Override
+ protected OutlierPrecisionAtKCurve makeInstance() {
+ return new OutlierPrecisionAtKCurve(positiveClassName, maxk);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java
new file mode 100644
index 00000000..c839016c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java
@@ -0,0 +1,238 @@
+package de.lmu.ifi.dbs.elki.evaluation.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.OrderingResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+
+/**
+ * Compute a curve containing the precision values for an outlier detection
+ * method.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has PRCurve
+ */
+public class OutlierPrecisionRecallCurve implements Evaluator {
+ /**
+ * The logger.
+ */
+ private static final Logging logger = Logging.getLogger(OutlierPrecisionRecallCurve.class);
+
+ /**
+ * The pattern to identify positive classes.
+ *
+ * <p>
+ * Key: {@code -precision.positive}
+ * </p>
+ */
+ public static final OptionID POSITIVE_CLASS_NAME_ID = OptionID.getOrCreateOptionID("precision.positive", "Class label for the 'positive' class.");
+
+ /**
+ * Stores the "positive" class.
+ */
+ private Pattern positiveClassName;
+
+ /**
+ * Constructor.
+ *
+ * @param positiveClassName Pattern to recognize outliers
+ */
+ public OutlierPrecisionRecallCurve(Pattern positiveClassName) {
+ super();
+ this.positiveClassName = positiveClassName;
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result result) {
+ Database db = ResultUtil.findDatabase(baseResult);
+ // Prepare
+ SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
+
+ if(positiveids.size() == 0) {
+ logger.warning("Computing a ROC curve failed - no objects matched.");
+ return;
+ }
+
+ List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
+ List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
+ // Outlier results are the main use case.
+ for(OutlierResult o : oresults) {
+ DBIDs sorted = o.getOrdering().iter(o.getOrdering().getDBIDs());
+ XYCurve curve = computePrecisionResult(o.getScores().size(), positiveids, sorted.iter(), o.getScores());
+ db.getHierarchy().add(o, curve);
+ // Process them only once.
+ orderings.remove(o.getOrdering());
+ }
+
+ // FIXME: find appropriate place to add the derived result
+ // otherwise apply an ordering to the database IDs.
+ for(OrderingResult or : orderings) {
+ DBIDs sorted = or.iter(or.getDBIDs());
+ XYCurve curve = computePrecisionResult(or.getDBIDs().size(), positiveids, sorted.iter(), null);
+ db.getHierarchy().add(or, curve);
+ }
+ }
+
+ private XYCurve computePrecisionResult(int size, SetDBIDs ids, DBIDIter iter, Relation<Double> scores) {
+ final int postot = ids.size();
+ int poscnt = 0, total = 0;
+ XYCurve curve = new PRCurve(postot + 2);
+
+ double prevscore = Double.NaN;
+ for(; iter.valid(); iter.advance()) {
+ // Previous precision rate - y axis
+ final double curprec = ((double) poscnt) / total;
+ // Previous recall rate - x axis
+ final double curreca = ((double) poscnt) / postot;
+
+ // Analyze next point
+ DBID cur = iter.getDBID();
+ // positive or negative match?
+ if(ids.contains(cur)) {
+ poscnt += 1;
+ }
+ total += 1;
+ // First iteration ends here
+ if(total == 1) {
+ continue;
+ }
+ // defer calculation for ties
+ if(scores != null) {
+ double curscore = scores.get(cur);
+ if(Double.compare(prevscore, curscore) == 0) {
+ continue;
+ }
+ prevscore = curscore;
+ }
+ // Add a new point (for the previous entry - because of tie handling!)
+ curve.addAndSimplify(curreca, curprec);
+ }
+ // End curve - always at all positives found.
+ curve.addAndSimplify(1.0, postot / total);
+ return curve;
+ }
+
+ /**
+ * P/R Curve
+ *
+ * @author Erich Schubert
+ */
+ public static class PRCurve extends XYCurve {
+ /**
+ * AUC value for PR curve
+ */
+ public static final String PRAUC_LABEL = "PR-AUC";
+
+ /**
+ * Area under curve
+ */
+ double auc = Double.NaN;
+
+ /**
+ * Constructor.
+ *
+ * @param size Size estimation
+ */
+ public PRCurve(int size) {
+ super("Recall", "Precision", size);
+ }
+
+ @Override
+ public String getLongName() {
+ return "Precision-Recall-Curve";
+ }
+
+ @Override
+ public String getShortName() {
+ return "pr-curve";
+ }
+
+ /**
+ * Get AUC value
+ *
+ * @return AUC value
+ */
+ public double getAUC() {
+ if(Double.isNaN(auc)) {
+ auc = areaUnderCurve(this);
+ }
+ return auc;
+ }
+
+ @Override
+ public void writeToText(TextWriterStream out, String label) {
+ out.commentPrintLn(PRAUC_LABEL + ": " + getAUC());
+ out.flush();
+ super.writeToText(out, label);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ protected Pattern positiveClassName = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
+ if(config.grab(positiveClassNameP)) {
+ positiveClassName = positiveClassNameP.getValue();
+ }
+ }
+
+ @Override
+ protected OutlierPrecisionRecallCurve makeInstance() {
+ return new OutlierPrecisionRecallCurve(positiveClassName);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java
new file mode 100644
index 00000000..ec73d67f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java
@@ -0,0 +1,241 @@
+package de.lmu.ifi.dbs.elki.evaluation.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.OrderingResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+
+/**
+ * Compute a ROC curve to evaluate a ranking algorithm and compute the
+ * corresponding ROCAUC value.
+ *
+ * The parameter {@code -rocauc.positive} specifies the class label of
+ * "positive" hits.
+ *
+ * The nested algorithm {@code -algorithm} will be run, the result will be
+ * searched for an iterable or ordering result, which then is compared with the
+ * clustering obtained via the given class label.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ *
+ * @apiviz.uses OutlierResult
+ * @apiviz.uses ROC
+ * @apiviz.has ROCResult oneway - - «create»
+ */
+// TODO: maybe add a way to process clustering results as well?
+public class OutlierROCCurve implements Evaluator {
+ /**
+ * The label we use for marking ROCAUC values.
+ */
+ public static final String ROCAUC_LABEL = "ROCAUC";
+
+ /**
+ * The logger.
+ */
+ private static final Logging logger = Logging.getLogger(OutlierROCCurve.class);
+
+ /**
+ * The pattern to identify positive classes.
+ *
+ * <p>
+ * Key: {@code -rocauc.positive}
+ * </p>
+ */
+ public static final OptionID POSITIVE_CLASS_NAME_ID = OptionID.getOrCreateOptionID("rocauc.positive", "Class label for the 'positive' class.");
+
+ /**
+ * Stores the "positive" class.
+ */
+ private Pattern positiveClassName;
+
+ /**
+ * Constructor.
+ *
+ * @param positive_class_name Positive class name pattern
+ */
+ public OutlierROCCurve(Pattern positive_class_name) {
+ super();
+ this.positiveClassName = positive_class_name;
+ }
+
+ private ROCResult computeROCResult(int size, SetDBIDs positiveids, DBIDs order) {
+ if(order.size() != size) {
+ throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
+ }
+ XYCurve roccurve = ROC.materializeROC(size, positiveids, new ROC.SimpleAdapter(order.iter()));
+ double rocauc = XYCurve.areaUnderCurve(roccurve);
+ if(logger.isVerbose()) {
+ logger.verbose(ROCAUC_LABEL + ": " + rocauc);
+ }
+
+ final ROCResult rocresult = new ROCResult(roccurve, rocauc);
+
+ return rocresult;
+ }
+
+ private ROCResult computeROCResult(int size, SetDBIDs positiveids, OutlierResult or) {
+ XYCurve roccurve = ROC.materializeROC(size, positiveids, new ROC.OutlierScoreAdapter(or));
+ double rocauc = XYCurve.areaUnderCurve(roccurve);
+ if(logger.isVerbose()) {
+ logger.verbose(ROCAUC_LABEL + ": " + rocauc);
+ }
+
+ final ROCResult rocresult = new ROCResult(roccurve, rocauc);
+
+ return rocresult;
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result result) {
+ Database db = ResultUtil.findDatabase(baseResult);
+ // Prepare
+ SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
+
+ if(positiveids.size() == 0) {
+ logger.warning("Computing a ROC curve failed - no objects matched.");
+ return;
+ }
+
+ boolean nonefound = true;
+ List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
+ List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
+ // Outlier results are the main use case.
+ for(OutlierResult o : oresults) {
+ db.getHierarchy().add(o, computeROCResult(o.getScores().size(), positiveids, o));
+ // Process them only once.
+ orderings.remove(o.getOrdering());
+ nonefound = false;
+ }
+
+ // FIXME: find appropriate place to add the derived result
+ // otherwise apply an ordering to the database IDs.
+ for(OrderingResult or : orderings) {
+ DBIDs sorted = or.iter(or.getDBIDs());
+ db.getHierarchy().add(or, computeROCResult(or.getDBIDs().size(), positiveids, sorted));
+ nonefound = false;
+ }
+
+ if(nonefound) {
+ return;
+ // logger.warning("No results found to process with ROC curve analyzer. Got "+iterables.size()+" iterables, "+orderings.size()+" orderings.");
+ }
+ }
+
+ /**
+ * Result object for ROC curves.
+ *
+ * @author Erich Schubert
+ */
+ public static class ROCResult extends XYCurve {
+ /**
+ * AUC value
+ */
+ private double auc;
+
+ /**
+ * Constructor.
+ *
+ * @param col roc curve
+ * @param rocauc ROC AUC value
+ */
+ public ROCResult(XYCurve col, double rocauc) {
+ super(col);
+ this.auc = rocauc;
+ }
+
+ /**
+ * @return the area under curve
+ */
+ public double getAUC() {
+ return auc;
+ }
+
+ @Override
+ public String getLongName() {
+ return "ROC Curve";
+ }
+
+ @Override
+ public String getShortName() {
+ return "roc-curve";
+ }
+
+ @Override
+ public void writeToText(TextWriterStream out, String label) {
+ out.commentPrintLn(ROCAUC_LABEL + ": " + auc);
+ out.flush();
+ super.writeToText(out, label);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Pattern for positive class.
+ */
+ protected Pattern positiveClassName = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
+ if(config.grab(positiveClassNameP)) {
+ positiveClassName = positiveClassNameP.getValue();
+ }
+ }
+
+ @Override
+ protected OutlierROCCurve makeInstance() {
+ return new OutlierROCCurve(positiveClassName);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java
new file mode 100644
index 00000000..928ac842
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java
@@ -0,0 +1,269 @@
+package de.lmu.ifi.dbs.elki.evaluation.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.OrderingResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+
+/**
+ * Smooth ROC curves are a variation of classic ROC curves that takes the scores
+ * into account.
+ *
+ * Reference:
+ * <p>
+ * W. Klement, P. A. Flach, N. Japkowicz, S. Matwin<br />
+ * Smooth Receiver Operating Characteristics (smROC) Curves.<br />
+ * In: European Conference on Machine Learning and Principles and Practice of
+ * Knowledge Discovery in Databases (ECML-PKDD'11)
+ * </p>
+ *
+ * However, this method has some deficiencies when the mean score is not 0.5, as
+ * discussed in:
+ * <p>
+ * E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br>
+ * On Evaluation of Outlier Rankings and Outlier Scores<br>
+ * In Proceedings of the 12th SIAM International Conference on Data Mining
+ * (SDM), Anaheim, CA, 2012.
+ * </p>
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ *
+ * @apiviz.uses OutlierResult
+ * @apiviz.has SmROCResult oneway - - «create»
+ */
+@Reference(authors = "W. Klement, P. A. Flach, N. Japkowicz, S. Matwin", title = "Smooth Receiver Operating Characteristics (smROC) Curves", booktitle = "In: European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD'11)", url = "http://dx.doi.org/10.1007/978-3-642-23783-6_13")
+public class OutlierSmROCCurve implements Evaluator {
+ /**
+ * The label we use for marking ROCAUC values.
+ */
+ public static final String SMROCAUC_LABEL = "ROCAUC";
+
+ /**
+ * The logger.
+ */
+ private static final Logging logger = Logging.getLogger(OutlierSmROCCurve.class);
+
+ /**
+ * Stores the "positive" class.
+ */
+ private Pattern positiveClassName;
+
+ /**
+ * Constructor.
+ *
+ * @param positive_class_name Positive class name pattern
+ */
+ public OutlierSmROCCurve(Pattern positive_class_name) {
+ super();
+ this.positiveClassName = positive_class_name;
+ }
+
+ private SmROCResult computeSmROCResult(SetDBIDs positiveids, OutlierResult or) {
+ Relation<Double> scores = or.getScores();
+ final int size = scores.size();
+
+ // Compute mean, for inversion
+ double mean = 0.0;
+ for(DBIDIter iditer = scores.iterDBIDs(); iditer.valid(); iditer.advance()) {
+ DBID id = iditer.getDBID();
+ mean += scores.get(id) / size;
+ }
+
+ SmROCResult curve = new SmROCResult(positiveids.size() + 2);
+
+ // start in bottom left
+ curve.add(0.0, 0.0);
+
+ int poscnt = 0, negcnt = 0;
+ double prevscore = Double.NaN;
+ double x = 0, y = 0;
+ for (DBIDIter nei = or.getOrdering().iter(or.getOrdering().getDBIDs()).iter(); nei.valid(); nei.advance()) {
+ // Analyze next point
+ final DBID curid = nei.getDBID();
+ final double curscore = scores.get(curid);
+ // defer calculation for ties
+ if(!Double.isNaN(prevscore) && (Double.compare(prevscore, curscore) == 0)) {
+ // positive or negative match?
+ if(positiveids.contains(curid)) {
+ poscnt += 1;
+ }
+ else {
+ negcnt += 1;
+ }
+ continue;
+ }
+ else {
+ // Add point for *previous* result (since we are no longer tied with it)
+ if(prevscore > mean) {
+ y += poscnt * prevscore + negcnt * (1.0 - prevscore);
+ x += poscnt * (1.0 - prevscore) + negcnt * prevscore;
+ }
+ else if(prevscore < mean) {
+ y += poscnt * (1.0 - prevscore) + negcnt * prevscore;
+ x += poscnt * prevscore + negcnt * (1.0 - prevscore);
+ }
+ curve.addAndSimplify(x, y);
+ // positive or negative match?
+ if(positiveids.contains(curid)) {
+ poscnt = 1;
+ negcnt = 0;
+ }
+ else {
+ poscnt = 0;
+ negcnt = 1;
+ }
+ prevscore = curscore;
+ }
+ }
+ // Last point
+ {
+ if(prevscore > mean) {
+ y += poscnt * prevscore + negcnt * (1.0 - prevscore);
+ x += poscnt * (1.0 - prevscore) + negcnt * prevscore;
+ }
+ else if(prevscore < mean) {
+ y += poscnt * (1.0 - prevscore) + negcnt * prevscore;
+ x += poscnt * prevscore + negcnt * (1.0 - prevscore);
+ }
+ curve.addAndSimplify(x, y);
+ }
+
+ double rocauc = XYCurve.areaUnderCurve(curve) / (x * y);
+ if(logger.isVerbose()) {
+ logger.verbose(SMROCAUC_LABEL + ": " + rocauc);
+ }
+ curve.rocauc = rocauc;
+
+ return curve;
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result result) {
+ Database db = ResultUtil.findDatabase(baseResult);
+ // Prepare
+ SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
+
+ if(positiveids.size() == 0) {
+ logger.warning("Computing a ROC curve failed - no objects matched.");
+ return;
+ }
+
+ List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
+ List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
+ for(OutlierResult o : oresults) {
+ db.getHierarchy().add(o, computeSmROCResult(positiveids, o));
+ orderings.remove(o.getOrdering());
+ }
+ }
+
+ /**
+ * Result object for Smooth ROC curves.
+ *
+ * @author Erich Schubert
+ */
+ public static class SmROCResult extends XYCurve {
+ /**
+ * ROC AUC score
+ */
+ double rocauc = Double.NaN;
+
+ /**
+ * Constructor.
+ *
+ * @param size Size estimate
+ */
+ public SmROCResult(int size) {
+ super("SmROC Negative", "SmROC Positive", size);
+ }
+
+ @Override
+ public String getLongName() {
+ return "SmROC Curve";
+ }
+
+ @Override
+ public String getShortName() {
+ return "smroc-curve";
+ }
+
+ /**
+ * SmROC AUC value
+ *
+ * @return SmROC auc value
+ */
+ public double getAUC() {
+ return rocauc;
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Pattern for positive class.
+ */
+ protected Pattern positiveClassName = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ PatternParameter positiveClassNameP = new PatternParameter(OutlierROCCurve.POSITIVE_CLASS_NAME_ID);
+ if(config.grab(positiveClassNameP)) {
+ positiveClassName = positiveClassNameP.getValue();
+ }
+ }
+
+ @Override
+ protected OutlierSmROCCurve makeInstance() {
+ return new OutlierSmROCCurve(positiveClassName);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java
new file mode 100644
index 00000000..e5d35424
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java
@@ -0,0 +1,175 @@
+package de.lmu.ifi.dbs.elki.evaluation.outlier;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arraylike.ArrayLikeUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleListParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.scaling.IdentityScaling;
+import de.lmu.ifi.dbs.elki.utilities.scaling.ScalingFunction;
+import de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction;
+
+/**
+ * Pseudo clustering algorithm that builds clusters based on their outlier
+ * score. Useful for transforming a numeric outlier score into a 2-class
+ * dataset.
+ *
+ * @author Erich Schubert
+ */
+public class OutlierThresholdClustering implements Evaluator {
+ /**
+ * Scaling function to use
+ */
+ ScalingFunction scaling = null;
+
+ /**
+ * Thresholds to use
+ */
+ double[] threshold;
+
+ /**
+ * Constructor.
+ *
+ * @param scaling Scaling function
+ * @param threshold Threshold
+ */
+ public OutlierThresholdClustering(ScalingFunction scaling, double[] threshold) {
+ super();
+ this.scaling = scaling;
+ this.threshold = threshold;
+ Arrays.sort(this.threshold);
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result newResult) {
+ List<OutlierResult> ors = ResultUtil.filterResults(newResult, OutlierResult.class);
+ for(OutlierResult or : ors) {
+ baseResult.getHierarchy().add(or, split(or));
+ }
+ }
+
+ private Clustering<Model> split(OutlierResult or) {
+ Relation<Double> scores = or.getScores();
+ if(scaling instanceof OutlierScalingFunction) {
+ ((OutlierScalingFunction) scaling).prepare(or);
+ }
+ ArrayList<ModifiableDBIDs> idlists = new ArrayList<ModifiableDBIDs>(threshold.length + 1);
+ for(int i = 0; i <= threshold.length; i++) {
+ idlists.add(DBIDUtil.newHashSet());
+ }
+ for(DBIDIter iter = scores.getDBIDs().iter(); iter.valid(); iter.advance()) {
+ DBID id = iter.getDBID();
+ double score = scores.get(id);
+ if(scaling != null) {
+ score = scaling.getScaled(score);
+ }
+ int i = 0;
+ for(; i < threshold.length; i++) {
+ if(score < threshold[i]) {
+ break;
+ }
+ }
+ idlists.get(i).add(id);
+ }
+ Clustering<Model> c = new Clustering<Model>("Outlier threshold clustering", "threshold-clustering");
+ for(int i = 0; i <= threshold.length; i++) {
+ String name = (i == 0) ? "Inlier" : "Outlier_" + threshold[i - 1];
+ c.addCluster(new Cluster<Model>(name, idlists.get(i), (i > 0)));
+ }
+ return c;
+ }
+
+ /**
+ * Parameterization helper
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Parameter to specify a scaling function to use.
+ * <p>
+ * Key: {@code -thresholdclust.scaling}
+ * </p>
+ */
+ public static final OptionID SCALING_ID = OptionID.getOrCreateOptionID("thresholdclust.scaling", "Class to use as scaling function.");
+
+ /**
+ * Parameter to specify the threshold
+ * <p>
+ * Key: {@code -thresholdclust.threshold}
+ * </p>
+ */
+ public static final OptionID THRESHOLD_ID = OptionID.getOrCreateOptionID("thresholdclust.threshold", "Threshold(s) to apply.");
+
+ /**
+ * Scaling function to use
+ */
+ ScalingFunction scaling = null;
+
+ /**
+ * Threshold to use
+ */
+ double[] threshold;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<ScalingFunction> scalingP = new ObjectParameter<ScalingFunction>(SCALING_ID, ScalingFunction.class, IdentityScaling.class);
+ if(config.grab(scalingP)) {
+ scaling = scalingP.instantiateClass(config);
+ }
+
+ DoubleListParameter thresholdP = new DoubleListParameter(THRESHOLD_ID);
+ if(config.grab(thresholdP)) {
+ threshold = ArrayLikeUtil.toPrimitiveDoubleArray(thresholdP.getValue());
+ }
+ }
+
+ @Override
+ protected OutlierThresholdClustering makeInstance() {
+ return new OutlierThresholdClustering(scaling, threshold);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/roc/ComputeROCCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/roc/ComputeROCCurve.java
index 232089ed..fae03d24 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/roc/ComputeROCCurve.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/roc/ComputeROCCurve.java
@@ -23,191 +23,31 @@ package de.lmu.ifi.dbs.elki.evaluation.roc;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
import java.util.regex.Pattern;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
-import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.result.CollectionResult;
-import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
-import de.lmu.ifi.dbs.elki.result.OrderingResult;
-import de.lmu.ifi.dbs.elki.result.Result;
-import de.lmu.ifi.dbs.elki.result.ResultUtil;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierROCCurve;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair;
/**
- * Compute a ROC curve to evaluate a ranking algorithm and compute the
- * corresponding ROCAUC value.
+ * Compatibility stub. This class has been renamed.
*
- * The parameter {@code -rocauc.positive} specifies the class label of
- * "positive" hits.
- *
- * The nested algorithm {@code -algorithm} will be run, the result will be
- * searched for an iterable or ordering result, which then is compared with the
- * clustering obtained via the given class label.
+ * TODO: remove at 0.6.0 release.
*
* @author Erich Schubert
*
- * @apiviz.landmark
- * @apiviz.uses ROC
- * @apiviz.has ROCResult oneway - - «create»
+ * @deprecated Renamed to {@link OutlierROCCurve}.
*/
-// TODO: maybe add a way to process clustering results as well?
-public class ComputeROCCurve implements Evaluator {
- /**
- * The label we use for marking ROCAUC values.
- */
- public static final String ROCAUC_LABEL = "ROCAUC";
-
- /**
- * The logger.
- */
- static final Logging logger = Logging.getLogger(ComputeROCCurve.class);
-
- /**
- * The pattern to identify positive classes.
- *
- * <p>
- * Key: {@code -rocauc.positive}
- * </p>
- */
- public static final OptionID POSITIVE_CLASS_NAME_ID = OptionID.getOrCreateOptionID("rocauc.positive", "Class label for the 'positive' class.");
-
- /**
- * Stores the "positive" class.
- */
- private Pattern positiveClassName;
-
+@Deprecated
+public class ComputeROCCurve extends OutlierROCCurve {
/**
* Constructor.
*
- * @param positive_class_name Positive class name pattern
+ * @param positive_class_name Pattern
*/
public ComputeROCCurve(Pattern positive_class_name) {
- super();
- this.positiveClassName = positive_class_name;
- }
-
- private ROCResult computeROCResult(int size, SetDBIDs positiveids, Iterator<DBID> iter) {
- ArrayModifiableDBIDs order = DBIDUtil.newArray(size);
- while(iter.hasNext()) {
- Object o = iter.next();
- if(!(o instanceof DBID)) {
- throw new IllegalStateException("Iterable result contained non-DBID - result didn't satisfy requirements");
- }
- else {
- order.add((DBID) o);
- }
- }
- if(order.size() != size) {
- throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
- }
- List<DoubleDoublePair> roccurve = ROC.materializeROC(size, positiveids, new ROC.SimpleAdapter(order.iterator()));
- double rocauc = ROC.computeAUC(roccurve);
- if(logger.isVerbose()) {
- logger.verbose(ROCAUC_LABEL +": " + rocauc);
- }
-
- List<String> header = new ArrayList<String>(1);
- header.add(ROCAUC_LABEL+": " + rocauc);
- final ROCResult rocresult = new ROCResult(roccurve, header, rocauc);
-
- return rocresult;
- }
-
- private ROCResult computeROCResult(int size, SetDBIDs positiveids, OutlierResult or) {
- List<DoubleDoublePair> roccurve = ROC.materializeROC(size, positiveids, new ROC.OutlierScoreAdapter(or));
- double rocauc = ROC.computeAUC(roccurve);
- if(logger.isVerbose()) {
- logger.verbose(ROCAUC_LABEL+": " + rocauc);
- }
-
- List<String> header = new ArrayList<String>(1);
- header.add(ROCAUC_LABEL+": " + rocauc);
- final ROCResult rocresult = new ROCResult(roccurve, header, rocauc);
-
- return rocresult;
- }
-
- @Override
- public void processNewResult(HierarchicalResult baseResult, Result result) {
- Database db = ResultUtil.findDatabase(baseResult);
- // Prepare
- SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
-
- if (positiveids.size() == 0) {
- logger.warning("Computing a ROC curve failed - no objects matched.");
- return;
- }
-
- boolean nonefound = true;
- List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
- List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
- // Outlier results are the main use case.
- for(OutlierResult o : oresults) {
- db.getHierarchy().add(o, computeROCResult(o.getScores().size(), positiveids, o));
- // Process them only once.
- orderings.remove(o.getOrdering());
- nonefound = false;
- }
-
- // FIXME: find appropriate place to add the derived result
- // otherwise apply an ordering to the database IDs.
- for(OrderingResult or : orderings) {
- Iterator<DBID> iter = or.iter(or.getDBIDs());
- db.getHierarchy().add(or, computeROCResult(or.getDBIDs().size(), positiveids, iter));
- nonefound = false;
- }
-
- if(nonefound) {
- return;
- // logger.warning("No results found to process with ROC curve analyzer. Got "+iterables.size()+" iterables, "+orderings.size()+" orderings.");
- }
- }
-
- /**
- * Result object for ROC curves.
- *
- * @author Erich Schubert
- */
- public static class ROCResult extends CollectionResult<DoubleDoublePair> {
- /**
- * AUC value
- */
- private double auc;
-
- /**
- * Constructor.
- *
- * @param col roc curve
- * @param header header
- * @param rocauc ROC AUC value
- */
- public ROCResult(Collection<DoubleDoublePair> col, Collection<String> header, double rocauc) {
- super("ROC Curve", "roc", col, header);
- this.auc = rocauc;
- }
-
- /**
- * @return the area under curve
- */
- public double getAUC() {
- return auc;
- }
+ super(positive_class_name);
}
/**
@@ -218,12 +58,15 @@ public class ComputeROCCurve implements Evaluator {
* @apiviz.exclude
*/
public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Pattern for positive class.
+ */
protected Pattern positiveClassName = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
+ PatternParameter positiveClassNameP = new PatternParameter(OutlierROCCurve.POSITIVE_CLASS_NAME_ID);
if(config.grab(positiveClassNameP)) {
positiveClassName = positiveClassNameP.getValue();
}
@@ -234,4 +77,4 @@ public class ComputeROCCurve implements Evaluator {
return new ComputeROCCurve(positiveClassName);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java b/src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java
index f7fdd254..d85cb137 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java
@@ -23,13 +23,12 @@ package de.lmu.ifi.dbs.elki.evaluation.roc;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.ArrayList;
import java.util.Iterator;
-import java.util.List;
import java.util.Set;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
@@ -37,8 +36,8 @@ import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleDoublePair;
import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
import de.lmu.ifi.dbs.elki.utilities.pairs.PairInterface;
@@ -81,23 +80,16 @@ public class ROC {
* 'same positions'.
* @return area under curve
*/
- public static <C extends Comparable<? super C>, T> List<DoubleDoublePair> materializeROC(int size, Set<? super T> ids, Iterator<? extends PairInterface<C, T>> nei) {
- int postot = ids.size();
- int negtot = size - postot;
- int poscnt = 0;
- int negcnt = 0;
- ArrayList<DoubleDoublePair> res = new ArrayList<DoubleDoublePair>(postot + 2);
+ public static <C extends Comparable<? super C>, T> XYCurve materializeROC(int size, Set<? super T> ids, Iterator<? extends PairInterface<C, T>> nei) {
+ final int postot = ids.size(), negtot = size - postot;
+ int poscnt = 0, negcnt = 0;
+ XYCurve curve = new XYCurve("True Negative Rate", "True Positive Rate", postot + 2);
// start in bottom left
- res.add(new DoubleDoublePair(0.0, 0.0));
+ curve.add(0.0, 0.0);
- PairInterface<C, T> prev = null;
+ C prevval = null;
while(nei.hasNext()) {
- // Previous positive rate - y axis
- double curpos = ((double) poscnt) / postot;
- // Previous negative rate - x axis
- double curneg = ((double) negcnt) / negtot;
-
// Analyze next point
PairInterface<C, T> cur = nei.next();
// positive or negative match?
@@ -108,46 +100,17 @@ public class ROC {
negcnt += 1;
}
// defer calculation for ties
- if((prev != null) && (prev.getFirst().compareTo(cur.getFirst()) == 0)) {
+ if((prevval != null) && (prevval.compareTo(cur.getFirst()) == 0)) {
continue;
}
- // simplify curve when possible:
- if(res.size() >= 2) {
- DoubleDoublePair last1 = res.get(res.size() - 2);
- DoubleDoublePair last2 = res.get(res.size() - 1);
- final double ldx = last2.first - last1.first;
- final double cdx = curneg - last2.first;
- final double ldy = last2.second - last1.second;
- final double cdy = curpos - last2.second;
- // vertical simplification
- if((ldx == 0) && (cdx == 0)) {
- res.remove(res.size() - 1);
- }
- // horizontal simplification
- else if((ldy == 0) && (cdy == 0)) {
- res.remove(res.size() - 1);
- }
- // diagonal simplification
- else if(ldy > 0 && cdy > 0) {
- if(Math.abs((ldx / ldy) - (cdx / cdy)) < 1E-10) {
- res.remove(res.size() - 1);
- }
- }
- }
- // Add a new point (for the previous entry!)
- res.add(new DoubleDoublePair(curneg, curpos));
- prev = cur;
+ // Add a new point.
+ curve.addAndSimplify(negcnt / (double) negtot, poscnt / (double) postot);
+ prevval = cur.getFirst();
}
- // ensure we end up in the top right corner.
- // Since we didn't add a point for the last entry yet, this likely is
- // needed.
- {
- DoubleDoublePair last = res.get(res.size() - 1);
- if(last.first < 1.0 || last.second < 1.0) {
- res.add(new DoubleDoublePair(1.0, 1.0));
- }
- }
- return res;
+ // Ensure we end up in the top right corner.
+ // Simplification will skip this if we already were.
+ curve.addAndSimplify(1.0, 1.0);
+ return curve;
}
/**
@@ -162,23 +125,19 @@ public class ROC {
* 'same positions'.
* @return area under curve
*/
- public static <C extends Comparable<? super C>> List<DoubleDoublePair> materializeROC(int size, SetDBIDs ids, Iterator<? extends PairInterface<C, DBID>> nei) {
- int postot = ids.size();
- int negtot = size - postot;
- int poscnt = 0;
- int negcnt = 0;
- ArrayList<DoubleDoublePair> res = new ArrayList<DoubleDoublePair>(postot + 2);
+ public static <C extends Comparable<? super C>> XYCurve materializeROC(int size, SetDBIDs ids, Iterator<? extends PairInterface<C, DBID>> nei) {
+ final int postot = ids.size(), negtot = size - postot;
+ int poscnt = 0, negcnt = 0;
+ XYCurve curve = new XYCurve("True Negative Rate", "True Positive Rate", postot + 2);
// start in bottom left
- res.add(new DoubleDoublePair(0.0, 0.0));
+ curve.add(0.0, 0.0);
- PairInterface<C, DBID> prev = null;
+ C prevval = null;
while(nei.hasNext()) {
- // Previous positive rate - y axis
- double curpos = ((double) poscnt) / postot;
- // Previous negative rate - x axis
- double curneg = ((double) negcnt) / negtot;
-
+ // Rates at *previous* data point. Because of tie handling strategy!
+ final double trueneg = negcnt / (double) negtot;
+ final double truepos = poscnt / (double) postot;
// Analyze next point
PairInterface<C, DBID> cur = nei.next();
// positive or negative match?
@@ -189,46 +148,17 @@ public class ROC {
negcnt += 1;
}
// defer calculation for ties
- if((prev != null) && (prev.getFirst().compareTo(cur.getFirst()) == 0)) {
+ if((prevval != null) && (prevval.compareTo(cur.getFirst()) == 0)) {
continue;
}
- // simplify curve when possible:
- if(res.size() >= 2) {
- DoubleDoublePair last1 = res.get(res.size() - 2);
- DoubleDoublePair last2 = res.get(res.size() - 1);
- final double ldx = last2.first - last1.first;
- final double cdx = curneg - last2.first;
- final double ldy = last2.second - last1.second;
- final double cdy = curpos - last2.second;
- // vertical simplification
- if((ldx == 0) && (cdx == 0)) {
- res.remove(res.size() - 1);
- }
- // horizontal simplification
- else if((ldy == 0) && (cdy == 0)) {
- res.remove(res.size() - 1);
- }
- // diagonal simplification
- else if(ldy > 0 && cdy > 0) {
- if(Math.abs((ldx / ldy) - (cdx / cdy)) < 1E-10) {
- res.remove(res.size() - 1);
- }
- }
- }
- // Add a new point (for the previous entry!)
- res.add(new DoubleDoublePair(curneg, curpos));
- prev = cur;
- }
- // ensure we end up in the top right corner.
- // Since we didn't add a point for the last entry yet, this likely is
- // needed.
- {
- DoubleDoublePair last = res.get(res.size() - 1);
- if(last.first < 1.0 || last.second < 1.0) {
- res.add(new DoubleDoublePair(1.0, 1.0));
- }
+ // Add point for *previous* result (since we are no longer tied with it)
+ curve.addAndSimplify(trueneg, truepos);
+ prevval = cur.getFirst();
}
- return res;
+ // Ensure we end up in the top right corner.
+ // Simplification will skip this if we already were.
+ curve.addAndSimplify(1.0, 1.0);
+ return curve;
}
/**
@@ -245,27 +175,28 @@ public class ROC {
/**
* Original Iterator
*/
- private Iterator<DBID> iter;
+ private DBIDIter iter;
/**
* Constructor
*
* @param iter Iterator for object IDs
*/
- public SimpleAdapter(Iterator<DBID> iter) {
+ public SimpleAdapter(DBIDIter iter) {
super();
this.iter = iter;
}
@Override
public boolean hasNext() {
- return this.iter.hasNext();
+ return this.iter.valid();
}
@Override
public DBIDPair next() {
- DBID id = this.iter.next();
- return DBIDUtil.newPair(id, id);
+ DBIDPair pair = DBIDUtil.newPair(iter, iter);
+ this.iter.advance();
+ return pair;
}
@Override
@@ -332,7 +263,7 @@ public class ROC {
/**
* Original Iterator
*/
- private Iterator<DBID> iter;
+ private DBIDIter iter;
/**
* Outlier score
@@ -346,18 +277,19 @@ public class ROC {
*/
public OutlierScoreAdapter(OutlierResult o) {
super();
- this.iter = o.getOrdering().iter(o.getScores().getDBIDs());
+ this.iter = o.getOrdering().iter(o.getScores().getDBIDs()).iter();
this.scores = o.getScores();
}
@Override
public boolean hasNext() {
- return this.iter.hasNext();
+ return this.iter.valid();
}
@Override
public DoubleObjPair<DBID> next() {
- DBID id = this.iter.next();
+ DBID id = this.iter.getDBID();
+ iter.advance();
return new DoubleObjPair<DBID>(scores.get(id), id);
}
@@ -368,38 +300,6 @@ public class ROC {
}
/**
- * compute the Area Under Curve (difference to y axis) for an arbitrary
- * polygon
- *
- * @param curve Iterable list of points (x,y)
- * @return area und curve
- */
- public static double computeAUC(Iterable<DoubleDoublePair> curve) {
- double result = 0.0;
- Iterator<DoubleDoublePair> iter = curve.iterator();
- // it doesn't make sense to speak about the "area under a curve" when there
- // is no curve.
- if(!iter.hasNext()) {
- return Double.NaN;
- }
- // starting point
- DoubleDoublePair prev = iter.next();
- // check there is at least a second point
- if(!iter.hasNext()) {
- return Double.NaN;
- }
- while(iter.hasNext()) {
- DoubleDoublePair next = iter.next();
- // width * height at half way.
- double width = next.first - prev.first;
- double meanheight = (next.second + prev.second) / 2;
- result += width * meanheight;
- prev = next;
- }
- return result;
- }
-
- /**
* Compute a ROC curves Area-under-curve for a QueryResult and a Cluster.
*
* @param <D> Distance type
@@ -424,8 +324,8 @@ public class ROC {
*/
public static <D extends Distance<D>> double computeROCAUCDistanceResult(int size, DBIDs ids, Iterable<? extends DistanceResultPair<D>> nei) {
// TODO: do not materialize the ROC, but introduce an iterator interface
- List<DoubleDoublePair> roc = materializeROC(size, DBIDUtil.ensureSet(ids), new DistanceResultAdapter<D>(nei.iterator()));
- return computeAUC(roc);
+ XYCurve roc = materializeROC(size, DBIDUtil.ensureSet(ids), new DistanceResultAdapter<D>(nei.iterator()));
+ return XYCurve.areaUnderCurve(roc);
}
/**
@@ -438,7 +338,7 @@ public class ROC {
*/
public static double computeROCAUCSimple(int size, DBIDs ids, DBIDs nei) {
// TODO: do not materialize the ROC, but introduce an iterator interface
- List<DoubleDoublePair> roc = materializeROC(size, DBIDUtil.ensureSet(ids), new SimpleAdapter(nei.iterator()));
- return computeAUC(roc);
+ XYCurve roc = materializeROC(size, DBIDUtil.ensureSet(ids), new SimpleAdapter(nei.iter()));
+ return XYCurve.areaUnderCurve(roc);
}
-}
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java b/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java
index 5344da15..0e5c7a02 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java
@@ -27,7 +27,6 @@ import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.IOException;
-import java.util.Iterator;
import java.util.List;
import javax.imageio.ImageIO;
@@ -37,6 +36,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -49,7 +49,6 @@ import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.math.DoubleMinMax;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
-import de.lmu.ifi.dbs.elki.result.IterableResult;
import de.lmu.ifi.dbs.elki.result.OrderingResult;
import de.lmu.ifi.dbs.elki.result.PixmapResult;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -124,10 +123,10 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
* @param iter DBID iterator
* @return result object
*/
- private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, Iterator<DBID> iter) {
+ private SimilarityMatrix computeSimilarityMatrixImage(Relation<O> relation, DBIDIter iter) {
ArrayModifiableDBIDs order = DBIDUtil.newArray(relation.size());
- while(iter.hasNext()) {
- Object o = iter.next();
+ for(; iter.valid(); iter.advance()) {
+ Object o = iter.getDBID();
if(!(o instanceof DBID)) {
throw new IllegalStateException("Iterable result contained non-DBID - result didn't satisfy requirements");
}
@@ -199,54 +198,27 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
return new SimilarityMatrix(img, relation, order);
}
- /**
- * Wrap the uncheckable cast with the manual check.
- *
- * @param ir Interable result
- * @return Iterator if Integer iterable, null otherwise.
- */
- @SuppressWarnings("unchecked")
- private Iterator<DBID> getDBIDIterator(IterableResult<?> ir) {
- Iterator<?> testit = ir.iterator();
- if(testit.hasNext() && (testit.next() instanceof DBID)) {
- // note: we DO want a fresh iterator here!
- return (Iterator<DBID>) ir.iterator();
- }
- return null;
- }
-
@Override
public void processNewResult(HierarchicalResult baseResult, Result result) {
Database db = ResultUtil.findDatabase(baseResult);
boolean nonefound = true;
List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
- List<IterableResult<?>> iterables = ResultUtil.getIterableResults(result);
List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
// Outlier results are the main use case.
for(OutlierResult o : oresults) {
final OrderingResult or = o.getOrdering();
Relation<O> relation = db.getRelation(distanceFunction.getInputTypeRestriction());
- db.getHierarchy().add(or, computeSimilarityMatrixImage(relation, or.iter(relation.getDBIDs())));
+ db.getHierarchy().add(or, computeSimilarityMatrixImage(relation, or.iter(relation.getDBIDs()).iter()));
// Process them only once.
orderings.remove(or);
nonefound = false;
}
- // try iterable results first
- // FIXME: find the appropriate place to call addDerivedResult
- for(IterableResult<?> ir : iterables) {
- Iterator<DBID> iter = getDBIDIterator(ir);
- if(iter != null) {
- Relation<O> relation = db.getRelation(distanceFunction.getInputTypeRestriction());
- db.getHierarchy().add(ir, computeSimilarityMatrixImage(relation, iter));
- nonefound = false;
- }
- }
// FIXME: find appropriate place to add the derived result
// otherwise apply an ordering to the database IDs.
for(OrderingResult or : orderings) {
Relation<O> relation = db.getRelation(distanceFunction.getInputTypeRestriction());
- Iterator<DBID> iter = or.iter(relation.getDBIDs());
+ DBIDIter iter = or.iter(relation.getDBIDs()).iter();
db.getHierarchy().add(or, computeSimilarityMatrixImage(relation, iter));
nonefound = false;
}