summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/evaluation
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation')
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java43
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/Evaluator.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrix.java359
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrixEvaluationResult.java92
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/AbstractHoldout.java118
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/DisjointCrossValidation.java144
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/Holdout.java67
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/LeaveOneOut.java82
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedCrossValidation.java141
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedHoldout.java78
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/StratifiedCrossValidation.java162
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/TrainingAndTestSet.java89
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/classification/package-info.java27
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java87
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/EditDistance.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java38
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java92
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java70
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java53
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluateSilhouette.java265
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/package-info.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/histogram/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/index/IndexStatistics.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/index/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java6
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java35
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierRankingEvaluation.java211
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java23
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/outlier/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/package-info.java2
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java656
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/roc/package-info.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/AbstractScoreEvaluation.java55
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/AveragePrecisionEvaluation.java76
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/MaximumF1Evaluation.java76
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/PrecisionAtKEvaluation.java127
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/ROCEvaluation.java147
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/ScoreEvaluation.java114
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDRefIter.java39
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDsTest.java59
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DecreasingVectorIter.java116
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DistanceResultAdapter.java92
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/FilteredDistanceResultAdapter.java76
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/IncreasingVectorIter.java116
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/OutlierScoreAdapter.java103
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/SimpleAdapter.java89
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorNonZero.java44
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorOverThreshold.java79
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorZero.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/package-info.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/scores/package-info.java28
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java29
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/package-info.java2
66 files changed, 3699 insertions, 938 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java
index fe2e1dc5..91202efe 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/AutomaticEvaluation.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,6 +36,7 @@ import de.lmu.ifi.dbs.elki.evaluation.histogram.ComputeOutlierHistogram;
import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierPrecisionAtKCurve;
import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierPrecisionRecallCurve;
import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierROCCurve;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.OutlierRankingEvaluation;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -73,13 +74,13 @@ public class AutomaticEvaluation implements Evaluator {
protected void autoEvaluateOutliers(HierarchicalResult baseResult, Result newResult) {
Collection<OutlierResult> outliers = ResultUtil.filterResults(newResult, OutlierResult.class);
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.debug("Number of new outlier results: " + outliers.size());
}
- if (outliers.size() > 0) {
+ if(outliers.size() > 0) {
ResultUtil.ensureClusteringResult(ResultUtil.findDatabase(baseResult), baseResult);
Collection<Clustering<?>> clusterings = ResultUtil.filterResults(baseResult, Clustering.class);
- if (clusterings.size() == 0) {
+ if(clusterings.size() == 0) {
LOG.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
return;
}
@@ -88,28 +89,30 @@ public class AutomaticEvaluation implements Evaluator {
int min = Integer.MAX_VALUE;
int total = 0;
String label = null;
- if (basec.getAllClusters().size() > 1) {
- for (Cluster<?> c : basec.getAllClusters()) {
+ if(basec.getAllClusters().size() > 1) {
+ for(Cluster<?> c : basec.getAllClusters()) {
final int csize = c.getIDs().size();
total += csize;
- if (csize < min) {
+ if(csize < min) {
min = csize;
label = c.getName();
}
}
}
- if (label == null) {
+ if(label == null) {
LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
return;
}
- if (min == 1) {
+ if(min == 1) {
LOG.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
}
- if (min > 0.05 * total) {
+ if(min > 0.05 * total) {
LOG.warning("The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!");
}
LOG.verbose("Evaluating using minority class: " + label);
Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
+ // Evaluate rankings.
+ new OutlierRankingEvaluation(pat).processNewResult(baseResult, newResult);
// Compute ROC curve
new OutlierROCCurve(pat).processNewResult(baseResult, newResult);
// Compute Precision at k
@@ -123,25 +126,29 @@ public class AutomaticEvaluation implements Evaluator {
protected void autoEvaluateClusterings(HierarchicalResult baseResult, Result newResult) {
Collection<Clustering<?>> clusterings = ResultUtil.filterResults(newResult, Clustering.class);
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
LOG.warning("Number of new clustering results: " + clusterings.size());
}
- for (Iterator<Clustering<?>> c = clusterings.iterator(); c.hasNext();) {
+ for(Iterator<Clustering<?>> c = clusterings.iterator(); c.hasNext();) {
Clustering<?> test = c.next();
- if ("allinone-clustering".equals(test.getShortName())) {
+ if("allinone-clustering".equals(test.getShortName())) {
c.remove();
- } else if ("allinnoise-clustering".equals(test.getShortName())) {
+ }
+ else if("allinnoise-clustering".equals(test.getShortName())) {
c.remove();
- } else if ("bylabel-clustering".equals(test.getShortName())) {
+ }
+ else if("bylabel-clustering".equals(test.getShortName())) {
c.remove();
- } else if ("bymodel-clustering".equals(test.getShortName())) {
+ }
+ else if("bymodel-clustering".equals(test.getShortName())) {
c.remove();
}
}
- if (clusterings.size() > 0) {
+ if(clusterings.size() > 0) {
try {
new EvaluateClustering(new ByLabelClustering(), false, true).processNewResult(baseResult, newResult);
- } catch (NoSupportedDataTypeException e) {
+ }
+ catch(NoSupportedDataTypeException e) {
// Pass - the data probably did not have labels.
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/Evaluator.java b/src/de/lmu/ifi/dbs/elki/evaluation/Evaluator.java
index 140b29ef..b060af13 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/Evaluator.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/Evaluator.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java
index 640a8c9d..7d7b4bed 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/NoAutomaticEvaluation.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrix.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrix.java
new file mode 100644
index 00000000..e7b4cd4f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrix.java
@@ -0,0 +1,359 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2014
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.text.NumberFormat;
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
+
+/**
+ * Provides a confusion matrix with some prediction performance measures that
+ * can be derived from a confusion matrix.
+ *
+ * @author Arthur Zimek
+ */
+public class ConfusionMatrix {
+ /**
+ * Holds the confusion matrix. Must be a square matrix. The rows (first index)
+ * give the values which classes are classified as row, the columns (second
+ * index) give the values the class col has been classified as. Thus,
+ * <code>confusion[predicted][real]</code> addresses the number of instances
+ * of class <i>real</i> that have been assigned to the class <i>predicted</i>.
+ */
+ private int[][] confusion;
+
+ /**
+ * Holds the class labels.
+ */
+ private ArrayList<ClassLabel> labels;
+
+ /**
+ * Provides a confusion matrix for the given values.
+ *
+ * @param labels the class labels - must conform the confusion matrix in
+ * length
+ * @param confusion the confusion matrix. Must be a square matrix. The rows
+ * (first index) give the values which classes are classified as row,
+ * the columns (second index) give the values the class col has been
+ * classified as. Thus, <code>confusion[predicted][real]</code>
+ * addresses the number of instances of class <i>real</i> that have
+ * been assigned to the class <i>predicted</i>.
+ * @throws IllegalArgumentException if the confusion matrix is not square or
+ * not complete or if the length of class labels does not conform the
+ * length of the confusion matrix
+ */
+ public ConfusionMatrix(ArrayList<ClassLabel> labels, int[][] confusion) throws IllegalArgumentException {
+ for(int i = 0; i < confusion.length; i++) {
+ if(confusion.length != confusion[i].length) {
+ throw new IllegalArgumentException("Confusion matrix irregular: row-dimension = " + confusion.length + ", col-dimension in col" + i + " = " + confusion[i].length);
+ }
+ }
+ if(confusion.length != labels.size()) {
+ throw new IllegalArgumentException("Number of class labels does not match row dimension of confusion matrix.");
+ }
+ this.confusion = confusion;
+ this.labels = labels;
+ }
+
+ /**
+ * Provides the <i>true positive rate</i>. Aka <i>accuracy</i> or
+ * <i>sensitivity</i> or <i>recall</i>: <code>TP / (TP+FN)</code>.
+ *
+ *
+ * @return the true positive rate
+ */
+ public double truePositiveRate() {
+ return ((double) truePositives()) / (double) totalInstances();
+ }
+
+ /**
+ * Provides the <i>false positive rate</i> for the specified class.
+ *
+ *
+ * @param classindex the index of the class to retrieve the false positive
+ * rate for
+ * @return the false positive rate for the specified class
+ */
+ public double falsePositiveRate(int classindex) {
+ int fp = falsePositives(classindex);
+ int tn = trueNegatives(classindex);
+ return ((double) fp) / ((double) (fp + tn));
+ }
+
+ /**
+ * Provides the <i>false positive rate</i>. Aka <i>false alarm rate</i>:
+ * <code>FP / (FP+TN)</code>.
+ *
+ *
+ * @return the false positive rate
+ */
+ public double falsePositiveRate() {
+ double fpr = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ fpr += falsePositiveRate(i) * colSum(i);
+ }
+ return fpr / totalInstances();
+ }
+
+ /**
+ * Provides the <i>positive predicted value</i> for the specified class.
+ *
+ *
+ * @param classindex the index of the class to retrieve the positive predicted
+ * value for
+ * @return the positive predicted value for the specified class
+ */
+ public double positivePredictedValue(int classindex) {
+ int tp = truePositives(classindex);
+ return (double) tp / ((double) (tp + falsePositives(classindex)));
+ }
+
+ /**
+ * Provides the <i>positive predicted value</i>. Aka <i>precision</i> or
+ * <i>specificity</i>: <code>TP / (TP+FP)</code>.
+ *
+ * @return the positive predicted value
+ */
+ public double positivePredictedValue() {
+ double ppv = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ ppv += positivePredictedValue(i) * colSum(i);
+ }
+ return ppv / totalInstances();
+ }
+
+ /**
+ * The number of correctly classified instances.
+ *
+ *
+ * @return the number of correctly classified instances
+ */
+ public int truePositives() {
+ int tp = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ tp += truePositives(i);
+ }
+ return tp;
+ }
+
+ /**
+ * The number of correctly classified instances belonging to the specified
+ * class.
+ *
+ *
+ * @param classindex the index of the class to retrieve the correctly
+ * classified instances of
+ * @return the number of correctly classified instances belonging to the
+ * specified class
+ */
+ public int truePositives(int classindex) {
+ return confusion[classindex][classindex];
+ }
+
+ /**
+ * Provides the <i>true positive rate</i> for the specified class.
+ *
+ * @param classindex the index of the class to retrieve the true positive rate
+ * for
+ * @return the true positive rate
+ */
+ public double truePositiveRate(int classindex) {
+ int tp = truePositives(classindex);
+ return (double) tp / ((double) (tp + falseNegatives(classindex)));
+ }
+
+ /**
+ * The number of true negatives of the specified class.
+ *
+ *
+ * @param classindex the index of the class to retrieve the true negatives for
+ * @return the number of true negatives of the specified class
+ */
+ public int trueNegatives(int classindex) {
+ int tn = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ for(int j = 0; j < confusion[i].length; j++) {
+ if(i != classindex && j != classindex) {
+ tn += confusion[i][j];
+ }
+ }
+ }
+ return tn;
+ }
+
+ /**
+ * The false positives for the specified class.
+ *
+ *
+ * @param classindex the index of the class to retrieve the false positives
+ * for
+ * @return the false positives for the specified class
+ */
+ public int falsePositives(int classindex) {
+ int fp = 0;
+ for(int i = 0; i < confusion[classindex].length; i++) {
+ if(i != classindex) {
+ fp += confusion[classindex][i];
+ }
+ }
+ return fp;
+ }
+
+ /**
+ * The false negatives for the specified class.
+ *
+ *
+ * @param classindex the index of the class to retrieve the false negatives
+ * for
+ * @return the false negatives for the specified class
+ */
+ public int falseNegatives(int classindex) {
+ int fn = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ if(i != classindex) {
+ fn += confusion[i][classindex];
+ }
+ }
+ return fn;
+ }
+
+ /**
+ * The total number of instances covered by this confusion matrix.
+ *
+ *
+ * @return the total number of instances covered by this confusion matrix
+ */
+ public int totalInstances() {
+ int total = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ for(int j = 0; j < confusion[i].length; j++) {
+ total += confusion[i][j];
+ }
+ }
+ return total;
+ }
+
+ /**
+ * The number of instances present in the specified row. I.e., classified as
+ * class <code>classindex</code>.
+ *
+ *
+ * @param classindex the index of the class the resulting number of instances
+ * has been classified as
+ * @return the number of instances present in the specified row
+ */
+ public int rowSum(int classindex) {
+ int s = 0;
+ for(int i = 0; i < confusion[classindex].length; i++) {
+ s += confusion[classindex][i];
+ }
+ return s;
+ }
+
+ /**
+ * The number of instances present in the specified column. I.e., the
+ * instances of class <code>classindex</code>.
+ *
+ *
+ * @param classindex the index of the class theresulting number of instances
+ * belongs to
+ * @return the number of instances present in the specified column
+ */
+ public int colSum(int classindex) {
+ int s = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ s += confusion[i][classindex];
+ }
+ return s;
+ }
+
+ /**
+ * The number of instances belonging to class <code>trueClassindex</code> and
+ * predicted as <code>predictedClassindex</code>.
+ *
+ *
+ * @param trueClassindex the true class index
+ * @param predictedClassindex the predicted class index
+ * @return the number of instances belonging to class
+ * <code>trueClassindex</code> and predicted as
+ * <code>predictedClassindex</code>
+ */
+ public int value(int trueClassindex, int predictedClassindex) {
+ return confusion[predictedClassindex][trueClassindex];
+ }
+
+ /**
+ * Provides a String representation of this confusion matrix.
+ *
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString() {
+ int max = 0;
+ for(int i = 0; i < confusion.length; i++) {
+ for(int j = 0; j < confusion[i].length; j++) {
+ if(confusion[i][j] > max) {
+ max = confusion[i][j];
+ }
+ }
+ }
+ String classPrefix = "C_";
+ NumberFormat nf = NumberFormat.getInstance();
+ nf.setParseIntegerOnly(true);
+ int labelLength = Integer.toString(labels.size()).length();
+ nf.setMaximumIntegerDigits(labelLength);
+ nf.setMinimumIntegerDigits(labelLength);
+ int cell = Math.max(Integer.toString(max).length(), labelLength + classPrefix.length());
+ String separator = " ";
+ StringBuilder representation = new StringBuilder();
+ for(int i = 1; i <= labels.size(); i++) {
+ representation.append(separator);
+ String label = classPrefix + nf.format(i);
+ int space = cell - labelLength - classPrefix.length();
+ for(int s = 0; s <= space; s++) {
+ representation.append(' ');
+ }
+ representation.append(label);
+ }
+ representation.append('\n');
+ for(int row = 0; row < confusion.length; row++) {
+ for(int col = 0; col < confusion[row].length; col++) {
+ representation.append(separator);
+ String entry = Integer.toString(confusion[row][col]);
+ int space = cell - entry.length();
+ for(int s = 0; s <= space; s++) {
+ representation.append(' ');
+ }
+ representation.append(entry);
+ }
+ representation.append(separator);
+ representation.append(classPrefix);
+ representation.append(nf.format(row + 1));
+ representation.append(": ");
+ representation.append(labels.get(row));
+ representation.append('\n');
+ }
+ return representation.toString();
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrixEvaluationResult.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrixEvaluationResult.java
new file mode 100644
index 00000000..6bf0f648
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/ConfusionMatrixEvaluationResult.java
@@ -0,0 +1,92 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
+import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+
+/**
+ * Provides the prediction performance measures for a classifier based on the
+ * confusion matrix.
+ *
+ * Note: this API is non-final, and will be refactored soon.
+ *
+ * @author Arthur Zimek
+ */
+public class ConfusionMatrixEvaluationResult implements Result, TextWriteable {
+ /**
+ * Holds the confusion matrix.
+ */
+ private ConfusionMatrix confusionmatrix;
+
+ /**
+ * Holds the used EvaluationProcedure.
+ */
+ private String evaluationName;
+
+ /**
+ * Provides an evaluation based on the given confusion matrix.
+ *
+ * @param confusionmatrix the confusion matrix to provide the prediction
+ * performance measures for
+ * @param evaluationName name of the evaluation procedure used
+ */
+ public ConfusionMatrixEvaluationResult(ConfusionMatrix confusionmatrix, String evaluationName) {
+ super();
+ this.confusionmatrix = confusionmatrix;
+ this.evaluationName = evaluationName;
+ }
+
+ @Override
+ public void writeToText(TextWriterStream out, String label) {
+ out.commentPrintLn("Evaluation:");
+ out.commentPrintLn(evaluationName);
+ // out.println(evaluationProcedure.setting());
+ out.commentPrintLn("Accuracy: \n correctly classified instances: ");
+ out.commentPrintLn(confusionmatrix.truePositives());
+ out.commentPrintLn("true positive rate: ");
+ double tpr = confusionmatrix.truePositiveRate();
+ out.commentPrintLn(tpr);
+ out.commentPrintLn("false positive rate: ");
+ out.commentPrintLn(confusionmatrix.falsePositiveRate());
+ out.commentPrintLn("positive predicted value: ");
+ double ppv = confusionmatrix.positivePredictedValue();
+ out.commentPrintLn(ppv);
+ out.commentPrintLn("F1-measure: ");
+ out.commentPrintLn((2 * ppv * tpr) / (ppv + tpr));
+ out.commentPrintLn("\nconfusion matrix:\n");
+ out.commentPrintLn(confusionmatrix.toString());
+ }
+
+ @Override
+ public String getLongName() {
+ return "confusionmatrixresult";
+ }
+
+ @Override
+ public String getShortName() {
+ return "confusionmatrixresult";
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/AbstractHoldout.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/AbstractHoldout.java
new file mode 100644
index 00000000..af10eac0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/AbstractHoldout.java
@@ -0,0 +1,118 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/**
+ * Split a data set for holdout evaluation.
+ *
+ * @author Erich Schubert
+ */
+public abstract class AbstractHoldout implements Holdout {
+ /**
+ * Labels in the current data set.
+ */
+ protected ArrayList<ClassLabel> labels;
+
+ /**
+ * Column containing the class labels.
+ */
+ protected int labelcol;
+
+ /**
+ * Input data bundle.
+ */
+ protected MultipleObjectsBundle bundle;
+
+ @Override
+ public void initialize(MultipleObjectsBundle bundle) {
+ this.bundle = bundle;
+ this.labelcol = findClassLabelColumn(bundle);
+ this.labels = allClassLabels(bundle);
+ }
+
+ @Override
+ public ArrayList<ClassLabel> getLabels() {
+ return labels;
+ }
+
+ /**
+ * Find the class label column in the given data set.
+ *
+ * @param bundle Bundle
+ * @return Class label column
+ */
+ public static int findClassLabelColumn(MultipleObjectsBundle bundle) {
+ for(int i = 0, l = bundle.metaLength(); i < l; ++i) {
+ if(TypeUtil.CLASSLABEL.isAssignableFromType(bundle.meta(i))) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Get an array of all class labels in a given data set.
+ *
+ * @param bundle Bundle
+ * @return Class labels.
+ */
+ public static ArrayList<ClassLabel> allClassLabels(MultipleObjectsBundle bundle) {
+ int col = findClassLabelColumn(bundle);
+ // TODO: automatically infer class labels?
+ if(col < 0) {
+ throw new AbortException("No class label found (try using ClassLabelFilter).");
+ }
+ return allClassLabels(bundle, col);
+ }
+
+ /**
+ * Get an array of all class labels in a given data set.
+ *
+ * @param bundle Bundle
+ * @param col Column
+ * @return Class labels.
+ */
+ public static ArrayList<ClassLabel> allClassLabels(MultipleObjectsBundle bundle, int col) {
+ HashSet<ClassLabel> labels = new HashSet<ClassLabel>();
+ for(int i = 0, l = bundle.dataLength(); i < l; ++i) {
+ Object o = bundle.data(i, col);
+ if(o == null || !(o instanceof ClassLabel)) {
+ continue;
+ }
+ labels.add((ClassLabel) o);
+ }
+ ArrayList<ClassLabel> ret = new ArrayList<>(labels);
+ Collections.sort(ret);
+ return ret;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/DisjointCrossValidation.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/DisjointCrossValidation.java
new file mode 100644
index 00000000..75503018
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/DisjointCrossValidation.java
@@ -0,0 +1,144 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * DisjointCrossValidationHoldout provides a set of partitions of a database to
+ * perform cross-validation. The test sets are guaranteed to be disjoint.
+ *
+ * @author Arthur Zimek
+ */
+public class DisjointCrossValidation extends RandomizedHoldout {
+ /**
+ * Holds the number of folds, current fold.
+ */
+ protected int nfold, fold;
+
+ /**
+ * Partition assignment and size.
+ */
+ protected int[] assignment, sizes;
+
+ /**
+ * Constructor.
+ *
+ * @param random Random seeding
+ * @param nfold Number of folds.
+ */
+ public DisjointCrossValidation(RandomFactory random, int nfold) {
+ super(random);
+ this.nfold = nfold;
+ }
+
+ @Override
+ public void initialize(MultipleObjectsBundle bundle) {
+ super.initialize(bundle);
+ fold = 0;
+
+ Random rnd = random.getSingleThreadedRandom();
+ sizes = new int[nfold];
+ assignment = new int[bundle.dataLength()];
+ for(int i = 0; i < assignment.length; ++i) {
+ int p = rnd.nextInt(nfold);
+ assignment[i] = p;
+ ++sizes[p];
+ }
+ }
+
+ @Override
+ public int numberOfPartitions() {
+ return nfold;
+ }
+
+ @Override
+ public TrainingAndTestSet nextPartitioning() {
+ if(fold >= nfold) {
+ return null;
+ }
+ final int tesize = sizes[fold], trsize = bundle.dataLength() - tesize;
+ MultipleObjectsBundle training = new MultipleObjectsBundle();
+ MultipleObjectsBundle test = new MultipleObjectsBundle();
+ // Process column-wise.
+ for(int c = 0, cs = bundle.metaLength(); c < cs; ++c) {
+ ArrayList<Object> tr = new ArrayList<>(trsize), te = new ArrayList<>(tesize);
+ for(int i = 0; i < bundle.dataLength(); ++i) {
+ ((assignment[i] != fold) ? tr : te).add(bundle.data(i, c));
+ }
+ training.appendColumn(bundle.meta(c), tr);
+ test.appendColumn(bundle.meta(c), te);
+ }
+
+ ++fold;
+ return new TrainingAndTestSet(training, test, labels);
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends RandomizedHoldout.Parameterizer {
+ /**
+ * Default number of folds.
+ */
+ public static final int N_DEFAULT = 10;
+
+ /**
+ * Parameter for number of folds.
+ */
+ public static final OptionID NFOLD_ID = new OptionID("nfold", "Number of folds for cross-validation.");
+
+ /**
+ * Holds the number of folds.
+ */
+ protected int nfold = N_DEFAULT;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter nfoldP = new IntParameter(NFOLD_ID, N_DEFAULT)//
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(nfoldP)) {
+ nfold = nfoldP.intValue();
+ }
+ }
+
+ @Override
+ protected DisjointCrossValidation makeInstance() {
+ return new DisjointCrossValidation(random, nfold);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/Holdout.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/Holdout.java
new file mode 100644
index 00000000..17cd56ac
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/Holdout.java
@@ -0,0 +1,67 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+
+/**
+ * A holdout procedure is to provide a range of partitions of a database to
+ * pairs of training and test data sets.
+ *
+ * @author Erich Schubert
+ */
+public interface Holdout {
+ /**
+ * Initialize the holdout procedure for a data set.
+ *
+ * @param bundle Data set bundle
+ */
+ void initialize(MultipleObjectsBundle bundle);
+
+ /**
+ * Get the next partitioning of the given holdout.
+ *
+ * @return Next partitioning of the data set
+ */
+ TrainingAndTestSet nextPartitioning();
+
+ /**
+ * Get the <i>sorted</i> class labels present in this data set.
+ *
+ * For indexing into assignment arrays.
+ *
+ * @return Class labels
+ */
+ ArrayList<ClassLabel> getLabels();
+
+ /**
+ * How many partitions to test.
+ *
+ * @return Number of partitions.
+ */
+ int numberOfPartitions();
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/LeaveOneOut.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/LeaveOneOut.java
new file mode 100644
index 00000000..1059f0a1
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/LeaveOneOut.java
@@ -0,0 +1,82 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+
+/**
+ * A leave-one-out-holdout is to provide a set of partitions of a database where
+ * each instances once hold out as a test instance while the respectively
+ * remaining instances are training instances.
+ *
+ * @author Arthur Zimek
+ */
+public class LeaveOneOut extends AbstractHoldout {
+ /**
+ * Size of the data set.
+ */
+ private int len, pos;
+
+ /**
+ * Constructor.
+ */
+ public LeaveOneOut() {
+ super();
+ }
+
+ @Override
+ public void initialize(MultipleObjectsBundle bundle) {
+ super.initialize(bundle);
+ len = bundle.dataLength();
+ pos = 0;
+ }
+
+ @Override
+ public int numberOfPartitions() {
+ return len;
+ }
+
+ @Override
+ public TrainingAndTestSet nextPartitioning() {
+ if(pos >= len) {
+ return null;
+ }
+ MultipleObjectsBundle training = new MultipleObjectsBundle();
+ MultipleObjectsBundle test = new MultipleObjectsBundle();
+ // Process column-wise.
+ for(int c = 0, cs = bundle.metaLength(); c < cs; ++c) {
+ ArrayList<Object> tr = new ArrayList<>(len - 1), te = new ArrayList<>(1);
+ for(int i = 0; i < bundle.dataLength(); ++i) {
+ ((i != pos) ? tr : te).add(bundle.data(i, c));
+ }
+ training.appendColumn(bundle.meta(c), tr);
+ test.appendColumn(bundle.meta(c), te);
+ }
+
+ ++pos;
+ return new TrainingAndTestSet(training, test, labels);
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedCrossValidation.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedCrossValidation.java
new file mode 100644
index 00000000..99607398
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedCrossValidation.java
@@ -0,0 +1,141 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * RandomizedCrossValidationHoldout provides a set of partitions of a database
+ * to perform cross-validation. The test sets are not guaranteed to be disjoint.
+ *
+ * @author Arthur Zimek
+ */
+public class RandomizedCrossValidation extends RandomizedHoldout {
+ /**
+ * Holds the number of folds, current fold.
+ */
+ protected int nfold, fold;
+
+ /**
+ * Constructor for n-fold cross-validation.
+ *
+ * @param random Random seed
+ * @param nfold Number of folds
+ */
+ public RandomizedCrossValidation(RandomFactory random, int nfold) {
+ super(random);
+ this.nfold = nfold;
+ }
+
+ @Override
+ public void initialize(MultipleObjectsBundle bundle) {
+ super.initialize(bundle);
+ this.fold = 0;
+ }
+
+ @Override
+ public int numberOfPartitions() {
+ return nfold;
+ }
+
+ @Override
+ public TrainingAndTestSet nextPartitioning() {
+ if(fold >= nfold) {
+ return null;
+ }
+ MultipleObjectsBundle training = new MultipleObjectsBundle();
+ MultipleObjectsBundle test = new MultipleObjectsBundle();
+ Random rnd = random.getRandom();
+ int datalen = bundle.dataLength();
+
+ boolean[] assignment = new boolean[datalen];
+ int trsize = 0, tesize = 0;
+ for(int i = 0; i < assignment.length; ++i) {
+ boolean p = rnd.nextInt(nfold) < nfold - 1;
+ assignment[i] = p;
+ @SuppressWarnings("unused")
+ int discard = p ? ++trsize : ++tesize;
+ }
+ // Process column-wise.
+ for(int c = 0, cs = bundle.metaLength(); c < cs; ++c) {
+ ArrayList<Object> tr = new ArrayList<>(trsize), te = new ArrayList<>(tesize);
+ for(int i = 0; i < datalen; ++i) {
+ (assignment[i] ? tr : te).add(bundle.data(i, c));
+ }
+ training.appendColumn(bundle.meta(c), tr);
+ test.appendColumn(bundle.meta(c), te);
+ }
+
+ ++fold;
+ return new TrainingAndTestSet(training, test, labels);
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends RandomizedHoldout.Parameterizer {
+ /**
+ * Parameter for number of folds.
+ */
+ public static final OptionID NFOLD_ID = new OptionID("nfold", "positive number of folds for cross-validation");
+
+ /**
+ * Default number of folds.
+ */
+ public static final int N_DEFAULT = 10;
+
+ /**
+ * Holds the number of folds.
+ */
+ protected int nfold;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter nfoldP = new IntParameter(NFOLD_ID)//
+ .setDefaultValue(N_DEFAULT) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(nfoldP)) {
+ nfold = nfoldP.intValue();
+ }
+ }
+
+ @Override
+ protected RandomizedCrossValidation makeInstance() {
+ return new RandomizedCrossValidation(random, nfold);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedHoldout.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedHoldout.java
new file mode 100644
index 00000000..6202af48
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/RandomizedHoldout.java
@@ -0,0 +1,78 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.math.random.RandomFactory;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
+
+/**
+ * A holdout providing a seed for randomized operations.
+ *
+ * @author Arthur Zimek
+ */
+public abstract class RandomizedHoldout extends AbstractHoldout {
+ /**
+ * The random generator.
+ */
+ protected RandomFactory random;
+
+ /**
+ * Sets the parameter seed to the parameterToDescription map.
+ */
+ public RandomizedHoldout(RandomFactory random) {
+ super();
+ this.random = random;
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static abstract class Parameterizer extends AbstractParameterizer {
+ /**
+ * Random seeding for holdout evaluation.
+ */
+ public static final OptionID SEED_ID = new OptionID("holdout.seed", "Random generator seed for holdout evaluation.");
+
+ /**
+ * The random generator.
+ */
+ protected RandomFactory random;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ RandomParameter seedP = new RandomParameter(SEED_ID);
+ if(config.grab(seedP)) {
+ random = seedP.getValue();
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/StratifiedCrossValidation.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/StratifiedCrossValidation.java
new file mode 100644
index 00000000..40de7e88
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/StratifiedCrossValidation.java
@@ -0,0 +1,162 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import gnu.trove.list.array.TIntArrayList;
+
+import java.util.ArrayList;
+import java.util.Collections;
+
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * A stratified n-fold crossvalidation to distribute the data to n buckets where
+ * each bucket exhibits approximately the same distribution of classes as does
+ * the complete data set. The buckets are disjoint. The distribution is
+ * deterministic.
+ *
+ * @author Arthur Zimek
+ */
+public class StratifiedCrossValidation extends AbstractHoldout {
+ /**
+ * Holds the number of folds, current fold.
+ */
+ protected int nfold, fold;
+
+ /**
+ * Partition assignment, sizes
+ */
+ protected int[] assignment, sizes;
+
+ /**
+ * Provides a stratified crossvalidation. Setting parameter N_P to the
+ * OptionHandler.
+ */
+ public StratifiedCrossValidation(int nfold) {
+ super();
+ this.nfold = nfold;
+ }
+
+ @Override
+ public int numberOfPartitions() {
+ return nfold;
+ }
+
+ @Override
+ public void initialize(MultipleObjectsBundle bundle) {
+ super.initialize(bundle);
+ fold = 0;
+ TIntArrayList[] classBuckets = new TIntArrayList[this.labels.size()];
+ for(int i = 0; i < this.labels.size(); i++) {
+ classBuckets[i] = new TIntArrayList();
+ }
+ for(int i = 0, l = bundle.dataLength(); i < l; ++i) {
+ ClassLabel label = (ClassLabel) bundle.data(i, labelcol);
+ if(label == null) {
+ throw new AbortException("Unlabeled instances currently not supported.");
+ }
+ int classIndex = Collections.binarySearch(labels, label);
+ if(classIndex < 0) {
+ throw new AbortException("Label not in label list: " + label);
+ }
+ classBuckets[classIndex].add(i);
+ }
+ // TODO: shuffle the class buckets?
+ sizes = new int[nfold];
+ assignment = new int[bundle.dataLength()];
+ for(TIntArrayList bucket : classBuckets) {
+ for(int i = 0; i < bucket.size(); i++) {
+ assignment[bucket.get(i)] = i % nfold;
+ }
+ }
+ }
+
+ @Override
+ public TrainingAndTestSet nextPartitioning() {
+ if(fold >= nfold) {
+ return null;
+ }
+ final int tesize = sizes[fold], trsize = bundle.dataLength() - tesize;
+ MultipleObjectsBundle training = new MultipleObjectsBundle();
+ MultipleObjectsBundle test = new MultipleObjectsBundle();
+ // Process column-wise.
+ for(int c = 0, cs = bundle.metaLength(); c < cs; ++c) {
+ ArrayList<Object> tr = new ArrayList<>(trsize), te = new ArrayList<>(tesize);
+ for(int i = 0; i < bundle.dataLength(); ++i) {
+ ((assignment[i] != fold) ? tr : te).add(bundle.data(i, c));
+ }
+ training.appendColumn(bundle.meta(c), tr);
+ test.appendColumn(bundle.meta(c), te);
+ }
+
+ ++fold;
+ return new TrainingAndTestSet(training, test, labels);
+ }
+
+ /**
+ * Parameterization class
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Default number of folds.
+ */
+ public static final int N_DEFAULT = 10;
+
+ /**
+ * Parameter for number of folds.
+ */
+ public static final OptionID NFOLD_ID = new OptionID("nfold", "Number of folds for cross-validation");
+
+ /**
+ * Holds the number of folds.
+ */
+ protected int nfold;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ IntParameter nfoldP = new IntParameter(NFOLD_ID, N_DEFAULT)//
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+ if(config.grab(nfoldP)) {
+ nfold = nfoldP.intValue();
+ }
+ }
+
+ @Override
+ protected StratifiedCrossValidation makeInstance() {
+ return new StratifiedCrossValidation(nfold);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/TrainingAndTestSet.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/TrainingAndTestSet.java
new file mode 100644
index 00000000..a8725e1c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/TrainingAndTestSet.java
@@ -0,0 +1,89 @@
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+
+import de.lmu.ifi.dbs.elki.data.ClassLabel;
+import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
+
+/**
+ * Wrapper to hold a pair of training and test data sets. The labels of both
+ * training and test set are provided in labels.
+ *
+ * @author Arthur Zimek
+ */
+public class TrainingAndTestSet {
+ /**
+ * The overall labels.
+ */
+ private ArrayList<ClassLabel> labels;
+
+ /**
+ * The training data.
+ */
+ private MultipleObjectsBundle training;
+
+ /**
+ * The test data.
+ */
+ private MultipleObjectsBundle test;
+
+ /**
+ * Provides a pair of training and test data sets out of the given two
+ * databases.
+ */
+ public TrainingAndTestSet(MultipleObjectsBundle training, MultipleObjectsBundle test, ArrayList<ClassLabel> labels) {
+ this.training = training;
+ this.test = test;
+ this.labels = labels;
+ }
+
+ /**
+ * Returns the test data set.
+ *
+ * @return the test data set
+ */
+ public MultipleObjectsBundle getTest() {
+ return test;
+ }
+
+ /**
+ * Returns the training data set.
+ *
+ * @return the training data set
+ */
+ public MultipleObjectsBundle getTraining() {
+ return training;
+ }
+
+ /**
+ * Returns all labels present in the data set.
+ *
+ * @return all labels
+ */
+ public ArrayList<ClassLabel> getLabels() {
+ return labels;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/package-info.java
new file mode 100644
index 00000000..a469dfc3
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/holdout/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Holdout and cross-validation strategies for evaluating classifiers.
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.evaluation.classification.holdout;
+
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/classification/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/classification/package-info.java
new file mode 100644
index 00000000..a6e3f484
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/classification/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Evaluation of classification algorithms.
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.evaluation.classification;
+
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java
index 3353b593..1c84f0e1 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/BCubed.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java
index c3f8129f..d5dec735 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/ClusterContingencyTable.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -22,16 +22,15 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
/**
* Class storing the contingency table and related data on two clusterings.
@@ -59,14 +58,9 @@ public class ClusterContingencyTable {
protected boolean selfPairing = true;
/**
- * Number of clusters in first
+ * Number of clusters.
*/
- protected int size1 = -1;
-
- /**
- * Number of clusters in second
- */
- protected int size2 = -1;
+ protected int size1 = -1, size2 = -1;
/**
* Contingency matrix
@@ -76,12 +70,7 @@ public class ClusterContingencyTable {
/**
* Noise flags
*/
- protected BitSet noise1 = null;
-
- /**
- * Noise flags
- */
- protected BitSet noise2 = null;
+ protected long[] noise1 = null, noise2 = null;
/**
* Pair counting measures
@@ -135,46 +124,39 @@ public class ClusterContingencyTable {
size1 = cs1.size();
size2 = cs2.size();
contingency = new int[size1 + 2][size2 + 2];
- noise1 = new BitSet(size1);
- noise2 = new BitSet(size2);
+ noise1 = BitsUtil.zero(size1);
+ noise2 = BitsUtil.zero(size2);
// Fill main part of matrix
{
- {
- final Iterator<? extends Cluster<?>> it2 = cs2.iterator();
- for(int i2 = 0; it2.hasNext(); i2++) {
- final Cluster<?> c2 = it2.next();
- if(c2.isNoise()) {
- noise2.set(i2);
- }
- contingency[size1 + 1][i2] = c2.size();
- contingency[size1 + 1][size2] += c2.size();
+ final Iterator<? extends Cluster<?>> it2 = cs2.iterator();
+ for(int i2 = 0; it2.hasNext(); i2++) {
+ final Cluster<?> c2 = it2.next();
+ if(c2.isNoise()) {
+ BitsUtil.setI(noise2, i2);
}
+ contingency[size1 + 1][i2] = c2.size();
+ contingency[size1 + 1][size2] += c2.size();
}
- final Iterator<? extends Cluster<?>> it1 = cs1.iterator();
- for(int i1 = 0; it1.hasNext(); i1++) {
- final Cluster<?> c1 = it1.next();
- if(c1.isNoise()) {
- noise1.set(i1);
- }
- final DBIDs ids = DBIDUtil.ensureSet(c1.getIDs());
- contingency[i1][size2 + 1] = c1.size();
- contingency[size1][size2 + 1] += c1.size();
-
- final Iterator<? extends Cluster<?>> it2 = cs2.iterator();
- for(int i2 = 0; it2.hasNext(); i2++) {
- final Cluster<?> c2 = it2.next();
- int count = 0;
- for(DBIDIter iter = c2.getIDs().iter(); iter.valid(); iter.advance()) {
- if(ids.contains(iter)) {
- count++;
- }
- }
- contingency[i1][i2] = count;
- contingency[i1][size2] += count;
- contingency[size1][i2] += count;
- contingency[size1][size2] += count;
- }
+ }
+ final Iterator<? extends Cluster<?>> it1 = cs1.iterator();
+ for(int i1 = 0; it1.hasNext(); i1++) {
+ final Cluster<?> c1 = it1.next();
+ if(c1.isNoise()) {
+ BitsUtil.setI(noise1, i1);
+ }
+ final DBIDs ids = DBIDUtil.ensureSet(c1.getIDs());
+ contingency[i1][size2 + 1] = c1.size();
+ contingency[size1][size2 + 1] += c1.size();
+
+ final Iterator<? extends Cluster<?>> it2 = cs2.iterator();
+ for(int i2 = 0; it2.hasNext(); i2++) {
+ final Cluster<?> c2 = it2.next();
+ int count = DBIDUtil.intersectionSize(ids, c2.getIDs());
+ contingency[i1][i2] = count;
+ contingency[i1][size2] += count;
+ contingency[size1][i2] += count;
+ contingency[size1][size2] += count;
}
}
}
@@ -196,9 +178,6 @@ public class ClusterContingencyTable {
buf.append('\n');
}
}
- // if(pairconfuse != null) {
- // buf.append(FormatUtil.format(pairconfuse));
- // }
return buf.toString();
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EditDistance.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EditDistance.java
index 6bbda98a..41b69de6 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EditDistance.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EditDistance.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java
index 58f19f65..654cecbe 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/Entropy.java
@@ -1,9 +1,10 @@
package de.lmu.ifi.dbs.elki.evaluation.clustering;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -36,7 +37,10 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
*
* @author Sascha Goldhofer
*/
-@Reference(authors = "Meilă, M.", title = "Comparing clusterings by the variation of information", booktitle = "Learning theory and kernel machines", url = "http://dx.doi.org/10.1007/978-3-540-45167-9_14")
+@Reference(authors = "Meilă, M.", //
+title = "Comparing clusterings by the variation of information", //
+booktitle = "Learning theory and kernel machines", //
+url = "http://dx.doi.org/10.1007/978-3-540-45167-9_14")
public class Entropy {
/**
* Entropy in first
@@ -105,8 +109,8 @@ public class Entropy {
}
/**
- * Get the entropy of the second clustering using Log_2. (not normalized, 0
- * = equal)
+ * Get the entropy of the second clustering using Log_2. (not normalized, 0 =
+ * equal)
*
* @return Entropy of second clustering
*/
@@ -144,8 +148,7 @@ public class Entropy {
}
/**
- * Get Powers entropy (normalized, 0 = equal)
- * Powers = 1 - NMI_Sum
+ * Get Powers entropy (normalized, 0 = equal) Powers = 1 - NMI_Sum
*
* @return Powers
*/
@@ -168,7 +171,7 @@ public class Entropy {
* @return Joint Normalized Mutual information
*/
public double entropyNMIJoint() {
- if (entropyJoint() == 0) {
+ if(entropyJoint() == 0) {
return 0;
}
return (entropyMutualInformation() / entropyJoint());
@@ -207,7 +210,7 @@ public class Entropy {
* @return Sqrt Normalized Mutual information
*/
public double entropyNMISqrt() {
- if (entropyFirst() * entropySecond() <= 0) {
+ if(entropyFirst() * entropySecond() <= 0) {
return entropyMutualInformation();
}
return (entropyMutualInformation() / Math.sqrt(entropyFirst() * entropySecond()));
@@ -223,20 +226,23 @@ public class Entropy {
}
/**
- * Get the normalized variation of information (normalized, 0 = equal)
- * NVI = 1 - NMI_Joint
+ * Get the normalized variation of information (normalized, 0 = equal) NVI = 1
+ * - NMI_Joint
*
* <p>
- * Vinh, N.X. and Epps, J. and Bailey, J.<br />
- * Information theoretic measures for clusterings comparison: is a
- * correction for chance necessary?<br />
- * In: Proc. ICML '09 Proceedings of the 26th Annual International
- * Conference on Machine Learning
+ * Nguyen, X. V. and Epps, J. and Bailey, J.<br />
+ * Information theoretic measures for clusterings comparison: is a correction
+ * for chance necessary?<br />
+ * In: Proc. ICML '09 Proceedings of the 26th Annual International Conference
+ * on Machine Learning
* </p>
*
* @return Normalized Variation of information
*/
- @Reference(authors = "Vinh, N.X. and Epps, J. and Bailey, J.", title = "Information theoretic measures for clusterings comparison: is a correction for chance necessary?", booktitle = "Proc. ICML '09 Proceedings of the 26th Annual International Conference on Machine Learning", url = "http://dx.doi.org/10.1145/1553374.1553511")
+ @Reference(authors = "Nguyen, X. V. and Epps, J. and Bailey, J.", //
+ title = "Information theoretic measures for clusterings comparison: is a correction for chance necessary?", //
+ booktitle = "Proc. ICML '09 Proceedings of the 26th Annual International Conference on Machine Learning", //
+ url = "http://dx.doi.org/10.1145/1553374.1553511")
public double normalizedVariationOfInformation() {
return (1.0 - (entropyMutualInformation() / entropyJoint()));
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
index 6db25736..2a67dbe2 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/EvaluateClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,12 +32,12 @@ import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.result.BasicResult;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.EvaluationResult;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.ResultUtil;
-import de.lmu.ifi.dbs.elki.result.textwriter.TextWriteable;
-import de.lmu.ifi.dbs.elki.result.textwriter.TextWriterStream;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -156,7 +156,9 @@ public class EvaluateClustering implements Evaluator {
ClusterContingencyTable contmat = new ClusterContingencyTable(selfPairing, noiseSpecialHandling);
contmat.process(refc, c);
- db.getHierarchy().add(c, new ScoreResult(contmat));
+ ScoreResult sr = new ScoreResult(contmat);
+ sr.addHeader(c.getLongName());
+ db.getHierarchy().add(c, sr);
}
}
@@ -184,7 +186,7 @@ public class EvaluateClustering implements Evaluator {
*
* @apiviz.composedOf ClusterContingencyTable
*/
- public static class ScoreResult extends BasicResult implements TextWriteable {
+ public static class ScoreResult extends EvaluationResult {
/**
* Cluster contingency table
*/
@@ -198,6 +200,43 @@ public class EvaluateClustering implements Evaluator {
public ScoreResult(ClusterContingencyTable contmat) {
super("Cluster-Evalation", "cluster-evaluation");
this.contmat = contmat;
+
+ PairCounting paircount = contmat.getPaircount();
+ MeasurementGroup g = newGroup("Pair counting measures");
+ g.addMeasure("Jaccard", paircount.jaccard(), 0, 1, false);
+ g.addMeasure("F1-Measure", paircount.f1Measure(), 0, 1, false);
+ g.addMeasure("Precision", paircount.precision(), 0, 1, false);
+ g.addMeasure("Recall", paircount.recall(), 0, 1, false);
+ g.addMeasure("Rand", paircount.randIndex(), 0, 1, false);
+ g.addMeasure("ARI", paircount.adjustedRandIndex(), 0, 1, false);
+ g.addMeasure("FowlkesMallows", paircount.fowlkesMallows(), 0, 1, false);
+
+ Entropy entropy = contmat.getEntropy();
+ g = newGroup("Entropy based measures");
+ g.addMeasure("NMI Joint", entropy.entropyNMIJoint(), 0, 1, false);
+ g.addMeasure("NMI Sqrt", entropy.entropyNMISqrt(), 0, 1, false);
+
+ BCubed bcubed = contmat.getBCubed();
+ g = newGroup("BCubed-based measures");
+ g.addMeasure("F1-Measure", bcubed.f1Measure(), 0, 1, false);
+ g.addMeasure("Recall", bcubed.recall(), 0, 1, false);
+ g.addMeasure("Precision", bcubed.precision(), 0, 1, false);
+
+ SetMatchingPurity setm = contmat.getSetMatching();
+ g = newGroup("Set-Matching-based measures");
+ g.addMeasure("F1-Measure", setm.f1Measure(), 0, 1, false);
+ g.addMeasure("Purity", setm.purity(), 0, 1, false);
+ g.addMeasure("Inverse Purity", setm.inversePurity(), 0, 1, false);
+
+ EditDistance edit = contmat.getEdit();
+ g = newGroup("Editing-distance measures");
+ g.addMeasure("F1-Measure", edit.f1Measure(), 0, 1, false);
+ g.addMeasure("Precision", edit.editDistanceFirst(), 0, 1, false);
+ g.addMeasure("Recall", edit.editDistanceSecond(), 0, 1, false);
+
+ MeanVariance gini = contmat.averageSymmetricGini();
+ g = newGroup("Gini measures");
+ g.addMeasure("Mean +-" + FormatUtil.NF4.format(gini.getCount() > 1. ? gini.getSampleStddev() : 0.), gini.getMean(), 0, 1, false);
}
/**
@@ -208,47 +247,6 @@ public class EvaluateClustering implements Evaluator {
public ClusterContingencyTable getContingencyTable() {
return contmat;
}
-
- @Override
- public void writeToText(TextWriterStream out, String label) {
- out.commentPrint("Pair-F1, ");
- out.commentPrint("Pair-Precision, ");
- out.commentPrint("Pair-Recall, ");
- out.commentPrint("Pair-Rand, ");
- out.commentPrint("Pair-AdjustedRand, ");
- out.commentPrint("Pair-FowlkesMallows, ");
- out.commentPrint("Pair-Jaccard, ");
- out.commentPrint("Pair-Mirkin, ");
- out.commentPrint("Entropy-VI, ");
- out.commentPrint("Entropy-NormalizedVI, ");
- out.commentPrint("Entropy-F1, ");
- out.commentPrint("Edit-F1, ");
- out.commentPrint("SM-InvPurity, ");
- out.commentPrint("SM-Purity, ");
- out.commentPrint("SM-F1, ");
- out.commentPrint("BCubed-Precision, ");
- out.commentPrint("BCubed-Recall, ");
- out.commentPrint("BCubed-F1");
- out.flush();
- out.inlinePrint(contmat.getPaircount().f1Measure());
- out.inlinePrint(contmat.getPaircount().precision());
- out.inlinePrint(contmat.getPaircount().recall());
- out.inlinePrint(contmat.getPaircount().randIndex());
- out.inlinePrint(contmat.getPaircount().adjustedRandIndex());
- out.inlinePrint(contmat.getPaircount().fowlkesMallows());
- out.inlinePrint(contmat.getPaircount().jaccard());
- out.inlinePrint(contmat.getPaircount().mirkin());
- out.inlinePrint(contmat.getEntropy().variationOfInformation());
- out.inlinePrint(contmat.getEntropy().normalizedVariationOfInformation());
- out.inlinePrint(contmat.getEdit().f1Measure());
- out.inlinePrint(contmat.getSetMatching().inversePurity());
- out.inlinePrint(contmat.getSetMatching().purity());
- out.inlinePrint(contmat.getSetMatching().f1Measure());
- out.inlinePrint(contmat.getBCubed().precision());
- out.inlinePrint(contmat.getBCubed().recall());
- out.inlinePrint(contmat.getBCubed().f1Measure());
- out.flush();
- }
}
/**
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java
index ef7935d8..cbe8e618 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/PairCounting.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,6 +24,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering;
*/
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
/**
@@ -54,16 +55,18 @@ public class PairCounting {
long inBoth = 0, in1 = 0, in2 = 0, total = 0;
// Process first clustering:
{
- for (int i1 = 0; i1 < table.size1; i1++) {
+ for(int i1 = 0; i1 < table.size1; i1++) {
final int size = table.contingency[i1][table.size2 + 1];
- if (table.breakNoiseClusters && table.noise1.get(i1)) {
- if (table.selfPairing) {
+ if(table.breakNoiseClusters && BitsUtil.get(table.noise1, i1)) {
+ if(table.selfPairing) {
in1 += size;
} // else: 0
- } else {
- if (table.selfPairing) {
+ }
+ else {
+ if(table.selfPairing) {
in1 += size * size;
- } else {
+ }
+ else {
in1 += size * (size - 1);
}
}
@@ -71,33 +74,37 @@ public class PairCounting {
}
// Process second clustering:
{
- for (int i2 = 0; i2 < table.size2; i2++) {
+ for(int i2 = 0; i2 < table.size2; i2++) {
final int size = table.contingency[table.size1 + 1][i2];
- if (table.breakNoiseClusters && table.noise2.get(i2)) {
- if (table.selfPairing) {
+ if(table.breakNoiseClusters && BitsUtil.get(table.noise2, i2)) {
+ if(table.selfPairing) {
in2 += size;
} // else: 0
- } else {
- if (table.selfPairing) {
+ }
+ else {
+ if(table.selfPairing) {
in2 += size * size;
- } else {
+ }
+ else {
in2 += size * (size - 1);
}
}
}
}
// Process combinations
- for (int i1 = 0; i1 < table.size1; i1++) {
- for (int i2 = 0; i2 < table.size2; i2++) {
+ for(int i1 = 0; i1 < table.size1; i1++) {
+ for(int i2 = 0; i2 < table.size2; i2++) {
final int size = table.contingency[i1][i2];
- if (table.breakNoiseClusters && (table.noise1.get(i1) || table.noise2.get(i2))) {
- if (table.selfPairing) {
+ if(table.breakNoiseClusters && (BitsUtil.get(table.noise1, i1) || BitsUtil.get(table.noise2, i2))) {
+ if(table.selfPairing) {
inBoth += size;
} // else: 0
- } else {
- if (table.selfPairing) {
+ }
+ else {
+ if(table.selfPairing) {
inBoth += size * size;
- } else {
+ }
+ else {
inBoth += size * (size - 1);
}
}
@@ -105,15 +112,16 @@ public class PairCounting {
}
// The official sum
int tsize = table.contingency[table.size1][table.size2];
- if (table.contingency[table.size1][table.size2 + 1] != tsize || table.contingency[table.size1 + 1][table.size2] != tsize) {
+ if(table.contingency[table.size1][table.size2 + 1] != tsize || table.contingency[table.size1 + 1][table.size2] != tsize) {
LoggingUtil.warning("PairCounting F-Measure is not well defined for overlapping and incomplete clusterings. The number of elements are: " + table.contingency[table.size1][table.size2 + 1] + " != " + table.contingency[table.size1 + 1][table.size2] + " elements.");
}
- if (tsize < 0 || tsize >= MAX_SIZE) {
+ if(tsize < 0 || tsize >= MAX_SIZE) {
LoggingUtil.warning("Your data set size probably is too big for this implementation, which uses only long precision.");
}
- if (table.selfPairing) {
+ if(table.selfPairing) {
total = tsize * tsize;
- } else {
+ }
+ else {
total = tsize * (tsize - 1);
}
long inFirst = in1 - inBoth, inSecond = in2 - inBoth;
@@ -172,7 +180,9 @@ public class PairCounting {
* @return pair-counting Fowlkes-mallows
*/
// TODO: implement for non-flat clusterings!
- @Reference(authors = "Fowlkes, E.B. and Mallows, C.L.", title = "A method for comparing two hierarchical clusterings", booktitle = "Journal of the American Statistical Association, Vol. 78 Issue 383")
+ @Reference(authors = "Fowlkes, E.B. and Mallows, C.L.", //
+ title = "A method for comparing two hierarchical clusterings", //
+ booktitle = "Journal of the American Statistical Association, Vol. 78 Issue 383")
public double fowlkesMallows() {
return Math.sqrt(precision() * recall());
}
@@ -188,7 +198,10 @@ public class PairCounting {
*
* @return The Rand index (RI).
*/
- @Reference(authors = "Rand, W. M.", title = "Objective Criteria for the Evaluation of Clustering Methods", booktitle = "Journal of the American Statistical Association, Vol. 66 Issue 336", url = "http://www.jstor.org/stable/10.2307/2284239")
+ @Reference(authors = "Rand, W. M.", //
+ title = "Objective Criteria for the Evaluation of Clustering Methods", //
+ booktitle = "Journal of the American Statistical Association, Vol. 66 Issue 336", //
+ url = "http://www.jstor.org/stable/10.2307/2284239")
public double randIndex() {
final double sum = pairconfuse[0] + pairconfuse[1] + pairconfuse[2] + pairconfuse[3];
return (pairconfuse[0] + pairconfuse[3]) / sum;
@@ -203,9 +216,10 @@ public class PairCounting {
final double nom = pairconfuse[0] * pairconfuse[3] - pairconfuse[1] * pairconfuse[2];
final long d1 = (pairconfuse[0] + pairconfuse[1]) * (pairconfuse[1] + pairconfuse[3]);
final long d2 = (pairconfuse[0] + pairconfuse[2]) * (pairconfuse[2] + pairconfuse[3]);
- if (d1 + d2 > 0) {
+ if(d1 + d2 > 0) {
return 2 * nom / (d1 + d2);
- } else {
+ }
+ else {
return 1.;
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java
index 1a003117..d37d4693 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/SetMatchingPurity.java
@@ -1,9 +1,10 @@
package de.lmu.ifi.dbs.elki.evaluation.clustering;
+
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -41,19 +42,29 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
* University of Washington, Seattle, Technical Report 418, 2002
* </p>
* <p>
- * Steinbach, M. and Karypis, G. and Kumar, V. and others<br />
+ * Steinbach, M. and Karypis, G. and Kumar, V.<br />
* A comparison of document clustering techniques<br />
* KDD workshop on text mining, 2000
* </p>
+ * <p>
+ * E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br />
+ * A comparison of extrinsic clustering evaluation metrics based on formal
+ * constraints<br />
+ * Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009
+ * </p>
*
* @author Sascha Goldhofer
*/
-@Reference(authors = "Meilă, M", title = "Comparing clusterings", booktitle = "University of Washington, Seattle, Technical Report 418, 2002", url = "http://www.stat.washington.edu/mmp/www.stat.washington.edu/mmp/Papers/compare-colt.pdf")
+@Reference(authors = "Meilă, M", //
+title = "Comparing clusterings", //
+booktitle = "University of Washington, Seattle, Technical Report 418, 2002", //
+url = "http://www.stat.washington.edu/mmp/Papers/compare-colt.pdf")
public class SetMatchingPurity {
/**
* Result cache
*/
- protected double smPurity = -1.0, smInversePurity = -1.0, smFFirst = -1.0, smFSecond = - 1.0;
+ protected double smPurity = -1.0, smInversePurity = -1.0, smFFirst = -1.0,
+ smFSecond = -1.0;
/**
* Constructor.
@@ -105,14 +116,17 @@ public class SetMatchingPurity {
*
* @return purity
*/
- @Reference(authors = "Zhao, Y. and Karypis, G.", title = "Criterion functions for document clustering: Experiments and analysis", booktitle = "University of Minnesota, Department of Computer Science, Technical Report 01-40, 2001", url = "http://www-users.cs.umn.edu/~karypis/publications/Papers/PDF/vscluster.pdf")
+ @Reference(authors = "Zhao, Y. and Karypis, G.", //
+ title = "Criterion functions for document clustering: Experiments and analysis", //
+ booktitle = "University of Minnesota, Department of Computer Science, Technical Report 01-40, 2001", //
+ url = "http://www-users.cs.umn.edu/~karypis/publications/Papers/PDF/vscluster.pdf")
public double purity() {
return smPurity;
}
/**
- * Get the set matchings inverse purity (second:first clustering)
- * (normalized, 1 = equal)
+ * Get the set matchings inverse purity (second:first clustering) (normalized,
+ * 1 = equal)
*
* @return Inverse purity
*/
@@ -124,44 +138,57 @@ public class SetMatchingPurity {
* Get the set matching F1-Measure
*
* <p>
- * Steinbach, M. and Karypis, G. and Kumar, V. and others<br />
+ * Steinbach, M. and Karypis, G. and Kumar, V.<br />
* A comparison of document clustering techniques<br />
* KDD workshop on text mining, 2000
* </p>
*
* @return Set Matching F1-Measure
*/
- @Reference(authors = "Steinbach, M. and Karypis, G. and Kumar, V. and others", title = "A comparison of document clustering techniques", booktitle = "KDD workshop on text mining, 2000", url = "http://www-users.itlabs.umn.edu/~karypis/publications/Papers/PDF/doccluster.pdf")
+ @Reference(authors = "Steinbach, M. and Karypis, G. and Kumar, V.", //
+ title = "A comparison of document clustering techniques", //
+ booktitle = "KDD workshop on text mining, 2000", //
+ url = "http://www-users.itlabs.umn.edu/~karypis/publications/Papers/PDF/doccluster.pdf")
public double f1Measure() {
return Util.f1Measure(purity(), inversePurity());
}
-
+
/**
* Get the Van Rijsbergen’s F measure (asymmetric) for first clustering
*
* <p>
* E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br />
- * A comparison of extrinsic clustering evaluation metrics based on formal constraints<br />
+ * A comparison of extrinsic clustering evaluation metrics based on formal
+ * constraints<br />
* Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009
* </p>
*
* @return Set Matching F-Measure of first clustering
*/
+ @Reference(authors = "E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo", //
+ title = "A comparison of extrinsic clustering evaluation metrics based on formal constraints", //
+ booktitle = "Inf. Retrieval, vol. 12, no. 5", //
+ url = "http://dx.doi.org/10.1007/s10791-009-9106-z")
public double fMeasureFirst() {
return smFFirst;
}
-
+
/**
* Get the Van Rijsbergen’s F measure (asymmetric) for second clustering
*
* <p>
* E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo <br />
- * A comparison of extrinsic clustering evaluation metrics based on formal constraints<br />
+ * A comparison of extrinsic clustering evaluation metrics based on formal
+ * constraints<br />
* Inf. Retrieval, vol. 12, no. 5, pp. 461–486, 2009
* </p>
*
* @return Set Matching F-Measure of second clustering
*/
+ @Reference(authors = "E. Amigó, J. Gonzalo, J. Artiles, and F. Verdejo", //
+ title = "A comparison of extrinsic clustering evaluation metrics based on formal constraints", //
+ booktitle = "Inf. Retrieval, vol. 12, no. 5", //
+ url = "http://dx.doi.org/10.1007/s10791-009-9106-z")
public double fMeasureSecond() {
return smFSecond;
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluateSilhouette.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluateSilhouette.java
new file mode 100644
index 00000000..bc2817a6
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/EvaluateSilhouette.java
@@ -0,0 +1,265 @@
+package de.lmu.ifi.dbs.elki.evaluation.clustering.internal;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
+import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.result.EvaluationResult;
+import de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Compute the silhouette of a data set.
+ *
+ * Reference:
+ * <p>
+ * P. J. Rousseeuw<br />
+ * Silhouettes: A graphical aid to the interpretation and validation of cluster
+ * analysis<br />
+ * In: Journal of Computational and Applied Mathematics Volume 20, November 1987
+ * </p>
+ *
+ * TODO: keep all silhouette values, and allow visualization!
+ *
+ * @author Erich Schubert
+ *
+ * @param <O> Object type
+ */
+@Reference(authors = "P. J. Rousseeuw", //
+title = "Silhouettes: A graphical aid to the interpretation and validation of cluster analysis", //
+booktitle = "Journal of Computational and Applied Mathematics, Volume 20", //
+url = "http://dx.doi.org/10.1016%2F0377-0427%2887%2990125-7")
+public class EvaluateSilhouette<O> implements Evaluator {
+ /**
+ * Logger for debug output.
+ */
+ private static final Logging LOG = Logging.getLogger(EvaluateSilhouette.class);
+
+ /**
+ * Keep noise "clusters" merged, instead of breaking them into singletons.
+ */
+ private boolean mergenoise = false;
+
+ /**
+ * Distance function to use.
+ */
+ private DistanceFunction<? super O> distance;
+
+ /**
+ * Key for logging statistics.
+ */
+ private String key = EvaluateSilhouette.class.getName();
+
+ /**
+ * Constructor.
+ *
+ * @param distance Distance function
+ * @param mergenoise Flag to treat noise as clusters, instead of breaking them
+ * into singletons.
+ */
+ public EvaluateSilhouette(DistanceFunction<? super O> distance, boolean mergenoise) {
+ super();
+ this.distance = distance;
+ this.mergenoise = mergenoise;
+ }
+
+ /**
+ * Evaluate a single clustering.
+ *
+ * @param db Database
+ * @param rel Data relation
+ * @param dq Distance query
+ * @param c Clustering
+ */
+ public void evaluateClustering(Database db, Relation<O> rel, DistanceQuery<O> dq, Clustering<?> c) {
+ List<? extends Cluster<?>> clusters = c.getAllClusters();
+ MeanVariance msil = new MeanVariance();
+ for(Cluster<?> cluster : clusters) {
+ if(cluster.size() <= 1 || (!mergenoise && cluster.isNoise())) {
+ // As suggested in Rousseeuw, we use 0 for singletons.
+ msil.put(0., cluster.size());
+ continue;
+ }
+ ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
+ double[] as = new double[ids.size()]; // temporary storage.
+ DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
+ for(it1.seek(0); it1.valid(); it1.advance()) {
+ // a: In-cluster distances
+ double a = as[it1.getOffset()]; // Already computed distances
+ for(it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
+ final double dist = dq.distance(it1, it2);
+ a += dist;
+ as[it2.getOffset()] += dist;
+ }
+ a /= (ids.size() - 1);
+ // b: other clusters:
+ double min = Double.POSITIVE_INFINITY;
+ for(Cluster<?> ocluster : clusters) {
+ if(ocluster == /* yes, reference identity */cluster) {
+ continue;
+ }
+ if(!mergenoise && ocluster.isNoise()) {
+ // Treat noise cluster as singletons:
+ for(DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
+ double dist = dq.distance(it1, it3);
+ if(dist < min) {
+ min = dist;
+ }
+ }
+ continue;
+ }
+ final DBIDs oids = ocluster.getIDs();
+ double b = 0.;
+ for(DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
+ b += dq.distance(it1, it3);
+ }
+ b /= oids.size();
+ if(b < min) {
+ min = b;
+ }
+ }
+ msil.put((min - a) / Math.max(min, a));
+ }
+ }
+ if(LOG.isStatistics()) {
+ LOG.statistics(new LongStatistic(key + ".silhouette.noise", mergenoise ? 1L : 0L));
+ LOG.statistics(new DoubleStatistic(key + ".silhouette.mean", msil.getMean()));
+ LOG.statistics(new DoubleStatistic(key + ".silhouette.stddev", msil.getSampleStddev()));
+ }
+
+ EvaluationResult ev = new EvaluationResult("Internal Clustering Evaluation", "internal evaluation");
+ MeasurementGroup g = ev.newGroup("Distance-based Evaluation");
+ g.addMeasure("Silhouette coefficient +-" + FormatUtil.NF2.format(msil.getSampleStddev()), msil.getMean(), -1., 1., 0., false);
+ db.getHierarchy().add(c, ev);
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result result) {
+ List<Clustering<?>> crs = ResultUtil.getClusteringResults(result);
+ if(crs.size() < 1) {
+ return;
+ }
+ Database db = ResultUtil.findDatabase(baseResult);
+ Relation<O> rel = db.getRelation(distance.getInputTypeRestriction());
+ DistanceQuery<O> dq = db.getDistanceQuery(rel, distance);
+ for(Clustering<?> c : crs) {
+ evaluateClustering(db, rel, dq, c);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer<O> extends AbstractParameterizer {
+ /**
+ * Parameter for choosing the distance function.
+ */
+ public static final OptionID DISTANCE_ID = new OptionID("silhouette.distance", "Distance function to use for computing the silhouette.");
+
+ /**
+ * Parameter to treat noise as a single cluster.
+ */
+ public static final OptionID MERGENOISE_ID = new OptionID("silhouette.noisecluster", "Treat noise as a cluster, not as singletons.");
+
+ /**
+ * Distance function to use.
+ */
+ private DistanceFunction<? super O> distance;
+
+ /**
+ * Keep noise "clusters" merged.
+ */
+ private boolean mergenoise = false;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<DistanceFunction<? super O>> distP = new ObjectParameter<>(DISTANCE_ID, DistanceFunction.class, EuclideanDistanceFunction.class);
+ if(config.grab(distP)) {
+ distance = distP.instantiateClass(config);
+ }
+
+ Flag noiseP = new Flag(MERGENOISE_ID);
+ if(config.grab(noiseP)) {
+ mergenoise = noiseP.isTrue();
+ }
+ }
+
+ @Override
+ protected EvaluateSilhouette<O> makeInstance() {
+ return new EvaluateSilhouette<>(distance, mergenoise);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/package-info.java
new file mode 100644
index 00000000..952794c8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/internal/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * Internal evaluation measures for clusterings.
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.evaluation.clustering.internal; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/package-info.java
index 7dc244fd..666de5df 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java
index 76508563..dcd972a7 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/ClusterPairSegmentAnalysis.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering.pairsegments;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java
index 76769d78..a9e2fbc9 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segment.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering.pairsegments;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java
index b7215579..ced8d726 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/Segments.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.clustering.pairsegments;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/package-info.java
index 01d0d09a..01d3bfa0 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/clustering/pairsegments/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
index a9c24172..be06ade2 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/ComputeOutlierHistogram.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.histogram;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -226,12 +226,12 @@ public class ComputeOutlierHistogram implements Evaluator {
ids.removeDBIDs(outlierIds);
// fill histogram with values of each object
for(DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- double result = or.getScores().get(iter);
+ double result = or.getScores().doubleValue(iter);
result = scaling.getScaled(result);
hist.putData(result, negative);
}
for(DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
- double result = or.getScores().get(iter);
+ double result = or.getScores().doubleValue(iter);
result = scaling.getScaled(result);
hist.putData(result, positive);
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/package-info.java
index a07bcc30..6daba3a0 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/histogram/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/histogram/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java b/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java
index ac0f3457..7b85387e 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexPurity.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.index;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexStatistics.java b/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexStatistics.java
index f7c9caa8..1ad74141 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexStatistics.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/index/IndexStatistics.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.index;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/index/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/index/package-info.java
index 67fdaa98..2fdf5caf 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/index/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/index/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java
index 1639515a..ff34f7c0 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/JudgeOutlierScores.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -147,12 +147,12 @@ public class JudgeOutlierScores implements Evaluator {
double negscore = 0.0;
// fill histogram with values of each object
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
- double result = or.getScores().get(iter);
+ double result = or.getScores().doubleValue(iter);
result = innerScaling.getScaled(scaling.getScaled(result));
posscore += (1.0 - result);
}
for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
- double result = or.getScores().get(iter);
+ double result = or.getScores().doubleValue(iter);
result = innerScaling.getScaled(scaling.getScaled(result));
negscore += result;
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java
index 72967d58..f876469c 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionAtKCurve.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java
index a3d9d92f..8ac9a559 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierPrecisionRecallCurve.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
@@ -116,7 +116,7 @@ public class OutlierPrecisionRecallCurve implements Evaluator {
}
}
- private XYCurve computePrecisionResult(int size, SetDBIDs ids, DBIDIter iter, Relation<Double> scores) {
+ private XYCurve computePrecisionResult(int size, SetDBIDs ids, DBIDIter iter, DoubleRelation scores) {
final int postot = ids.size();
int poscnt = 0, total = 0;
XYCurve curve = new PRCurve(postot + 2, postot);
@@ -140,7 +140,7 @@ public class OutlierPrecisionRecallCurve implements Evaluator {
}
// defer calculation for ties
if (scores != null) {
- double curscore = scores.get(iter);
+ double curscore = scores.doubleValue(iter);
if (Double.compare(prevscore, curscore) == 0) {
continue;
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java
index 0d92c50a..fc7250c1 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierROCCurve.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -31,7 +31,10 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
-import de.lmu.ifi.dbs.elki.evaluation.roc.ROC;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ROCEvaluation;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
@@ -103,30 +106,26 @@ public class OutlierROCCurve implements Evaluator {
}
private ROCResult computeROCResult(int size, SetDBIDs positiveids, DBIDs order) {
- if (order.size() != size) {
+ if(order.size() != size) {
throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
}
- XYCurve roccurve = ROC.materializeROC(new ROC.DBIDsTest(positiveids), new ROC.SimpleAdapter(order.iter()));
+ XYCurve roccurve = ROCEvaluation.materializeROC(new DBIDsTest(positiveids), new SimpleAdapter(order.iter()));
double rocauc = XYCurve.areaUnderCurve(roccurve);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose(ROCAUC_LABEL + ": " + rocauc);
}
- final ROCResult rocresult = new ROCResult(roccurve, rocauc);
-
- return rocresult;
+ return new ROCResult(roccurve, rocauc);
}
private ROCResult computeROCResult(int size, SetDBIDs positiveids, OutlierResult or) {
- XYCurve roccurve = ROC.materializeROC(new ROC.DBIDsTest(positiveids), new ROC.OutlierScoreAdapter(or));
+ XYCurve roccurve = ROCEvaluation.materializeROC(new DBIDsTest(positiveids), new OutlierScoreAdapter(or));
double rocauc = XYCurve.areaUnderCurve(roccurve);
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose(ROCAUC_LABEL + ": " + rocauc);
}
- final ROCResult rocresult = new ROCResult(roccurve, rocauc);
-
- return rocresult;
+ return new ROCResult(roccurve, rocauc);
}
@Override
@@ -135,7 +134,7 @@ public class OutlierROCCurve implements Evaluator {
// Prepare
SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
- if (positiveids.size() == 0) {
+ if(positiveids.size() == 0) {
LOG.warning("Computing a ROC curve failed - no objects matched.");
return;
}
@@ -144,7 +143,7 @@ public class OutlierROCCurve implements Evaluator {
List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
// Outlier results are the main use case.
- for (OutlierResult o : oresults) {
+ for(OutlierResult o : oresults) {
db.getHierarchy().add(o, computeROCResult(o.getScores().size(), positiveids, o));
// Process them only once.
orderings.remove(o.getOrdering());
@@ -153,13 +152,13 @@ public class OutlierROCCurve implements Evaluator {
// FIXME: find appropriate place to add the derived result
// otherwise apply an ordering to the database IDs.
- for (OrderingResult or : orderings) {
+ for(OrderingResult or : orderings) {
DBIDs sorted = or.iter(or.getDBIDs());
db.getHierarchy().add(or, computeROCResult(or.getDBIDs().size(), positiveids, sorted));
nonefound = false;
}
- if (nonefound) {
+ if(nonefound) {
return;
// logger.warning("No results found to process with ROC curve analyzer. Got "+iterables.size()+" iterables, "+orderings.size()+" orderings.");
}
@@ -229,7 +228,7 @@ public class OutlierROCCurve implements Evaluator {
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
- if (config.grab(positiveClassNameP)) {
+ if(config.grab(positiveClassNameP)) {
positiveClassName = positiveClassNameP.getValue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierRankingEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierRankingEvaluation.java
new file mode 100644
index 00000000..f78a9790
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierRankingEvaluation.java
@@ -0,0 +1,211 @@
+package de.lmu.ifi.dbs.elki.evaluation.outlier;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.evaluation.scores.AveragePrecisionEvaluation;
+import de.lmu.ifi.dbs.elki.evaluation.scores.MaximumF1Evaluation;
+import de.lmu.ifi.dbs.elki.evaluation.scores.PrecisionAtKEvaluation;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ROCEvaluation;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.SimpleAdapter;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;
+import de.lmu.ifi.dbs.elki.result.EvaluationResult;
+import de.lmu.ifi.dbs.elki.result.EvaluationResult.MeasurementGroup;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.OrderingResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+
+/**
+ * Evaluate outlier scores by their ranking
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ *
+ * @apiviz.uses OutlierResult
+ * @apiviz.has EvaluationResult oneway - - «create»
+ */
+public class OutlierRankingEvaluation implements Evaluator {
+ /**
+ * The logger.
+ */
+ private static final Logging LOG = Logging.getLogger(OutlierRankingEvaluation.class);
+
+ /**
+ * The pattern to identify positive classes.
+ *
+ * <p>
+ * Key: {@code -rocauc.positive}
+ * </p>
+ */
+ public static final OptionID POSITIVE_CLASS_NAME_ID = new OptionID("outliereval.positive", "Class label for the 'positive' class.");
+
+ /**
+ * Stores the "positive" class.
+ */
+ private Pattern positiveClassName;
+
+ /**
+ * Key prefix for statistics logging.
+ */
+ private String key = OutlierRankingEvaluation.class.getName();
+
+ /**
+ * Constructor.
+ *
+ * @param positive_class_name Positive class name pattern
+ */
+ public OutlierRankingEvaluation(Pattern positive_class_name) {
+ super();
+ this.positiveClassName = positive_class_name;
+ }
+
+ private EvaluationResult evaluateOutlierResult(int size, SetDBIDs positiveids, OutlierResult or) {
+ EvaluationResult res = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
+ DBIDsTest test = new DBIDsTest(positiveids);
+
+ MeasurementGroup g = res.newGroup("Evaluation measures");
+ double rocauc = ROCEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
+ g.addMeasure("ROC AUC", rocauc, 0., 1. ,.5, false);
+ double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
+ g.addMeasure("Average Precision", avep, 0., 1., 0., false);
+ double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new OutlierScoreAdapter(or));
+ g.addMeasure("R-Precision", rprec, 0., 1., 0., false);
+ double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new OutlierScoreAdapter(or));
+ g.addMeasure("Maximum F1", maxf1, 0., 1., 0., false);
+ if(LOG.isStatistics()) {
+ LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
+ LOG.statistics(new DoubleStatistic(key + ".precision.average", rocauc));
+ LOG.statistics(new DoubleStatistic(key + ".precision.r", rocauc));
+ LOG.statistics(new DoubleStatistic(key + ".f1.maximum", rocauc));
+ }
+ return res;
+ }
+
+ private EvaluationResult evaluateOrderingResult(int size, SetDBIDs positiveids, DBIDs order) {
+ if(order.size() != size) {
+ throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
+ }
+
+ EvaluationResult res = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
+ DBIDsTest test = new DBIDsTest(positiveids);
+
+ MeasurementGroup g = res.newGroup("Evaluation measures");
+ double rocauc = ROCEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
+ g.addMeasure("ROC AUC", rocauc, 0., 1. ,.5, false);
+ double avep = AveragePrecisionEvaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
+ g.addMeasure("Average Precision", avep, 0., 1., 0., false);
+ double rprec = PrecisionAtKEvaluation.RPRECISION.evaluate(test, new SimpleAdapter(order.iter()));
+ g.addMeasure("R-Precision", rprec, 0., 1., 0., false);
+ double maxf1 = MaximumF1Evaluation.STATIC.evaluate(test, new SimpleAdapter(order.iter()));
+ g.addMeasure("Maximum F1", maxf1, 0., 1., 0., false);
+ if(LOG.isStatistics()) {
+ LOG.statistics(new DoubleStatistic(key + ".rocauc", rocauc));
+ LOG.statistics(new DoubleStatistic(key + ".precision.average", rocauc));
+ LOG.statistics(new DoubleStatistic(key + ".precision.r", rocauc));
+ LOG.statistics(new DoubleStatistic(key + ".f1.maximum", rocauc));
+ }
+ return res;
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result result) {
+ Database db = ResultUtil.findDatabase(baseResult);
+ SetDBIDs positiveids = DBIDUtil.ensureSet(DatabaseUtil.getObjectsByLabelMatch(db, positiveClassName));
+
+ if(positiveids.size() == 0) {
+ LOG.warning("Cannot evaluate outlier results - no objects matched the given pattern.");
+ return;
+ }
+
+ boolean nonefound = true;
+ List<OutlierResult> oresults = ResultUtil.getOutlierResults(result);
+ List<OrderingResult> orderings = ResultUtil.getOrderingResults(result);
+ // Outlier results are the main use case.
+ for(OutlierResult o : oresults) {
+ db.getHierarchy().add(o, evaluateOutlierResult(o.getScores().size(), positiveids, o));
+ // Process them only once.
+ orderings.remove(o.getOrdering());
+ nonefound = false;
+ }
+
+ // FIXME: find appropriate place to add the derived result
+ // otherwise apply an ordering to the database IDs.
+ for(OrderingResult or : orderings) {
+ DBIDs sorted = or.iter(or.getDBIDs());
+ db.getHierarchy().add(or, evaluateOrderingResult(or.getDBIDs().size(), positiveids, sorted));
+ nonefound = false;
+ }
+
+ if(nonefound) {
+ return;
+ // logger.warning("No results found to process with ROC curve analyzer. Got "+iterables.size()+" iterables, "+orderings.size()+" orderings.");
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Pattern for positive class.
+ */
+ protected Pattern positiveClassName = null;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ PatternParameter positiveClassNameP = new PatternParameter(POSITIVE_CLASS_NAME_ID);
+ if(config.grab(positiveClassNameP)) {
+ positiveClassName = positiveClassNameP.getValue();
+ }
+ }
+
+ @Override
+ protected OutlierRankingEvaluation makeInstance() {
+ return new OutlierRankingEvaluation(positiveClassName);
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java
index 71ec42f5..3400a4bf 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierSmROCCurve.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -30,7 +30,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.SetDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
@@ -60,20 +60,21 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
* However, this method has some deficiencies when the mean score is not 0.5, as
* discussed in:
* <p>
- * E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br>
- * On Evaluation of Outlier Rankings and Outlier Scores<br>
+ * E. Schubert, R. Wojdanowski, A. Zimek, H.-P. Kriegel<br />
+ * On Evaluation of Outlier Rankings and Outlier Scores<br />
* In Proceedings of the 12th SIAM International Conference on Data Mining
* (SDM), Anaheim, CA, 2012.
* </p>
*
* @author Erich Schubert
*
- * @apiviz.landmark
- *
* @apiviz.uses OutlierResult
* @apiviz.has SmROCResult oneway - - «create»
*/
-@Reference(authors = "W. Klement, P. A. Flach, N. Japkowicz, S. Matwin", title = "Smooth Receiver Operating Characteristics (smROC) Curves", booktitle = "In: European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD'11)", url = "http://dx.doi.org/10.1007/978-3-642-23783-6_13")
+@Reference(authors = "W. Klement, P. A. Flach, N. Japkowicz, S. Matwin", //
+title = "Smooth Receiver Operating Characteristics (smROC) Curves", //
+booktitle = "In: European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD'11)", //
+url = "http://dx.doi.org/10.1007/978-3-642-23783-6_13")
public class OutlierSmROCCurve implements Evaluator {
/**
* The label we use for marking ROCAUC values.
@@ -101,13 +102,13 @@ public class OutlierSmROCCurve implements Evaluator {
}
private SmROCResult computeSmROCResult(SetDBIDs positiveids, OutlierResult or) {
- Relation<Double> scores = or.getScores();
+ DoubleRelation scores = or.getScores();
final int size = scores.size();
// Compute mean, for inversion
double mean = 0.0;
for(DBIDIter iditer = scores.iterDBIDs(); iditer.valid(); iditer.advance()) {
- mean += scores.get(iditer) / size;
+ mean += scores.doubleValue(iditer) / size;
}
SmROCResult curve = new SmROCResult(positiveids.size() + 2);
@@ -118,9 +119,9 @@ public class OutlierSmROCCurve implements Evaluator {
int poscnt = 0, negcnt = 0;
double prevscore = Double.NaN;
double x = 0, y = 0;
- for (DBIDIter nei = or.getOrdering().iter(or.getOrdering().getDBIDs()).iter(); nei.valid(); nei.advance()) {
+ for(DBIDIter nei = or.getOrdering().iter(or.getOrdering().getDBIDs()).iter(); nei.valid(); nei.advance()) {
// Analyze next point
- final double curscore = scores.get(nei);
+ final double curscore = scores.doubleValue(nei);
// defer calculation for ties
if(!Double.isNaN(prevscore) && (Double.compare(prevscore, curscore) == 0)) {
// positive or negative match?
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java
index 04233378..5591df3f 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/OutlierThresholdClustering.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.evaluation.outlier;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -32,7 +32,7 @@ import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -88,7 +88,7 @@ public class OutlierThresholdClustering implements Evaluator {
}
private Clustering<Model> split(OutlierResult or) {
- Relation<Double> scores = or.getScores();
+ DoubleRelation scores = or.getScores();
if(scaling instanceof OutlierScalingFunction) {
((OutlierScalingFunction) scaling).prepare(or);
}
@@ -97,7 +97,7 @@ public class OutlierThresholdClustering implements Evaluator {
idlists.add(DBIDUtil.newHashSet());
}
for(DBIDIter iter = scores.getDBIDs().iter(); iter.valid(); iter.advance()) {
- double score = scores.get(iter);
+ double score = scores.doubleValue(iter);
if(scaling != null) {
score = scaling.getScaled(score);
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/package-info.java
index 1e5e6933..81cf2248 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/outlier/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/outlier/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/package-info.java
index 8b8a96dc..54755c75 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java b/src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java
deleted file mode 100644
index 0ca42b29..00000000
--- a/src/de/lmu/ifi/dbs/elki/evaluation/roc/ROC.java
+++ /dev/null
@@ -1,656 +0,0 @@
-package de.lmu.ifi.dbs.elki.evaluation.roc;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2013
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-import de.lmu.ifi.dbs.elki.data.Cluster;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDList;
-import de.lmu.ifi.dbs.elki.database.ids.distance.DistanceDBIDListIter;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
-import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.ArrayIter;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
-
-/**
- * Compute ROC (Receiver Operating Characteristics) curves.
- *
- * A ROC curve compares the true positive rate (y-axis) and false positive rate
- * (x-axis).
- *
- * It was first used in radio signal detection, but has since found widespread
- * use in information retrieval, in particular for evaluating binary
- * classification problems.
- *
- * ROC curves are particularly useful to evaluate a ranking of objects with
- * respect to a binary classification problem: a random sampling will
- * approximately achieve a ROC value of 0.5, while a perfect separation will
- * achieve 1.0 (all positives first) or 0.0 (all negatives first). In most use
- * cases, a score significantly below 0.5 indicates that the algorithm result
- * has been used the wrong way, and should be used backwards.
- *
- * @author Erich Schubert
- *
- * @apiviz.composedOf ScoreIter
- * @apiviz.composedOf Predicate
- * @apiviz.has XYCurve
- */
-public class ROC {
- /**
- * Iterator for comparing scores.
- *
- * @author Erich Schubert
- */
- public static interface ScoreIter extends Iter {
- /**
- * Test whether the score is the same as the previous objects score.
- *
- * When there is no previous result, implementations should return false!
- *
- * @return Boolean
- */
- boolean tiedToPrevious();
- }
-
- /**
- * Predicate to test whether an object is a true positive or false positive.
- *
- * @author Erich Schubert
- *
- * @param <T> Data type
- */
- public static interface Predicate<T> {
- boolean test(T o);
- }
-
- /**
- * Compute a ROC curve given a set of positive IDs and a sorted list of
- * (comparable, ID)s, where the comparable object is used to decided when two
- * objects are interchangeable.
- *
- * @param <I> Iterator type
- * @param predicate Predicate to test for positive objects
- * @param iter Iterator over results, with ties.
- * @return area under curve
- */
- public static <I extends ScoreIter> XYCurve materializeROC(Predicate<? super I> predicate, I iter) {
- int poscnt = 0, negcnt = 0;
- XYCurve curve = new XYCurve("False Positive Rate", "True Positive Rate");
-
- // start in bottom left
- curve.add(0.0, 0.0);
-
- while (iter.valid()) {
- // positive or negative match?
- do {
- if (predicate.test(iter)) {
- ++poscnt;
- } else {
- ++negcnt;
- }
- iter.advance();
- } // Loop while tied:
- while (iter.valid() && iter.tiedToPrevious());
- // Add a new point.
- curve.addAndSimplify(negcnt, poscnt);
- }
- // Ensure we end up in the top right corner.
- // Simplification will skip this if we already were.
- curve.addAndSimplify(negcnt, poscnt);
- curve.rescale(1. / negcnt, 1. / poscnt);
- return curve;
- }
-
- /**
- * This adapter can be used for an arbitrary collection of Integers, and uses
- * that id1.compareTo(id2) != 0 for id1 != id2 to satisfy the comparability.
- *
- * Note that of course, no id should occur more than once.
- *
- * The ROC values would be incorrect then anyway!
- *
- * @author Erich Schubert
- *
- * @apiviz.composedOf DBIDIter
- */
- public static class SimpleAdapter implements ScoreIter, DBIDRef {
- /**
- * Original Iterator
- */
- private DBIDIter iter;
-
- /**
- * Constructor
- *
- * @param iter Iterator for object IDs
- */
- public SimpleAdapter(DBIDIter iter) {
- super();
- this.iter = iter;
- }
-
- @Override
- public boolean valid() {
- return iter.valid();
- }
-
- @Override
- public void advance() {
- iter.advance();
- }
-
- @Override
- public boolean tiedToPrevious() {
- return false; // No information.
- }
-
- @Override
- public int internalGetIndex() {
- return iter.internalGetIndex();
- }
-
- @Deprecated
- @Override
- public int hashCode() {
- return super.hashCode();
- }
-
- @Deprecated
- @Override
- public boolean equals(Object obj) {
- return super.equals(obj);
- }
- }
-
- /**
- * This adapter can be used for an arbitrary collection of Integers, and uses
- * that id1.compareTo(id2) != 0 for id1 != id2 to satisfy the comparability.
- *
- * Note that of course, no id should occur more than once.
- *
- * The ROC values would be incorrect then anyway!
- *
- * @author Erich Schubert
- *
- * @apiviz.composedOf DistanceDBIDListIter
- *
- * @param <D> Distance type
- */
- public static class DistanceResultAdapter<D extends Distance<D>> implements ScoreIter, DBIDRef {
- /**
- * Original Iterator
- */
- private DistanceDBIDListIter<D> iter;
-
- /**
- * Distance of previous.
- */
- private D prevDist = null;
-
- /**
- * Constructor
- *
- * @param iter Iterator for distance results
- */
- public DistanceResultAdapter(DistanceDBIDListIter<D> iter) {
- super();
- this.iter = iter;
- }
-
- @Override
- public boolean valid() {
- return iter.valid();
- }
-
- @Override
- public void advance() {
- prevDist = iter.getDistance();
- iter.advance();
- }
-
- @Override
- public int internalGetIndex() {
- return iter.internalGetIndex();
- }
-
- @Override
- public boolean tiedToPrevious() {
- return iter.getDistance().equals(prevDist);
- }
-
- @Deprecated
- @Override
- public int hashCode() {
- return super.hashCode();
- }
-
- @Deprecated
- @Override
- public boolean equals(Object obj) {
- return super.equals(obj);
- }
- }
-
- /**
- * This adapter can be used for an arbitrary collection of Integers, and uses
- * that id1.compareTo(id2) != 0 for id1 != id2 to satisfy the comparability.
- *
- * Note that of course, no id should occur more than once.
- *
- * The ROC values would be incorrect then anyway!
- *
- * @author Erich Schubert
- *
- * @apiviz.composedOf OutlierResult
- */
- public static class OutlierScoreAdapter implements ScoreIter, DBIDRef {
- /**
- * Original iterator.
- */
- private DBIDIter iter;
-
- /**
- * Outlier score.
- */
- private Relation<Double> scores;
-
- /**
- * Previous value.
- */
- double prev = Double.NaN;
-
- /**
- * Constructor.
- *
- * @param o Result
- */
- public OutlierScoreAdapter(OutlierResult o) {
- super();
- this.iter = o.getOrdering().iter(o.getScores().getDBIDs()).iter();
- this.scores = o.getScores();
- }
-
- @Override
- public boolean valid() {
- return iter.valid();
- }
-
- @Override
- public void advance() {
- prev = scores.get(iter);
- iter.advance();
- }
-
- @Override
- public boolean tiedToPrevious() {
- return scores.get(iter) == prev;
- }
-
- @Override
- public int internalGetIndex() {
- return iter.internalGetIndex();
- }
-
- @Deprecated
- @Override
- public int hashCode() {
- return super.hashCode();
- }
-
- @Deprecated
- @Override
- public boolean equals(Object obj) {
- return super.equals(obj);
- }
- }
-
- /**
- * Class to iterate over a number vector in decreasing order.
- *
- * @author Erich Schubert
- *
- * @apiviz.composedOf NumberVector
- */
- public static class DecreasingVectorIter implements ScoreIter, IntegerComparator, ArrayIter {
- /**
- * Order of dimensions.
- */
- private int[] sort;
-
- /**
- * Data vector.
- */
- private NumberVector<?> vec;
-
- /**
- * Current position.
- */
- int pos = 0;
-
- /**
- * Constructor.
- *
- * @param vec Vector to iterate over.
- */
- public DecreasingVectorIter(NumberVector<?> vec) {
- this.vec = vec;
- final int dim = vec.getDimensionality();
- this.sort = new int[dim];
- for (int d = 0; d < dim; d++) {
- sort[d] = d;
- }
- IntegerArrayQuickSort.sort(sort, this);
- }
-
- @Override
- public int compare(int x, int y) {
- return Double.compare(vec.doubleValue(y), vec.doubleValue(x));
- }
-
- public int dim() {
- return sort[pos];
- }
-
- @Override
- public boolean valid() {
- return pos < vec.getDimensionality();
- }
-
- @Override
- public void advance() {
- ++pos;
- }
-
- @Override
- public boolean tiedToPrevious() {
- return pos > 0 && Double.compare(vec.doubleValue(sort[pos]), vec.doubleValue(sort[pos - 1])) == 0;
- }
-
- @Override
- public int getOffset() {
- return pos;
- }
-
- @Override
- public void advance(int count) {
- pos += count;
- }
-
- @Override
- public void retract() {
- pos--;
- }
-
- @Override
- public void seek(int off) {
- pos = off;
- }
- }
-
- /**
- * Class to iterate over a number vector in decreasing order.
- *
- * @author Erich Schubert
- *
- * @apiviz.composedOf NumberVector
- */
- public static class IncreasingVectorIter implements ScoreIter, IntegerComparator, ArrayIter {
- /**
- * Order of dimensions.
- */
- private int[] sort;
-
- /**
- * Data vector.
- */
- private NumberVector<?> vec;
-
- /**
- * Current position.
- */
- int pos = 0;
-
- /**
- * Constructor.
- *
- * @param vec Vector to iterate over.
- */
- public IncreasingVectorIter(NumberVector<?> vec) {
- this.vec = vec;
- final int dim = vec.getDimensionality();
- this.sort = new int[dim];
- for (int d = 0; d < dim; d++) {
- sort[d] = d;
- }
- IntegerArrayQuickSort.sort(sort, this);
- }
-
- @Override
- public int compare(int x, int y) {
- return Double.compare(vec.doubleValue(x), vec.doubleValue(y));
- }
-
- public int dim() {
- return sort[pos];
- }
-
- @Override
- public boolean valid() {
- return pos < vec.getDimensionality();
- }
-
- @Override
- public void advance() {
- ++pos;
- }
-
- @Override
- public boolean tiedToPrevious() {
- return pos > 0 && Double.compare(vec.doubleValue(sort[pos]), vec.doubleValue(sort[pos - 1])) == 0;
- }
-
- @Override
- public int getOffset() {
- return pos;
- }
-
- @Override
- public void advance(int count) {
- pos += count;
- }
-
- @Override
- public void retract() {
- pos--;
- }
-
- @Override
- public void seek(int off) {
- pos = off;
- }
- }
-
- /**
- * Class that uses a NumberVector as reference, and considers all non-zero
- * values as positive entries.
- *
- * @apiviz.composedOf NumberVector
- *
- * @author Erich Schubert
- */
- public static class VectorNonZero extends VectorOverThreshold {
- /**
- * Constructor.
- *
- * @param vec Reference vector.
- */
- public VectorNonZero(NumberVector<?> vec) {
- super(vec, 0.);
- }
- }
-
- /**
- * Class that uses a NumberVector as reference, and considers all non-zero
- * values as positive entries.
- *
- * @apiviz.composedOf NumberVector
- *
- * @author Erich Schubert
- */
- public static class VectorOverThreshold implements Predicate<DecreasingVectorIter> {
- /**
- * Vector to use as reference
- */
- NumberVector<?> vec;
-
- /**
- * Threshold
- */
- double threshold;
-
- /**
- * Constructor.
- *
- * @param vec Reference vector.
- * @param threshold Threshold value.
- */
- public VectorOverThreshold(NumberVector<?> vec, double threshold) {
- super();
- this.vec = vec;
- this.threshold = threshold;
- }
-
- @Override
- public boolean test(DecreasingVectorIter o) {
- return Math.abs(vec.doubleValue(o.dim())) > threshold;
- }
- }
-
- /**
- * Class that uses a NumberVector as reference, and considers all zero values
- * as positive entries.
- *
- * @apiviz.composedOf NumberVector
- *
- * @author Erich Schubert
- */
- public static class VectorZero implements Predicate<IncreasingVectorIter> {
- /**
- * Vector to use as reference
- */
- NumberVector<?> vec;
-
- /**
- * Constructor.
- *
- * @param vec Reference vector.
- */
- public VectorZero(NumberVector<?> vec) {
- this.vec = vec;
- }
-
- @Override
- public boolean test(IncreasingVectorIter o) {
- return Math.abs(vec.doubleValue(o.dim())) < Double.MIN_NORMAL;
- }
- }
-
- /**
- * Test predicate using a DBID set as positive elements.
- *
- * @apiviz.composedOf DBIDs
- *
- * @author Erich Schubert
- */
- public static class DBIDsTest implements Predicate<DBIDRef> {
- /**
- * DBID set.
- */
- private DBIDs set;
-
- /**
- * Constructor.
- *
- * @param set Set of positive objects
- */
- public DBIDsTest(DBIDs set) {
- this.set = set;
- }
-
- @Override
- public boolean test(DBIDRef o) {
- return set.contains(o);
- }
- }
-
- /**
- * Compute a ROC curves Area-under-curve for a QueryResult and a Cluster.
- *
- * @param <D> Distance type
- * @param size Database size
- * @param clus Cluster object
- * @param nei Query result
- * @return area under curve
- */
- public static <D extends Distance<D>> double computeROCAUCDistanceResult(int size, Cluster<?> clus, DistanceDBIDList<D> nei) {
- // TODO: ensure the collection has efficient "contains".
- return ROC.computeROCAUCDistanceResult(size, clus.getIDs(), nei);
- }
-
- /**
- * Compute a ROC curves Area-under-curve for a QueryResult and a Cluster.
- *
- * @param <D> Distance type
- * @param size Database size
- * @param ids Collection of positive IDs, should support efficient contains()
- * @param nei Query Result
- * @return area under curve
- */
- public static <D extends Distance<D>> double computeROCAUCDistanceResult(int size, DBIDs ids, DistanceDBIDList<D> nei) {
- // TODO: do not materialize the ROC, but introduce an iterator interface
- XYCurve roc = materializeROC(new DBIDsTest(DBIDUtil.ensureSet(ids)), new DistanceResultAdapter<>(nei.iter()));
- return XYCurve.areaUnderCurve(roc);
- }
-
- /**
- * Compute a ROC curves Area-under-curve for a QueryResult and a Cluster.
- *
- * @param size Database size
- * @param ids Collection of positive IDs, should support efficient contains()
- * @param nei Query Result
- * @return area under curve
- */
- public static double computeROCAUCSimple(int size, DBIDs ids, DBIDs nei) {
- // TODO: do not materialize the ROC, but introduce an iterator interface
- XYCurve roc = materializeROC(new DBIDsTest(DBIDUtil.ensureSet(ids)), new SimpleAdapter(nei.iter()));
- return XYCurve.areaUnderCurve(roc);
- }
-}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/roc/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/roc/package-info.java
deleted file mode 100644
index c6934ba2..00000000
--- a/src/de/lmu/ifi/dbs/elki/evaluation/roc/package-info.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/**
- * <p>Evaluation of rankings using ROC AUC (Receiver Operation Characteristics - Area Under Curve)</p>
- *
- * @apiviz.exclude java.util.*
- */
-/*
-This file is part of ELKI:
-Environment for Developing KDD-Applications Supported by Index-Structures
-
-Copyright (C) 2013
-Ludwig-Maximilians-Universität München
-Lehr- und Forschungseinheit für Datenbanksysteme
-ELKI Development Team
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-package de.lmu.ifi.dbs.elki.evaluation.roc; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/AbstractScoreEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/AbstractScoreEvaluation.java
new file mode 100644
index 00000000..43de7ebd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/AbstractScoreEvaluation.java
@@ -0,0 +1,55 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DistanceResultAdapter;
+import de.lmu.ifi.dbs.elki.evaluation.scores.adapter.OutlierScoreAdapter;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+
+/**
+ * Abstract base class for evaluating a scoring result.
+ *
+ * @author Erich Schubert
+ */
+public abstract class AbstractScoreEvaluation implements ScoreEvaluation {
+ @Override
+ public double evaluate(Cluster<?> clus, DoubleDBIDList nei) {
+ return evaluate(new DBIDsTest(DBIDUtil.ensureSet(clus.getIDs())), new DistanceResultAdapter(nei.iter()));
+ }
+
+ @Override
+ public double evaluate(DBIDs ids, DoubleDBIDList nei) {
+ return evaluate(new DBIDsTest(DBIDUtil.ensureSet(ids)), new DistanceResultAdapter(nei.iter()));
+ }
+
+ @Override
+ public double evaluate(DBIDs ids, OutlierResult outlier) {
+ return evaluate(new DBIDsTest(DBIDUtil.ensureSet(ids)), new OutlierScoreAdapter(outlier));
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/AveragePrecisionEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/AveragePrecisionEvaluation.java
new file mode 100644
index 00000000..bc3d1522
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/AveragePrecisionEvaluation.java
@@ -0,0 +1,76 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Evaluate using average precision.
+ *
+ * @author Erich Schubert
+ */
+public class AveragePrecisionEvaluation extends AbstractScoreEvaluation {
+ /**
+ * Static instance
+ */
+ public static final AveragePrecisionEvaluation STATIC = new AveragePrecisionEvaluation();
+
+ @Override
+ public <I extends ScoreIter> double evaluate(Predicate<? super I> predicate, I iter) {
+ int poscnt = 0, negcnt = 0, pospre = 0;
+ double acc = 0.;
+ while(iter.valid()) {
+ // positive or negative match?
+ do {
+ if(predicate.test(iter)) {
+ ++poscnt;
+ }
+ else {
+ ++negcnt;
+ }
+ iter.advance();
+ } // Loop while tied:
+ while(iter.valid() && iter.tiedToPrevious());
+ // Add a new point.
+ if(poscnt > pospre) {
+ acc += (poscnt / (double) (poscnt + negcnt)) * (poscnt - pospre);
+ pospre = poscnt;
+ }
+ }
+ return (poscnt > 0) ? acc / poscnt : 0.;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected AveragePrecisionEvaluation makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/MaximumF1Evaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/MaximumF1Evaluation.java
new file mode 100644
index 00000000..bac59b45
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/MaximumF1Evaluation.java
@@ -0,0 +1,76 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Evaluate using the maximum F1 score.
+ *
+ * @author Erich Schubert
+ */
+public class MaximumF1Evaluation extends AbstractScoreEvaluation {
+ /**
+ * Static instance
+ */
+ public static final MaximumF1Evaluation STATIC = new MaximumF1Evaluation();
+
+ @Override
+ public <I extends ScoreIter> double evaluate(Predicate<? super I> predicate, I iter) {
+ final int postot = predicate.numPositive();
+ int poscnt = 0, cnt = 0;
+ double maxf1 = 0.;
+ while(iter.valid()) {
+ // positive or negative match?
+ do {
+ if(predicate.test(iter)) {
+ ++poscnt;
+ }
+ ++cnt;
+ iter.advance();
+ } // Loop while tied:
+ while(iter.valid() && iter.tiedToPrevious());
+ // New F1 value:
+ double p = poscnt / (double) cnt, r = poscnt / (double) postot;
+ double f1 = 2. * p * r / (p + r);
+ if(f1 > maxf1) {
+ maxf1 = f1;
+ }
+ }
+ return maxf1;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected MaximumF1Evaluation makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/PrecisionAtKEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/PrecisionAtKEvaluation.java
new file mode 100644
index 00000000..97ab0abd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/PrecisionAtKEvaluation.java
@@ -0,0 +1,127 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+
+/**
+ * Evaluate using Precision@k, or R-precision (when {@code k=0}).
+ *
+ * When {@code k=0}, then it is set to the number of positive objects, and the
+ * returned value is the R-precision, or the precision-recall break-even-point
+ * (BEP).
+ *
+ * @author Erich Schubert
+ */
+public class PrecisionAtKEvaluation extends AbstractScoreEvaluation {
+ /**
+ * Static instance
+ */
+ public static final PrecisionAtKEvaluation RPRECISION = new PrecisionAtKEvaluation(0);
+
+ /**
+ * Parameter k.
+ */
+ int k;
+
+ /**
+ * Constructor.
+ *
+ * @param k k to evaluate at. May be 0.
+ */
+ public PrecisionAtKEvaluation(int k) {
+ this.k = k;
+ }
+
+ @Override
+ public <I extends ScoreIter> double evaluate(Predicate<? super I> predicate, I iter) {
+ final int k = (this.k > 0) ? this.k : predicate.numPositive();
+ int total = 0;
+ double score = 0.;
+ while(iter.valid() && total < k) {
+ int posthis = 0, cntthis = 0;
+ // positive or negative match?
+ do {
+ if(predicate.test(iter)) {
+ ++posthis;
+ }
+ ++cntthis;
+ iter.advance();
+ } // Loop while tied:
+ while(iter.valid() && iter.tiedToPrevious());
+ // Special tie computations only when we reach k.
+ if(total + cntthis > k) {
+ // p = posthis / cntthis chance of being positive
+ // n = (k-total) draws.
+ // expected value = n * p
+ score += posthis / (double) cntthis * (k - total);
+ total = k;
+ break;
+ }
+ score += posthis;
+ total += cntthis;
+ }
+ return score / total;
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ /**
+ * Option ID for the k parameter.
+ */
+ public static final OptionID K_ID = new OptionID("precision.k", //
+ "k value for precision@k. Can be set to 0, to get R-precision, or the precision-recall-break-even-point.");
+
+ /**
+ * K parameter
+ */
+ int k;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+
+ IntParameter kP = new IntParameter(K_ID) //
+ .setDefaultValue(0) //
+ .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT);
+ if(config.grab(kP)) {
+ k = kP.intValue();
+ }
+ }
+
+ @Override
+ protected PrecisionAtKEvaluation makeInstance() {
+ return k > 0 ? new PrecisionAtKEvaluation(k) : RPRECISION;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/ROCEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/ROCEvaluation.java
new file mode 100644
index 00000000..1a74d25a
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/ROCEvaluation.java
@@ -0,0 +1,147 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.math.geometry.XYCurve;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Compute ROC (Receiver Operating Characteristics) curves.
+ *
+ * A ROC curve compares the true positive rate (y-axis) and false positive rate
+ * (x-axis).
+ *
+ * It was first used in radio signal detection, but has since found widespread
+ * use in information retrieval, in particular for evaluating binary
+ * classification problems.
+ *
+ * ROC curves are particularly useful to evaluate a ranking of objects with
+ * respect to a binary classification problem: a random sampling will
+ * approximately achieve a ROC value of 0.5, while a perfect separation will
+ * achieve 1.0 (all positives first) or 0.0 (all negatives first). In most use
+ * cases, a score significantly below 0.5 indicates that the algorithm result
+ * has been used the wrong way, and should be used backwards.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.has XYCurve
+ */
+public class ROCEvaluation extends AbstractScoreEvaluation {
+ /**
+ * Static instance
+ */
+ public static final ROCEvaluation STATIC = new ROCEvaluation();
+
+ @Override
+ public <I extends ScoreIter> double evaluate(Predicate<? super I> predicate, I iter) {
+ return computeROCAUC(predicate, iter);
+ }
+
+ /**
+ * Compute a ROC curve given a set of positive IDs and a sorted list of
+ * (comparable, ID)s, where the comparable object is used to decided when two
+ * objects are interchangeable.
+ *
+ * @param <I> Iterator type
+ * @param predicate Predicate to test for positive objects
+ * @param iter Iterator over results, with ties.
+ * @return area under curve
+ */
+ public static <I extends ScoreIter> XYCurve materializeROC(Predicate<? super I> predicate, I iter) {
+ int poscnt = 0, negcnt = 0;
+ XYCurve curve = new XYCurve("False Positive Rate", "True Positive Rate");
+
+ // start in bottom left
+ curve.add(0.0, 0.0);
+
+ while(iter.valid()) {
+ // positive or negative match?
+ do {
+ if(predicate.test(iter)) {
+ ++poscnt;
+ }
+ else {
+ ++negcnt;
+ }
+ iter.advance();
+ } // Loop while tied:
+ while(iter.valid() && iter.tiedToPrevious());
+ // Add a new point.
+ curve.addAndSimplify(negcnt, poscnt);
+ }
+ // Ensure we end up in the top right corner.
+ // Simplification will skip this if we already were.
+ curve.addAndSimplify(negcnt, poscnt);
+ curve.rescale(1. / negcnt, 1. / poscnt);
+ return curve;
+ }
+
+ /**
+ * Compute the area under the ROC curve given a set of positive IDs and a
+ * sorted list of (comparable, ID)s, where the comparable object is used to
+ * decided when two objects are interchangeable.
+ *
+ * @param <I> Iterator type
+ * @param predicate Predicate to test for positive objects
+ * @param iter Iterator over results, with ties.
+ * @return area under curve
+ */
+ public static <I extends ScoreIter> double computeROCAUC(Predicate<? super I> predicate, I iter) {
+ int poscnt = 0, negcnt = 0, pospre = 0, negpre = 0;
+ double acc = 0.;
+ while(iter.valid()) {
+ // positive or negative match?
+ do {
+ if(predicate.test(iter)) {
+ ++poscnt;
+ }
+ else {
+ ++negcnt;
+ }
+ iter.advance();
+ } // Loop while tied:
+ while(iter.valid() && iter.tiedToPrevious());
+ if(negcnt > negpre) {
+ acc += (poscnt + pospre) * .5 * (negcnt - negpre);
+ negpre = negcnt;
+ }
+ pospre = poscnt;
+ }
+ acc /= negcnt * (long) poscnt;
+ return acc == acc ? acc : 0.5; /* Detect NaN */
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ @Override
+ protected ROCEvaluation makeInstance() {
+ return STATIC;
+ }
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/ScoreEvaluation.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/ScoreEvaluation.java
new file mode 100644
index 00000000..d32714ad
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/ScoreEvaluation.java
@@ -0,0 +1,114 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.Iter;
+
+/**
+ * Compute ranking/scoring based evaluation measures.
+ *
+ * @author Erich Schubert
+ */
+public interface ScoreEvaluation {
+ /**
+ * Evaluate a given predicate and iterator.
+ *
+ * @param predicate Predicate (for positives)
+ * @param iter Iterator
+ * @return Score
+ * @param <I> Iterator type
+ */
+ <I extends ScoreIter> double evaluate(Predicate<? super I> predicate, I iter);
+
+ /**
+ * Evaluate given a cluster (of positive elements) and a scoring list.
+ *
+ * @param clus Cluster object
+ * @param nei Query result
+ * @return Score
+ */
+ double evaluate(Cluster<?> clus, DoubleDBIDList nei);
+
+ /**
+ * Evaluate given a list of positives and a scoring.
+ *
+ * @param ids Positive IDs, usually a set.
+ * @param nei Query Result
+ * @return Score
+ */
+ double evaluate(DBIDs ids, DoubleDBIDList nei);
+
+ /**
+ * Evaluate given a set of positives and a scoring.
+ *
+ * @param ids Positive IDs, usually a set.
+ * @param outlier Outlier detection result
+ * @return Score
+ */
+ double evaluate(DBIDs ids, OutlierResult outlier);
+
+ /**
+ * Iterator for comparing scores.
+ *
+ * @author Erich Schubert
+ */
+ public static interface ScoreIter extends Iter {
+ /**
+ * Test whether the score is the same as the previous objects score.
+ *
+ * When there is no previous result, implementations should return false!
+ *
+ * @return Boolean
+ */
+ boolean tiedToPrevious();
+ }
+
+ /**
+ * Predicate to test whether an object is a true positive or false positive.
+ *
+ * @author Erich Schubert
+ *
+ * @param <T> Data type
+ */
+ public static interface Predicate<T> {
+ /**
+ * Test a result.
+ *
+ * @param o Object to test
+ * @return {@code true} when positive.
+ */
+ boolean test(T o);
+
+ /**
+ * Return the number of positive ids.
+ *
+ * @return Number of positive elements
+ */
+ int numPositive();
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDRefIter.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDRefIter.java
new file mode 100644
index 00000000..2fdb5978
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDRefIter.java
@@ -0,0 +1,39 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+
+/**
+ * A score iterator wrapping a DBIDRef object.
+ *
+ * @author Erich Schubert
+ */
+public interface DBIDRefIter {
+ /**
+ * Get the current DBIDRef.
+ *
+ * @return DBID reference
+ */
+ DBIDRef getRef();
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDsTest.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDsTest.java
new file mode 100644
index 00000000..d41c7af4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DBIDsTest.java
@@ -0,0 +1,59 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.Predicate;
+
+/**
+ * Test predicate using a DBID set as positive elements.
+ *
+ * @apiviz.composedOf DBIDs
+ *
+ * @author Erich Schubert
+ */
+public class DBIDsTest implements Predicate<DBIDRefIter> {
+ /**
+ * DBID set.
+ */
+ private DBIDs set;
+
+ /**
+ * Constructor.
+ *
+ * @param set Set of positive objects
+ */
+ public DBIDsTest(DBIDs set) {
+ this.set = set;
+ }
+
+ @Override
+ public boolean test(DBIDRefIter o) {
+ return set.contains(o.getRef());
+ }
+
+ @Override
+ public int numPositive() {
+ return set.size();
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DecreasingVectorIter.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DecreasingVectorIter.java
new file mode 100644
index 00000000..d0a55b19
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DecreasingVectorIter.java
@@ -0,0 +1,116 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.ScoreIter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.ArrayIter;
+
+/**
+ * Class to iterate over a number vector in decreasing order.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf NumberVector
+ */
+public class DecreasingVectorIter implements ScoreIter, IntegerComparator, ArrayIter {
+ /**
+ * Order of dimensions.
+ */
+ private int[] sort;
+
+ /**
+ * Data vector.
+ */
+ private NumberVector vec;
+
+ /**
+ * Current position.
+ */
+ int pos = 0;
+
+ /**
+ * Constructor.
+ *
+ * @param vec Vector to iterate over.
+ */
+ public DecreasingVectorIter(NumberVector vec) {
+ this.vec = vec;
+ final int dim = vec.getDimensionality();
+ this.sort = new int[dim];
+ for(int d = 0; d < dim; d++) {
+ sort[d] = d;
+ }
+ IntegerArrayQuickSort.sort(sort, this);
+ }
+
+ @Override
+ public int compare(int x, int y) {
+ return Double.compare(vec.doubleValue(y), vec.doubleValue(x));
+ }
+
+ public int dim() {
+ return sort[pos];
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < vec.getDimensionality() && pos >= 0;
+ }
+
+ @Override
+ public DecreasingVectorIter advance() {
+ ++pos;
+ return this;
+ }
+
+ @Override
+ public boolean tiedToPrevious() {
+ return pos > 0 && Double.compare(vec.doubleValue(sort[pos]), vec.doubleValue(sort[pos - 1])) == 0;
+ }
+
+ @Override
+ public int getOffset() {
+ return pos;
+ }
+
+ @Override
+ public DecreasingVectorIter advance(int count) {
+ pos += count;
+ return this;
+ }
+
+ @Override
+ public DecreasingVectorIter retract() {
+ pos--;
+ return this;
+ }
+
+ @Override
+ public DecreasingVectorIter seek(int off) {
+ pos = off;
+ return this;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DistanceResultAdapter.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DistanceResultAdapter.java
new file mode 100644
index 00000000..704c38bc
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/DistanceResultAdapter.java
@@ -0,0 +1,92 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.ScoreIter;
+
+/**
+ * This adapter is used to process a list of (double, DBID) objects. The list
+ * <em>must</em> be sorted appropriately, the score is only used to detect
+ * ties.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf DoubleDBIDListIter
+ */
+public class DistanceResultAdapter implements ScoreIter, DBIDRefIter {
+ /**
+ * Original Iterator
+ */
+ protected DoubleDBIDListIter iter;
+
+ /**
+ * Distance of previous.
+ */
+ protected double prevDist = Double.NaN;
+
+ /**
+ * Constructor
+ *
+ * @param iter Iterator for distance results
+ */
+ public DistanceResultAdapter(DoubleDBIDListIter iter) {
+ super();
+ this.iter = iter;
+ }
+
+ @Override
+ public boolean valid() {
+ return iter.valid();
+ }
+
+ @Override
+ public DistanceResultAdapter advance() {
+ prevDist = iter.doubleValue();
+ iter.advance();
+ return this;
+ }
+
+ @Override
+ public DBIDRef getRef() {
+ return iter;
+ }
+
+ @Override
+ public boolean tiedToPrevious() {
+ return iter.doubleValue() == prevDist;
+ }
+
+ @Deprecated
+ @Override
+ public int hashCode() {
+ return super.hashCode();
+ }
+
+ @Deprecated
+ @Override
+ public boolean equals(Object obj) {
+ return super.equals(obj);
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/FilteredDistanceResultAdapter.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/FilteredDistanceResultAdapter.java
new file mode 100644
index 00000000..9a8a2ddc
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/FilteredDistanceResultAdapter.java
@@ -0,0 +1,76 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter;
+
+/**
+ * This adapter is used to process a list of (double, DBID) objects, but allows
+ * skipping one object in the ranking. The list <em>must</em> be sorted
+ * appropriately, the score is only used to detect ties.
+ *
+ * @author Erich Schubert
+ */
+public class FilteredDistanceResultAdapter extends DistanceResultAdapter {
+ /**
+ * DBID to skip (usually: query object).
+ */
+ DBIDRef skip;
+
+ /**
+ * Constructor
+ *
+ * @param iter Iterator for distance results
+ * @param skip DBID to skip (reference must remain stable!)
+ */
+ public FilteredDistanceResultAdapter(DoubleDBIDListIter iter, DBIDRef skip) {
+ super(iter);
+ this.skip = skip;
+ if(iter.valid() && DBIDUtil.equal(iter, skip)) {
+ iter.advance();
+ }
+ }
+
+ @Override
+ public DistanceResultAdapter advance() {
+ super.advance();
+ if(iter.valid() && DBIDUtil.equal(iter, skip)) {
+ iter.advance();
+ }
+ return this;
+ }
+
+ @Deprecated
+ @Override
+ public int hashCode() {
+ return super.hashCode();
+ }
+
+ @Deprecated
+ @Override
+ public boolean equals(Object obj) {
+ return super.equals(obj);
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/IncreasingVectorIter.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/IncreasingVectorIter.java
new file mode 100644
index 00000000..979dc166
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/IncreasingVectorIter.java
@@ -0,0 +1,116 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.ScoreIter;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.iterator.ArrayIter;
+
+/**
+ * Class to iterate over a number vector in decreasing order.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf NumberVector
+ */
+public class IncreasingVectorIter implements ScoreIter, IntegerComparator, ArrayIter {
+ /**
+ * Order of dimensions.
+ */
+ private int[] sort;
+
+ /**
+ * Data vector.
+ */
+ private NumberVector vec;
+
+ /**
+ * Current position.
+ */
+ int pos = 0;
+
+ /**
+ * Constructor.
+ *
+ * @param vec Vector to iterate over.
+ */
+ public IncreasingVectorIter(NumberVector vec) {
+ this.vec = vec;
+ final int dim = vec.getDimensionality();
+ this.sort = new int[dim];
+ for(int d = 0; d < dim; d++) {
+ sort[d] = d;
+ }
+ IntegerArrayQuickSort.sort(sort, this);
+ }
+
+ @Override
+ public int compare(int x, int y) {
+ return Double.compare(vec.doubleValue(x), vec.doubleValue(y));
+ }
+
+ public int dim() {
+ return sort[pos];
+ }
+
+ @Override
+ public boolean valid() {
+ return pos < vec.getDimensionality();
+ }
+
+ @Override
+ public IncreasingVectorIter advance() {
+ ++pos;
+ return this;
+ }
+
+ @Override
+ public boolean tiedToPrevious() {
+ return pos > 0 && Double.compare(vec.doubleValue(sort[pos]), vec.doubleValue(sort[pos - 1])) == 0;
+ }
+
+ @Override
+ public int getOffset() {
+ return pos;
+ }
+
+ @Override
+ public IncreasingVectorIter advance(int count) {
+ pos += count;
+ return this;
+ }
+
+ @Override
+ public IncreasingVectorIter retract() {
+ pos--;
+ return this;
+ }
+
+ @Override
+ public IncreasingVectorIter seek(int off) {
+ pos = off;
+ return this;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/OutlierScoreAdapter.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/OutlierScoreAdapter.java
new file mode 100644
index 00000000..a36a57d6
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/OutlierScoreAdapter.java
@@ -0,0 +1,103 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.ScoreIter;
+import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
+
+/**
+ * This adapter can be used for an arbitrary collection of Integers, and uses
+ * that id1.compareTo(id2) != 0 for id1 != id2 to satisfy the comparability.
+ *
+ * Note that of course, no id should occur more than once.
+ *
+ * The ROC values would be incorrect then anyway!
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf OutlierResult
+ */
+public class OutlierScoreAdapter implements ScoreIter, DBIDRefIter {
+ /**
+ * Original iterator.
+ */
+ private DBIDIter iter;
+
+ /**
+ * Outlier score.
+ */
+ private DoubleRelation scores;
+
+ /**
+ * Previous value.
+ */
+ double prev = Double.NaN;
+
+ /**
+ * Constructor.
+ *
+ * @param o Result
+ */
+ public OutlierScoreAdapter(OutlierResult o) {
+ super();
+ this.iter = o.getOrdering().iter(o.getScores().getDBIDs()).iter();
+ this.scores = o.getScores();
+ }
+
+ @Override
+ public boolean valid() {
+ return iter.valid();
+ }
+
+ @Override
+ public OutlierScoreAdapter advance() {
+ prev = scores.doubleValue(iter);
+ iter.advance();
+ return this;
+ }
+
+ @Override
+ public boolean tiedToPrevious() {
+ return scores.doubleValue(iter) == prev;
+ }
+
+ @Override
+ public DBIDRef getRef() {
+ return iter;
+ }
+
+ @Deprecated
+ @Override
+ public int hashCode() {
+ return super.hashCode();
+ }
+
+ @Deprecated
+ @Override
+ public boolean equals(Object obj) {
+ return super.equals(obj);
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/SimpleAdapter.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/SimpleAdapter.java
new file mode 100644
index 00000000..beb301a8
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/SimpleAdapter.java
@@ -0,0 +1,89 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.ScoreIter;
+
+/**
+ * This adapter can be used for an arbitrary collection of Integers, and uses
+ * that id1.compareTo(id2) != 0 for id1 != id2 to satisfy the comparability.
+ *
+ * Note that of course, no id should occur more than once.
+ *
+ * The ROC values would be incorrect then anyway!
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.composedOf DBIDIter
+ */
+public class SimpleAdapter implements ScoreIter, DBIDRefIter {
+ /**
+ * Original Iterator
+ */
+ private DBIDIter iter;
+
+ /**
+ * Constructor
+ *
+ * @param iter Iterator for object IDs
+ */
+ public SimpleAdapter(DBIDIter iter) {
+ super();
+ this.iter = iter;
+ }
+
+ @Override
+ public boolean valid() {
+ return iter.valid();
+ }
+
+ @Override
+ public SimpleAdapter advance() {
+ iter.advance();
+ return this;
+ }
+
+ @Override
+ public boolean tiedToPrevious() {
+ return false; // No information.
+ }
+
+ @Deprecated
+ @Override
+ public int hashCode() {
+ return super.hashCode();
+ }
+
+ @Deprecated
+ @Override
+ public boolean equals(Object obj) {
+ return super.equals(obj);
+ }
+
+ @Override
+ public DBIDRef getRef() {
+ return iter;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorNonZero.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorNonZero.java
new file mode 100644
index 00000000..cc97006f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorNonZero.java
@@ -0,0 +1,44 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+
+/**
+ * Class that uses a NumberVector as reference, and considers all non-zero
+ * values as positive entries.
+ *
+ * @apiviz.composedOf NumberVector
+ *
+ * @author Erich Schubert
+ */
+public class VectorNonZero extends VectorOverThreshold {
+ /**
+ * Constructor.
+ *
+ * @param vec Reference vector.
+ */
+ public VectorNonZero(NumberVector vec) {
+ super(vec, 0.);
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorOverThreshold.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorOverThreshold.java
new file mode 100644
index 00000000..99c24332
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorOverThreshold.java
@@ -0,0 +1,79 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.Predicate;
+
+/**
+ * Class that uses a NumberVector as reference, and considers all non-zero
+ * values as positive entries.
+ *
+ * @apiviz.composedOf NumberVector
+ *
+ * @author Erich Schubert
+ */
+public class VectorOverThreshold implements Predicate<DecreasingVectorIter> {
+ /**
+ * Vector to use as reference
+ */
+ NumberVector vec;
+
+ /**
+ * Threshold
+ */
+ double threshold;
+
+ /**
+ * Number of positive values.
+ */
+ int numpos;
+
+ /**
+ * Constructor.
+ *
+ * @param vec Reference vector.
+ * @param threshold Threshold value.
+ */
+ public VectorOverThreshold(NumberVector vec, double threshold) {
+ super();
+ this.vec = vec;
+ this.threshold = threshold;
+ this.numpos = 0;
+ for(int i = 0, l = vec.getDimensionality(); i < l; i++) {
+ if(vec.doubleValue(i) > threshold) {
+ ++numpos;
+ }
+ }
+ }
+
+ @Override
+ public boolean test(DecreasingVectorIter o) {
+ return Math.abs(vec.doubleValue(o.dim())) > threshold;
+ }
+
+ @Override
+ public int numPositive() {
+ return numpos;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorZero.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorZero.java
new file mode 100644
index 00000000..e7b728fc
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/VectorZero.java
@@ -0,0 +1,71 @@
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.evaluation.scores.ScoreEvaluation.Predicate;
+
+/**
+ * Class that uses a NumberVector as reference, and considers all zero values as
+ * positive entries.
+ *
+ * @apiviz.composedOf NumberVector
+ *
+ * @author Erich Schubert
+ */
+public class VectorZero implements Predicate<IncreasingVectorIter> {
+ /**
+ * Vector to use as reference
+ */
+ NumberVector vec;
+
+ /**
+ * Number of positive values.
+ */
+ int numpos;
+
+ /**
+ * Constructor.
+ *
+ * @param vec Reference vector.
+ */
+ public VectorZero(NumberVector vec) {
+ this.vec = vec;
+ this.numpos = 0;
+ for(int i = 0, l = vec.getDimensionality(); i < l; i++) {
+ if(Math.abs(vec.doubleValue(i)) < Double.MIN_NORMAL) {
+ ++numpos;
+ }
+ }
+ }
+
+ @Override
+ public boolean test(IncreasingVectorIter o) {
+ return Math.abs(vec.doubleValue(o.dim())) < Double.MIN_NORMAL;
+ }
+
+ @Override
+ public int numPositive() {
+ return numpos;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/package-info.java
new file mode 100644
index 00000000..746d9a1c
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/adapter/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * Adapter classes for ranking and scoring measures.
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.evaluation.scores.adapter; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/scores/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/scores/package-info.java
new file mode 100644
index 00000000..d409b7f9
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/scores/package-info.java
@@ -0,0 +1,28 @@
+/**
+ * <p>Evaluation of rankings and scorings.</p>
+ *
+ * @apiviz.exclude java.util.*
+ */
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.evaluation.scores; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java b/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java
index 90a11583..e09dba3a 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/ComputeSimilarityMatrixImage.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.evaluation.similaritymatrix;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -42,7 +42,6 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
@@ -90,7 +89,7 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
/**
* The distance function to use
*/
- private DistanceFunction<? super O, ? extends NumberDistance<?, ?>> distanceFunction;
+ private DistanceFunction<? super O> distanceFunction;
/**
* Scaling function to use
@@ -109,7 +108,7 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
* @param scaling Scaling function to use for contrast
* @param skipzero Skip zero values when scaling.
*/
- public ComputeSimilarityMatrixImage(DistanceFunction<? super O, ? extends NumberDistance<?, ?>> distanceFunction, ScalingFunction scaling, boolean skipzero) {
+ public ComputeSimilarityMatrixImage(DistanceFunction<? super O> distanceFunction, ScalingFunction scaling, boolean skipzero) {
super();
this.distanceFunction = distanceFunction;
this.scaling = scaling;
@@ -131,7 +130,7 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
if(order.size() != relation.size()) {
throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
}
- DistanceQuery<O, ? extends NumberDistance<?, ?>> dq = distanceFunction.instantiate(relation);
+ DistanceQuery<O> dq = distanceFunction.instantiate(relation);
final int size = order.size();
// When the logging is in the outer loop, it's just 2*size (providing enough
@@ -148,16 +147,14 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
for(; id1.valid(); id1.advance()) {
id2.seek(id1.getOffset());
for(; id2.valid(); id2.advance()) {
- final double dist = dq.distance(id1, id2).doubleValue();
+ final double dist = dq.distance(id1, id2);
if(!Double.isNaN(dist) && !Double.isInfinite(dist) /* && dist > 0.0 */) {
if(!skipzero || dist > 0.0) {
minmax.put(dist);
}
}
}
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(prog);
}
}
@@ -173,7 +170,7 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
for(int x = 0; x < size && id1.valid(); x++, id1.advance()) {
id2.seek(id1.getOffset());
for(int y = x; y < size && id2.valid(); y++, id2.advance()) {
- double ddist = dq.distance(id1, id2).doubleValue();
+ double ddist = dq.distance(id1, id2);
if(ddist > 0.0) {
ddist = scale.getScaled(ddist);
}
@@ -186,14 +183,10 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
img.setRGB(x, y, col);
img.setRGB(y, x, col);
}
- if(prog != null) {
- prog.incrementProcessed(LOG);
- }
+ LOG.incrementProcessed(prog);
}
}
- if(prog != null) {
- prog.ensureCompleted(LOG);
- }
+ LOG.ensureCompleted(prog);
return new SimilarityMatrix(img, relation, order);
}
@@ -338,7 +331,7 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
/**
* The distance function to use
*/
- private DistanceFunction<O, ? extends NumberDistance<?, ?>> distanceFunction;
+ private DistanceFunction<O> distanceFunction;
/**
* Scaling function to use
@@ -353,7 +346,7 @@ public class ComputeSimilarityMatrixImage<O> implements Evaluator {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<DistanceFunction<O, ? extends NumberDistance<?, ?>>> distanceFunctionP = AbstractAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, DistanceFunction.class);
+ ObjectParameter<DistanceFunction<O>> distanceFunctionP = AbstractAlgorithm.makeParameterDistanceFunction(EuclideanDistanceFunction.class, DistanceFunction.class);
if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/package-info.java
index 4acc0ec6..bcaf0520 100644
--- a/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/similaritymatrix/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
-Copyright (C) 2013
+Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team