summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/evaluation/paircounting
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/paircounting')
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java179
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java240
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorMerge.java71
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorNoise.java90
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorSingleCluster.java135
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGenerator.java75
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGeneratorInterface.java49
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/package-info.java26
-rw-r--r--src/de/lmu/ifi/dbs/elki/evaluation/paircounting/package-info.java26
9 files changed, 891 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java
new file mode 100644
index 00000000..43de1cb7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java
@@ -0,0 +1,179 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.evaluation.Evaluator;
+import de.lmu.ifi.dbs.elki.evaluation.outlier.JudgeOutlierScores;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.result.CollectionResult;
+import de.lmu.ifi.dbs.elki.result.HierarchicalResult;
+import de.lmu.ifi.dbs.elki.result.Result;
+import de.lmu.ifi.dbs.elki.result.ResultUtil;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Triple;
+
+/**
+ * Evaluate a clustering result by comparing it to an existing cluster label.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.landmark
+ * @apiviz.has PairCountingFMeasure
+ * @apiviz.has EvaluatePairCountingFMeasure.ScoreResult oneway - - «create»
+ */
+public class EvaluatePairCountingFMeasure implements Evaluator {
+ /**
+ * Logger for debug output.
+ */
+ protected static final Logging logger = Logging.getLogger(JudgeOutlierScores.class);
+
+ /**
+ * Parameter to obtain the reference clustering. Defaults to a flat label
+ * clustering.
+ */
+ public static final OptionID REFERENCE_ID = OptionID.getOrCreateOptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering.");
+
+ /**
+ * Parameter flag for special noise handling.
+ */
+ public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("paircounting.noisespecial", "Use special handling for noise clusters.");
+
+ /**
+ * Reference algorithm.
+ */
+ private ClusteringAlgorithm<?> referencealg;
+
+ /**
+ * Apply special handling to noise "clusters".
+ */
+ private boolean noiseSpecialHandling;
+
+ /**
+ * Constructor.
+ *
+ * @param referencealg Reference clustering
+ * @param noiseSpecialHandling Noise handling flag
+ */
+ public EvaluatePairCountingFMeasure(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling) {
+ super();
+ this.referencealg = referencealg;
+ this.noiseSpecialHandling = noiseSpecialHandling;
+ }
+
+ @Override
+ public void processNewResult(HierarchicalResult baseResult, Result result) {
+ Database db = ResultUtil.findDatabase(baseResult);
+ List<Clustering<?>> crs = ResultUtil.getClusteringResults(result);
+ if(crs == null || crs.size() < 1) {
+ // logger.warning("No clustering results found - nothing to evaluate!");
+ return;
+ }
+ // Compute the reference clustering
+ Result refres = referencealg.run(db);
+ List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres);
+ if(refcrs.size() == 0) {
+ logger.warning("Reference algorithm did not return a clustering result!");
+ return;
+ }
+ if(refcrs.size() > 1) {
+ logger.warning("Reference algorithm returned more than one result!");
+ }
+ Clustering<?> refc = refcrs.get(0);
+ for(Clustering<?> c : crs) {
+ PairSortedGeneratorInterface first = PairCountingFMeasure.getPairGenerator(c, noiseSpecialHandling, false);
+ PairSortedGeneratorInterface second = PairCountingFMeasure.getPairGenerator(refc, noiseSpecialHandling, false);
+ Triple<Integer, Integer, Integer> countedPairs = PairCountingFMeasure.countPairs(first, second);
+ // Use double, since we want double results at the end!
+ double sum = countedPairs.first + countedPairs.second + countedPairs.third;
+ double inboth = countedPairs.first / sum;
+ double infirst = countedPairs.second / sum;
+ double insecond = countedPairs.third / sum;
+ double fmeasure = PairCountingFMeasure.fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, 1.0);
+ ArrayList<Vector> s = new ArrayList<Vector>(4);
+ s.add(new Vector(new double[] { fmeasure, inboth, infirst, insecond }));
+ db.getHierarchy().add(c, new ScoreResult(s));
+ }
+ }
+
+ /**
+ * Result object for outlier score judgements.
+ *
+ * @author Erich Schubert
+ */
+ public static class ScoreResult extends CollectionResult<Vector> {
+ /**
+ * Constructor.
+ *
+ * @param col score result
+ */
+ public ScoreResult(Collection<Vector> col) {
+ super("Pair Counting F-Measure", "pair-fmeasure", col);
+ }
+ }
+
+ /**
+ * Parameterization class.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.exclude
+ */
+ public static class Parameterizer extends AbstractParameterizer {
+ protected ClusteringAlgorithm< ?> referencealg = null;
+
+ protected boolean noiseSpecialHandling = false;
+
+ @Override
+ protected void makeOptions(Parameterization config) {
+ super.makeOptions(config);
+ ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class);
+ if(config.grab(referencealgP)) {
+ referencealg = referencealgP.instantiateClass(config);
+ }
+
+ Flag noiseSpecialHandlingF = new Flag(NOISE_ID);
+ if(config.grab(noiseSpecialHandlingF)) {
+ noiseSpecialHandling = noiseSpecialHandlingF.getValue();
+ }
+ }
+
+ @Override
+ protected EvaluatePairCountingFMeasure makeInstance() {
+ return new EvaluatePairCountingFMeasure(referencealg, noiseSpecialHandling);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java
new file mode 100644
index 00000000..aae966fe
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java
@@ -0,0 +1,240 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.Collection;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster;
+import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Triple;
+
+/**
+ * Compare two clustering results using a pair-counting F-Measure.
+ *
+ * A pair are any two objects that belong to the same cluster.
+ *
+ * Two clusterings are compared by comparing their pairs; if two clusterings
+ * completely agree, they also agree on every pair; even when the clusters and
+ * points are ordered differently.
+ *
+ * An empty clustering will of course have no pairs, the trivial all-in-one
+ * clustering of course has n^2 pairs. Therefore neither recall nor precision
+ * itself are useful, however their combination -- the F-Measure -- is useful.
+ *
+ * @author Erich Schubert
+ *
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster
+ * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge
+ */
+public class PairCountingFMeasure {
+ /**
+ * Get a pair generator for the given Clustering
+ *
+ * @param <R> Clustering result class
+ * @param <M> Model type
+ * @param clusters Clustering result
+ * @param noiseSpecial Special handling for "noise clusters"
+ * @param hierarchicalSpecial Special handling for hierarchical clusters
+ * @return Sorted pair generator
+ */
+ public static <R extends Clustering<M>, M extends Model> PairSortedGeneratorInterface getPairGenerator(R clusters, boolean noiseSpecial, boolean hierarchicalSpecial) {
+ // collect all clusters into a flat list.
+ Collection<Cluster<M>> allclusters = clusters.getAllClusters();
+
+ // Make generators for each cluster
+ PairSortedGeneratorInterface[] gens = new PairSortedGeneratorInterface[allclusters.size()];
+ int i = 0;
+ for(Cluster<?> c : allclusters) {
+ if(noiseSpecial && c.isNoise()) {
+ gens[i] = new PairGeneratorNoise(c);
+ }
+ else {
+ gens[i] = new PairGeneratorSingleCluster(c, hierarchicalSpecial);
+ }
+ i++;
+ }
+ return new PairGeneratorMerge(gens);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @param beta Beta value for the F-Measure
+ * @param noiseSpecial Noise receives special treatment
+ * @param hierarchicalSpecial Special handling for hierarchical clusters
+ * @return Pair counting F-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta, boolean noiseSpecial, boolean hierarchicalSpecial) {
+ PairSortedGeneratorInterface first = getPairGenerator(result1, noiseSpecial, hierarchicalSpecial);
+ PairSortedGeneratorInterface second = getPairGenerator(result2, noiseSpecial, hierarchicalSpecial);
+ Triple<Integer, Integer, Integer> countedPairs = countPairs(first, second);
+ return fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, beta);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @param beta Beta value for the F-Measure
+ * @return Pair counting F-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta) {
+ return compareClusterings(result1, result2, beta, false, false);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @param noiseSpecial Noise receives special treatment
+ * @return Pair counting F-1-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, boolean noiseSpecial, boolean hierarchicalSpecial) {
+ return compareClusterings(result1, result2, 1.0, noiseSpecial, hierarchicalSpecial);
+ }
+
+ /**
+ * Compare two clustering results.
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @return Pair counting F-1-Measure result.
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2) {
+ return compareClusterings(result1, result2, 1.0, false, false);
+ }
+
+ /**
+ * Compare two sets of generated pairs. It determines how many objects of the
+ * first set are in both sets, just in the first set or just in the second
+ * set.</p>
+ *
+ *
+ * @param <R> Result type
+ * @param <M> Model type
+ * @param <S> Result type
+ * @param <N> Model type
+ * @param result1 first result
+ * @param result2 second result
+ * @return Returns a {@link Triple} that contains the number of objects that
+ * are in both sets (FIRST), the number of objects that are just in
+ * the first set (SECOND) and the number of object that are just in
+ * the second set (THIRD).
+ *
+ */
+ public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> Triple<Integer, Integer, Integer> countPairs(R result1, S result2) {
+ PairSortedGeneratorInterface first = getPairGenerator(result1, false, false);
+ PairSortedGeneratorInterface second = getPairGenerator(result2, false, false);
+ return countPairs(first, second);
+ }
+
+ /**
+ * Compare two sets of generated pairs. It determines how many objects of the
+ * first set are in both sets, just in the first set or just in the second
+ * set.</p>
+ *
+ * @param first first set
+ * @param second second set
+ * @return Returns a {@link Triple} that contains the number of objects that
+ * are in both sets (FIRST), the number of objects that are just in
+ * the first set (SECOND) and the number of object that are just in
+ * the second set (THIRD).
+ */
+ public static Triple<Integer, Integer, Integer> countPairs(PairSortedGeneratorInterface first, PairSortedGeneratorInterface second) {
+ int inboth = 0;
+ int infirst = 0;
+ int insecond = 0;
+
+ while(first.current() != null && second.current() != null) {
+ int cmp = first.current().compareTo(second.current());
+ if(cmp == 0) {
+ inboth++;
+ first.next();
+ second.next();
+ }
+ else if(cmp < 0) {
+ infirst++;
+ first.next();
+ }
+ else {
+ insecond++;
+ second.next();
+ }
+ }
+ while(first.current() != null) {
+ infirst++;
+ first.next();
+ }
+ while(second.current() != null) {
+ insecond++;
+ second.next();
+ }
+ return new Triple<Integer, Integer, Integer>(inboth, infirst, insecond);
+ }
+
+ /**
+ * Computes the F-measure of the given parameters.</p>
+ * <p>
+ * Returns
+ * <code>((1+beta*beta) * inBoth) / ((1+beta*beta) * inBoth + (beta*beta)*inFirst + inSecond)</code>
+ * </p>
+ *
+ * @param inBoth The number of objects that are in both sets.
+ * @param inFirst The number of objects that are in the first set.
+ * @param inSecond The number of objects that are in the second set.
+ * @param beta The beta values for the f-measure.
+ * @return The F-measure.
+ */
+ public static double fMeasure(int inBoth, int inFirst, int inSecond, double beta) {
+ // System.out.println("Both: "+inboth+" First: "+infirst+" Second: "+insecond);
+ double fmeasure = ((1 + beta * beta) * inBoth) / ((1 + beta * beta) * inBoth + (beta * beta) * inFirst + inSecond);
+ return fmeasure;
+ }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorMerge.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorMerge.java
new file mode 100644
index 00000000..70ec1c33
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorMerge.java
@@ -0,0 +1,71 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
+
+/**
+ * Merge the output of multiple generators.
+ *
+ * @author Erich Schubert
+ */
+public class PairGeneratorMerge extends PairSortedGenerator {
+ /**
+ * Generators to merge
+ */
+ private PairSortedGeneratorInterface[] generators;
+
+ /**
+ * Set up merging generator.
+ * param generators will not be copied!
+ *
+ * @param generators array of generators.
+ */
+ public PairGeneratorMerge(PairSortedGeneratorInterface[] generators) {
+ this.generators = generators;
+ setCurrent(advance());
+ }
+
+ /**
+ * Advance iterator and return next pair.
+ *
+ * This will return the smallest of all the "merged" generator results.
+ */
+ @Override
+ protected IntIntPair advance() {
+ IntIntPair min = null;
+ PairSortedGeneratorInterface best = null;
+ for (PairSortedGeneratorInterface gen : this.generators) {
+ IntIntPair n = gen.current();
+ if (n != null && (min == null || n.compareTo(min) < 0)) {
+ min = n;
+ best = gen;
+ }
+ }
+ // advance best generator
+ if (best != null) {
+ best.next();
+ }
+ return min;
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorNoise.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorNoise.java
new file mode 100644
index 00000000..0680f15b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorNoise.java
@@ -0,0 +1,90 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
+
+/**
+ * Generator for noise points.
+ *
+ * This generator will generate pairs (a,a) for all elements a in the given
+ * list.
+ *
+ * @author Erich Schubert
+ */
+public class PairGeneratorNoise extends PairSortedGenerator {
+ /**
+ * Ids to use
+ */
+ private int[] ids;
+
+ /**
+ * Current position.
+ */
+ private int pos;
+
+ /**
+ * Crate new generator for a base cluster object.
+ *
+ * @param cluster object
+ */
+ public PairGeneratorNoise(Cluster<?> cluster) {
+ // build int array for the cluster
+ // TODO: copy less.
+ DBIDs dbids = cluster.getIDs();
+ ids = new int[dbids.size()];
+ int j = 0;
+ for (DBID id : dbids) {
+ ids[j] = id.getIntegerID();
+ j++;
+ }
+ Arrays.sort(ids);
+
+ pos = 0;
+ if(ids.length > 0) {
+ setCurrent(new IntIntPair(ids[pos], ids[pos]));
+ }
+ }
+
+ /**
+ * Advance iterator and return new pair.
+ */
+ @Override
+ protected IntIntPair advance() {
+ if(current() == null) {
+ return null;
+ }
+ pos++;
+ if(pos >= ids.length) {
+ return null;
+ }
+ else {
+ return new IntIntPair(ids[pos], ids[pos]);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorSingleCluster.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorSingleCluster.java
new file mode 100644
index 00000000..cc9b4fb7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorSingleCluster.java
@@ -0,0 +1,135 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
+
+/**
+ * Generate sorted ID pairs for a {@link Cluster}.
+ *
+ * @author Erich Schubert
+ */
+public class PairGeneratorSingleCluster extends PairSortedGenerator {
+ /**
+ * Ids in parent clusters
+ */
+ private int[] parentids;
+
+ /**
+ * ids in this cluster
+ */
+ private int[] thisids;
+
+ /**
+ * Position in first set
+ */
+ private int pos1;
+
+ /**
+ * Position in second set
+ */
+ private int pos2;
+
+ /**
+ * Generate pairs for a hierarchical cluster.
+ *
+ * @param cluster Cluster
+ * @param useHierarchical Use hierarchical mode
+ */
+ public PairGeneratorSingleCluster(Cluster<?> cluster, boolean useHierarchical) {
+ // collect all parent clusters into a flat list.
+ java.util.Vector<Cluster<?>> allparents = new java.util.Vector<Cluster<?>>();
+ if(useHierarchical && cluster.isHierarchical()) {
+ allparents.addAll(cluster.getParents());
+ for(int i = 0; i < allparents.size(); i++) {
+ for(Cluster<?> newc : allparents.get(i).getParents()) {
+ if(!allparents.contains(newc)) {
+ allparents.add(newc);
+ }
+ }
+ }
+ }
+
+ // build int array for the cluster
+ DBIDs cids = cluster.getIDs();
+ thisids = new int[cids.size()];
+ {
+ int j = 0;
+ for(DBID id : cids) {
+ thisids[j] = id.getIntegerID();
+ j++;
+ }
+ Arrays.sort(thisids);
+ }
+ // TODO: ensure there are no duplicate IDs?
+
+ ModifiableDBIDs idsset = DBIDUtil.newHashSet(cids);
+ for(Cluster<?> parent : allparents) {
+ idsset.addAll(parent.getIDs().asCollection());
+ }
+ parentids = new int[idsset.size()];
+ {
+ int j = 0;
+ for(DBID in : idsset) {
+ parentids[j] = in.getIntegerID();
+ j++;
+ }
+ Arrays.sort(parentids);
+ }
+
+ // initialize iterator.
+ pos1 = 0;
+ pos2 = 0;
+ if(thisids.length > 0) {
+ setCurrent(new IntIntPair(parentids[pos1], thisids[pos2]));
+ }
+ }
+
+ /**
+ * Advance iterator
+ */
+ @Override
+ protected IntIntPair advance() {
+ if(current() == null) {
+ return null;
+ }
+ pos2++;
+ if(pos2 >= thisids.length) {
+ pos2 = 0;
+ pos1++;
+ }
+ if(pos1 >= parentids.length) {
+ return null;
+ }
+ else {
+ return new IntIntPair(parentids[pos1], thisids[pos2]);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGenerator.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGenerator.java
new file mode 100644
index 00000000..39ad7452
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGenerator.java
@@ -0,0 +1,75 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
+
+/**
+ * Implement the common functionality of caching the current result in a base class.
+ *
+ * @author Erich Schubert
+ *
+ */
+public abstract class PairSortedGenerator implements PairSortedGeneratorInterface {
+ /**
+ * Current pair
+ */
+ private IntIntPair cur = null;
+
+ /**
+ * Set the current pair.
+ *
+ * @param cur new current pair.
+ */
+ protected final void setCurrent(IntIntPair cur) {
+ this.cur = cur;
+ }
+
+ /**
+ * Return current pair.
+ *
+ * Marked as final to avoid bad implementations.
+ * If you intend to override this, just implement the interface!
+ */
+ @Override
+ public final IntIntPair current() {
+ return cur;
+ }
+
+ /**
+ * Return next pair.
+ *
+ * Marked as final to avoid bad implementations.
+ * If you intend to override this, just implement the interface!
+ */
+ @Override
+ public final IntIntPair next() {
+ setCurrent(advance());
+ return current();
+ }
+
+ /**
+ * Main advance method.
+ */
+ protected abstract IntIntPair advance();
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGeneratorInterface.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGeneratorInterface.java
new file mode 100644
index 00000000..2cdad2be
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGeneratorInterface.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator;
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair;
+
+/**
+ * Pair generator interface.
+ * Implementation must return pairs <em>sorted</em>.
+ *
+ * Basically this is an Iterator interface, but it deliberately has a current() method,
+ * that is useful e.g. for merging.
+ *
+ * @author Erich Schubert
+ */
+public interface PairSortedGeneratorInterface {
+ /**
+ * Return current pair.
+ *
+ * @return current pair, null if there are no more pairs
+ */
+ public IntIntPair current();
+ /**
+ * Return next pair, advance generator
+ *
+ * @return next pair, null if there are no more pairs
+ */
+ public IntIntPair next();
+} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/package-info.java
new file mode 100644
index 00000000..e78ebde2
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>Pair generation for pair counting evaluation.</p>
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator; \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/package-info.java
new file mode 100644
index 00000000..0c1a11cc
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>Evaluation of clustering results via pair counting.</p>
+ * */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2011
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.evaluation.paircounting; \ No newline at end of file