diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/evaluation/paircounting')
9 files changed, 891 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java new file mode 100644 index 00000000..43de1cb7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/EvaluatePairCountingFMeasure.java @@ -0,0 +1,179 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm; +import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByLabelClustering; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.database.Database; +import de.lmu.ifi.dbs.elki.evaluation.Evaluator; +import de.lmu.ifi.dbs.elki.evaluation.outlier.JudgeOutlierScores; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface; +import de.lmu.ifi.dbs.elki.logging.Logging; +import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector; +import de.lmu.ifi.dbs.elki.result.CollectionResult; +import de.lmu.ifi.dbs.elki.result.HierarchicalResult; +import de.lmu.ifi.dbs.elki.result.Result; +import de.lmu.ifi.dbs.elki.result.ResultUtil; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; +import de.lmu.ifi.dbs.elki.utilities.pairs.Triple; + +/** + * Evaluate a clustering result by comparing it to an existing cluster label. + * + * @author Erich Schubert + * + * @apiviz.landmark + * @apiviz.has PairCountingFMeasure + * @apiviz.has EvaluatePairCountingFMeasure.ScoreResult oneway - - «create» + */ +public class EvaluatePairCountingFMeasure implements Evaluator { + /** + * Logger for debug output. + */ + protected static final Logging logger = Logging.getLogger(JudgeOutlierScores.class); + + /** + * Parameter to obtain the reference clustering. Defaults to a flat label + * clustering. + */ + public static final OptionID REFERENCE_ID = OptionID.getOrCreateOptionID("paircounting.reference", "Reference clustering to compare with. Defaults to a by-label clustering."); + + /** + * Parameter flag for special noise handling. + */ + public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("paircounting.noisespecial", "Use special handling for noise clusters."); + + /** + * Reference algorithm. + */ + private ClusteringAlgorithm<?> referencealg; + + /** + * Apply special handling to noise "clusters". + */ + private boolean noiseSpecialHandling; + + /** + * Constructor. + * + * @param referencealg Reference clustering + * @param noiseSpecialHandling Noise handling flag + */ + public EvaluatePairCountingFMeasure(ClusteringAlgorithm<?> referencealg, boolean noiseSpecialHandling) { + super(); + this.referencealg = referencealg; + this.noiseSpecialHandling = noiseSpecialHandling; + } + + @Override + public void processNewResult(HierarchicalResult baseResult, Result result) { + Database db = ResultUtil.findDatabase(baseResult); + List<Clustering<?>> crs = ResultUtil.getClusteringResults(result); + if(crs == null || crs.size() < 1) { + // logger.warning("No clustering results found - nothing to evaluate!"); + return; + } + // Compute the reference clustering + Result refres = referencealg.run(db); + List<Clustering<?>> refcrs = ResultUtil.getClusteringResults(refres); + if(refcrs.size() == 0) { + logger.warning("Reference algorithm did not return a clustering result!"); + return; + } + if(refcrs.size() > 1) { + logger.warning("Reference algorithm returned more than one result!"); + } + Clustering<?> refc = refcrs.get(0); + for(Clustering<?> c : crs) { + PairSortedGeneratorInterface first = PairCountingFMeasure.getPairGenerator(c, noiseSpecialHandling, false); + PairSortedGeneratorInterface second = PairCountingFMeasure.getPairGenerator(refc, noiseSpecialHandling, false); + Triple<Integer, Integer, Integer> countedPairs = PairCountingFMeasure.countPairs(first, second); + // Use double, since we want double results at the end! + double sum = countedPairs.first + countedPairs.second + countedPairs.third; + double inboth = countedPairs.first / sum; + double infirst = countedPairs.second / sum; + double insecond = countedPairs.third / sum; + double fmeasure = PairCountingFMeasure.fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, 1.0); + ArrayList<Vector> s = new ArrayList<Vector>(4); + s.add(new Vector(new double[] { fmeasure, inboth, infirst, insecond })); + db.getHierarchy().add(c, new ScoreResult(s)); + } + } + + /** + * Result object for outlier score judgements. + * + * @author Erich Schubert + */ + public static class ScoreResult extends CollectionResult<Vector> { + /** + * Constructor. + * + * @param col score result + */ + public ScoreResult(Collection<Vector> col) { + super("Pair Counting F-Measure", "pair-fmeasure", col); + } + } + + /** + * Parameterization class. + * + * @author Erich Schubert + * + * @apiviz.exclude + */ + public static class Parameterizer extends AbstractParameterizer { + protected ClusteringAlgorithm< ?> referencealg = null; + + protected boolean noiseSpecialHandling = false; + + @Override + protected void makeOptions(Parameterization config) { + super.makeOptions(config); + ObjectParameter<ClusteringAlgorithm<?>> referencealgP = new ObjectParameter<ClusteringAlgorithm<?>>(REFERENCE_ID, ClusteringAlgorithm.class, ByLabelClustering.class); + if(config.grab(referencealgP)) { + referencealg = referencealgP.instantiateClass(config); + } + + Flag noiseSpecialHandlingF = new Flag(NOISE_ID); + if(config.grab(noiseSpecialHandlingF)) { + noiseSpecialHandling = noiseSpecialHandlingF.getValue(); + } + } + + @Override + protected EvaluatePairCountingFMeasure makeInstance() { + return new EvaluatePairCountingFMeasure(referencealg, noiseSpecialHandling); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java new file mode 100644 index 00000000..aae966fe --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/PairCountingFMeasure.java @@ -0,0 +1,240 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Collection; + +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.data.Clustering; +import de.lmu.ifi.dbs.elki.data.model.Model; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster; +import de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface; +import de.lmu.ifi.dbs.elki.utilities.pairs.Triple; + +/** + * Compare two clustering results using a pair-counting F-Measure. + * + * A pair are any two objects that belong to the same cluster. + * + * Two clusterings are compared by comparing their pairs; if two clusterings + * completely agree, they also agree on every pair; even when the clusters and + * points are ordered differently. + * + * An empty clustering will of course have no pairs, the trivial all-in-one + * clustering of course has n^2 pairs. Therefore neither recall nor precision + * itself are useful, however their combination -- the F-Measure -- is useful. + * + * @author Erich Schubert + * + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairSortedGeneratorInterface + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorNoise + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorSingleCluster + * @apiviz.uses de.lmu.ifi.dbs.elki.evaluation.paircounting.generator.PairGeneratorMerge + */ +public class PairCountingFMeasure { + /** + * Get a pair generator for the given Clustering + * + * @param <R> Clustering result class + * @param <M> Model type + * @param clusters Clustering result + * @param noiseSpecial Special handling for "noise clusters" + * @param hierarchicalSpecial Special handling for hierarchical clusters + * @return Sorted pair generator + */ + public static <R extends Clustering<M>, M extends Model> PairSortedGeneratorInterface getPairGenerator(R clusters, boolean noiseSpecial, boolean hierarchicalSpecial) { + // collect all clusters into a flat list. + Collection<Cluster<M>> allclusters = clusters.getAllClusters(); + + // Make generators for each cluster + PairSortedGeneratorInterface[] gens = new PairSortedGeneratorInterface[allclusters.size()]; + int i = 0; + for(Cluster<?> c : allclusters) { + if(noiseSpecial && c.isNoise()) { + gens[i] = new PairGeneratorNoise(c); + } + else { + gens[i] = new PairGeneratorSingleCluster(c, hierarchicalSpecial); + } + i++; + } + return new PairGeneratorMerge(gens); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @param beta Beta value for the F-Measure + * @param noiseSpecial Noise receives special treatment + * @param hierarchicalSpecial Special handling for hierarchical clusters + * @return Pair counting F-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta, boolean noiseSpecial, boolean hierarchicalSpecial) { + PairSortedGeneratorInterface first = getPairGenerator(result1, noiseSpecial, hierarchicalSpecial); + PairSortedGeneratorInterface second = getPairGenerator(result2, noiseSpecial, hierarchicalSpecial); + Triple<Integer, Integer, Integer> countedPairs = countPairs(first, second); + return fMeasure(countedPairs.first, countedPairs.second, countedPairs.third, beta); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @param beta Beta value for the F-Measure + * @return Pair counting F-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, double beta) { + return compareClusterings(result1, result2, beta, false, false); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @param noiseSpecial Noise receives special treatment + * @return Pair counting F-1-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2, boolean noiseSpecial, boolean hierarchicalSpecial) { + return compareClusterings(result1, result2, 1.0, noiseSpecial, hierarchicalSpecial); + } + + /** + * Compare two clustering results. + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @return Pair counting F-1-Measure result. + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> double compareClusterings(R result1, S result2) { + return compareClusterings(result1, result2, 1.0, false, false); + } + + /** + * Compare two sets of generated pairs. It determines how many objects of the + * first set are in both sets, just in the first set or just in the second + * set.</p> + * + * + * @param <R> Result type + * @param <M> Model type + * @param <S> Result type + * @param <N> Model type + * @param result1 first result + * @param result2 second result + * @return Returns a {@link Triple} that contains the number of objects that + * are in both sets (FIRST), the number of objects that are just in + * the first set (SECOND) and the number of object that are just in + * the second set (THIRD). + * + */ + public static <R extends Clustering<M>, M extends Model, S extends Clustering<N>, N extends Model> Triple<Integer, Integer, Integer> countPairs(R result1, S result2) { + PairSortedGeneratorInterface first = getPairGenerator(result1, false, false); + PairSortedGeneratorInterface second = getPairGenerator(result2, false, false); + return countPairs(first, second); + } + + /** + * Compare two sets of generated pairs. It determines how many objects of the + * first set are in both sets, just in the first set or just in the second + * set.</p> + * + * @param first first set + * @param second second set + * @return Returns a {@link Triple} that contains the number of objects that + * are in both sets (FIRST), the number of objects that are just in + * the first set (SECOND) and the number of object that are just in + * the second set (THIRD). + */ + public static Triple<Integer, Integer, Integer> countPairs(PairSortedGeneratorInterface first, PairSortedGeneratorInterface second) { + int inboth = 0; + int infirst = 0; + int insecond = 0; + + while(first.current() != null && second.current() != null) { + int cmp = first.current().compareTo(second.current()); + if(cmp == 0) { + inboth++; + first.next(); + second.next(); + } + else if(cmp < 0) { + infirst++; + first.next(); + } + else { + insecond++; + second.next(); + } + } + while(first.current() != null) { + infirst++; + first.next(); + } + while(second.current() != null) { + insecond++; + second.next(); + } + return new Triple<Integer, Integer, Integer>(inboth, infirst, insecond); + } + + /** + * Computes the F-measure of the given parameters.</p> + * <p> + * Returns + * <code>((1+beta*beta) * inBoth) / ((1+beta*beta) * inBoth + (beta*beta)*inFirst + inSecond)</code> + * </p> + * + * @param inBoth The number of objects that are in both sets. + * @param inFirst The number of objects that are in the first set. + * @param inSecond The number of objects that are in the second set. + * @param beta The beta values for the f-measure. + * @return The F-measure. + */ + public static double fMeasure(int inBoth, int inFirst, int inSecond, double beta) { + // System.out.println("Both: "+inboth+" First: "+infirst+" Second: "+insecond); + double fmeasure = ((1 + beta * beta) * inBoth) / ((1 + beta * beta) * inBoth + (beta * beta) * inFirst + inSecond); + return fmeasure; + } +} diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorMerge.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorMerge.java new file mode 100644 index 00000000..70ec1c33 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorMerge.java @@ -0,0 +1,71 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/** + * Merge the output of multiple generators. + * + * @author Erich Schubert + */ +public class PairGeneratorMerge extends PairSortedGenerator { + /** + * Generators to merge + */ + private PairSortedGeneratorInterface[] generators; + + /** + * Set up merging generator. + * param generators will not be copied! + * + * @param generators array of generators. + */ + public PairGeneratorMerge(PairSortedGeneratorInterface[] generators) { + this.generators = generators; + setCurrent(advance()); + } + + /** + * Advance iterator and return next pair. + * + * This will return the smallest of all the "merged" generator results. + */ + @Override + protected IntIntPair advance() { + IntIntPair min = null; + PairSortedGeneratorInterface best = null; + for (PairSortedGeneratorInterface gen : this.generators) { + IntIntPair n = gen.current(); + if (n != null && (min == null || n.compareTo(min) < 0)) { + min = n; + best = gen; + } + } + // advance best generator + if (best != null) { + best.next(); + } + return min; + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorNoise.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorNoise.java new file mode 100644 index 00000000..0680f15b --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorNoise.java @@ -0,0 +1,90 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/** + * Generator for noise points. + * + * This generator will generate pairs (a,a) for all elements a in the given + * list. + * + * @author Erich Schubert + */ +public class PairGeneratorNoise extends PairSortedGenerator { + /** + * Ids to use + */ + private int[] ids; + + /** + * Current position. + */ + private int pos; + + /** + * Crate new generator for a base cluster object. + * + * @param cluster object + */ + public PairGeneratorNoise(Cluster<?> cluster) { + // build int array for the cluster + // TODO: copy less. + DBIDs dbids = cluster.getIDs(); + ids = new int[dbids.size()]; + int j = 0; + for (DBID id : dbids) { + ids[j] = id.getIntegerID(); + j++; + } + Arrays.sort(ids); + + pos = 0; + if(ids.length > 0) { + setCurrent(new IntIntPair(ids[pos], ids[pos])); + } + } + + /** + * Advance iterator and return new pair. + */ + @Override + protected IntIntPair advance() { + if(current() == null) { + return null; + } + pos++; + if(pos >= ids.length) { + return null; + } + else { + return new IntIntPair(ids[pos], ids[pos]); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorSingleCluster.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorSingleCluster.java new file mode 100644 index 00000000..cc9b4fb7 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairGeneratorSingleCluster.java @@ -0,0 +1,135 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import java.util.Arrays; + +import de.lmu.ifi.dbs.elki.data.Cluster; +import de.lmu.ifi.dbs.elki.database.ids.DBID; +import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; +import de.lmu.ifi.dbs.elki.database.ids.DBIDs; +import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/** + * Generate sorted ID pairs for a {@link Cluster}. + * + * @author Erich Schubert + */ +public class PairGeneratorSingleCluster extends PairSortedGenerator { + /** + * Ids in parent clusters + */ + private int[] parentids; + + /** + * ids in this cluster + */ + private int[] thisids; + + /** + * Position in first set + */ + private int pos1; + + /** + * Position in second set + */ + private int pos2; + + /** + * Generate pairs for a hierarchical cluster. + * + * @param cluster Cluster + * @param useHierarchical Use hierarchical mode + */ + public PairGeneratorSingleCluster(Cluster<?> cluster, boolean useHierarchical) { + // collect all parent clusters into a flat list. + java.util.Vector<Cluster<?>> allparents = new java.util.Vector<Cluster<?>>(); + if(useHierarchical && cluster.isHierarchical()) { + allparents.addAll(cluster.getParents()); + for(int i = 0; i < allparents.size(); i++) { + for(Cluster<?> newc : allparents.get(i).getParents()) { + if(!allparents.contains(newc)) { + allparents.add(newc); + } + } + } + } + + // build int array for the cluster + DBIDs cids = cluster.getIDs(); + thisids = new int[cids.size()]; + { + int j = 0; + for(DBID id : cids) { + thisids[j] = id.getIntegerID(); + j++; + } + Arrays.sort(thisids); + } + // TODO: ensure there are no duplicate IDs? + + ModifiableDBIDs idsset = DBIDUtil.newHashSet(cids); + for(Cluster<?> parent : allparents) { + idsset.addAll(parent.getIDs().asCollection()); + } + parentids = new int[idsset.size()]; + { + int j = 0; + for(DBID in : idsset) { + parentids[j] = in.getIntegerID(); + j++; + } + Arrays.sort(parentids); + } + + // initialize iterator. + pos1 = 0; + pos2 = 0; + if(thisids.length > 0) { + setCurrent(new IntIntPair(parentids[pos1], thisids[pos2])); + } + } + + /** + * Advance iterator + */ + @Override + protected IntIntPair advance() { + if(current() == null) { + return null; + } + pos2++; + if(pos2 >= thisids.length) { + pos2 = 0; + pos1++; + } + if(pos1 >= parentids.length) { + return null; + } + else { + return new IntIntPair(parentids[pos1], thisids[pos2]); + } + } +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGenerator.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGenerator.java new file mode 100644 index 00000000..39ad7452 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGenerator.java @@ -0,0 +1,75 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/** + * Implement the common functionality of caching the current result in a base class. + * + * @author Erich Schubert + * + */ +public abstract class PairSortedGenerator implements PairSortedGeneratorInterface { + /** + * Current pair + */ + private IntIntPair cur = null; + + /** + * Set the current pair. + * + * @param cur new current pair. + */ + protected final void setCurrent(IntIntPair cur) { + this.cur = cur; + } + + /** + * Return current pair. + * + * Marked as final to avoid bad implementations. + * If you intend to override this, just implement the interface! + */ + @Override + public final IntIntPair current() { + return cur; + } + + /** + * Return next pair. + * + * Marked as final to avoid bad implementations. + * If you intend to override this, just implement the interface! + */ + @Override + public final IntIntPair next() { + setCurrent(advance()); + return current(); + } + + /** + * Main advance method. + */ + protected abstract IntIntPair advance(); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGeneratorInterface.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGeneratorInterface.java new file mode 100644 index 00000000..2cdad2be --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/PairSortedGeneratorInterface.java @@ -0,0 +1,49 @@ +package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator; +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +import de.lmu.ifi.dbs.elki.utilities.pairs.IntIntPair; + +/** + * Pair generator interface. + * Implementation must return pairs <em>sorted</em>. + * + * Basically this is an Iterator interface, but it deliberately has a current() method, + * that is useful e.g. for merging. + * + * @author Erich Schubert + */ +public interface PairSortedGeneratorInterface { + /** + * Return current pair. + * + * @return current pair, null if there are no more pairs + */ + public IntIntPair current(); + /** + * Return next pair, advance generator + * + * @return next pair, null if there are no more pairs + */ + public IntIntPair next(); +}
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/package-info.java new file mode 100644 index 00000000..e78ebde2 --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/generator/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Pair generation for pair counting evaluation.</p> + */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.evaluation.paircounting.generator;
\ No newline at end of file diff --git a/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/package-info.java b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/package-info.java new file mode 100644 index 00000000..0c1a11cc --- /dev/null +++ b/src/de/lmu/ifi/dbs/elki/evaluation/paircounting/package-info.java @@ -0,0 +1,26 @@ +/** + * <p>Evaluation of clustering results via pair counting.</p> + * */ +/* +This file is part of ELKI: +Environment for Developing KDD-Applications Supported by Index-Structures + +Copyright (C) 2011 +Ludwig-Maximilians-Universität München +Lehr- und Forschungseinheit für Datenbanksysteme +ELKI Development Team + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +package de.lmu.ifi.dbs.elki.evaluation.paircounting;
\ No newline at end of file |