diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java | 154 |
1 files changed, 77 insertions, 77 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java index e6245f6e..d6cf40fd 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2013 + Copyright (C) 2014 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; */ import java.util.ArrayList; -import java.util.BitSet; import java.util.HashMap; import java.util.List; import java.util.TreeMap; @@ -45,10 +44,10 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation; import de.lmu.ifi.dbs.elki.database.relation.RelationUtil; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction; import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction; -import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance; import de.lmu.ifi.dbs.elki.logging.Logging; import de.lmu.ifi.dbs.elki.logging.progress.StepProgress; import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid; +import de.lmu.ifi.dbs.elki.utilities.BitsUtil; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -56,7 +55,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer; import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; -import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter; +import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; @@ -69,8 +68,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * </p> * <p> * Reference: <br> - * K. Kailing, H.-P. Kriegel, P. Kroeger: Density connected Subspace Clustering - * for High Dimensional Data. <br> + * K. Kailing, H.-P. Kriegel, P. Kröger:<br /> + * Density connected Subspace Clustering for High Dimensional Data<br /> * In Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004. * </p> * @@ -83,9 +82,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter; * @param <V> the type of FeatureVector handled by this Algorithm */ @Title("SUBCLU: Density connected Subspace Clustering") -@Description("Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. SUBCLU delivers for each subspace the same clusters DBSCAN would have found, when applied to this subspace seperately.") -@Reference(authors = "K. Kailing, H.-P. Kriegel, P. Kröger", title = "Density connected Subspace Clustering for High Dimensional Data. ", booktitle = "Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004") -public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> { +@Description("Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. "// + + "SUBCLU delivers for each subspace the same clusters DBSCAN would have found, "// + + "when applied to this subspace seperately.") +@Reference(authors = "K. Kailing, H.-P. Kriegel, P. Kröger", // +title = "Density connected Subspace Clustering for High Dimensional Data", // +booktitle = "Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004", // +url = "http://www.siam.org/meetings/sdm04/proceedings/sdm04_023.pdf") +public class SUBCLU<V extends NumberVector> extends AbstractAlgorithm<Clustering<SubspaceModel>> implements SubspaceClusteringAlgorithm<SubspaceModel> { /** * The logger for this class. */ @@ -125,12 +129,12 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster * Holds the instance of the distance function specified by * {@link #DISTANCE_FUNCTION_ID}. */ - private DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction; + private DimensionSelectingSubspaceDistanceFunction<V> distanceFunction; /** * Holds the value of {@link #EPSILON_ID}. */ - private DoubleDistance epsilon; + private double epsilon; /** * Holds the value of {@link #MINPTS_ID}. @@ -140,7 +144,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster /** * Holds the result; */ - private Clustering<SubspaceModel<V>> result; + private Clustering<SubspaceModel> result; /** * Constructor. @@ -149,7 +153,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster * @param epsilon Epsilon value * @param minpts Minpts value */ - public SUBCLU(DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction, DoubleDistance epsilon, int minpts) { + public SUBCLU(DimensionSelectingSubspaceDistanceFunction<V> distanceFunction, double epsilon, int minpts) { super(); this.distanceFunction = distanceFunction; this.epsilon = epsilon; @@ -162,15 +166,13 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster * @param relation Relation to process * @return Clustering result */ - public Clustering<SubspaceModel<V>> run(Relation<V> relation) { + public Clustering<SubspaceModel> run(Relation<V> relation) { final int dimensionality = RelationUtil.dimensionality(relation); StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null; // Generate all 1-dimensional clusters - if (stepprog != null) { - stepprog.beginStep(1, "Generate all 1-dimensional clusters.", LOG); - } + LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters."); // mapping of dimensionality to set of subspaces HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>(); @@ -182,35 +184,35 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster // mapping of subspaces to list of clusters TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator()); - for (int d = 0; d < dimensionality; d++) { + for(int d = 0; d < dimensionality; d++) { Subspace currentSubspace = new Subspace(d); List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace); - if (LOG.isDebuggingFiner()) { + if(LOG.isDebuggingFiner()) { StringBuilder msg = new StringBuilder(); msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n"); - for (Cluster<Model> cluster : clusters) { + for(Cluster<Model> cluster : clusters) { msg.append(" " + cluster.getIDs() + "\n"); } LOG.debugFiner(msg.toString()); } - if (!clusters.isEmpty()) { + if(!clusters.isEmpty()) { s_1.add(currentSubspace); clusterMap.put(currentSubspace, clusters); } } // Generate (d+1)-dimensional clusters from d-dimensional clusters - for (int d = 0; d < dimensionality - 1; d++) { - if (stepprog != null) { + for(int d = 0; d < dimensionality - 1; d++) { + if(stepprog != null) { stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG); } List<Subspace> subspaces = subspaceMap.get(d); - if (subspaces == null || subspaces.isEmpty()) { - if (stepprog != null) { - for (int dim = d + 1; dim < dimensionality - 1; dim++) { + if(subspaces == null || subspaces.isEmpty()) { + if(stepprog != null) { + for(int dim = d + 1; dim < dimensionality - 1; dim++) { stepprog.beginStep(dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG); } } @@ -220,37 +222,37 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster List<Subspace> candidates = generateSubspaceCandidates(subspaces); List<Subspace> s_d = new ArrayList<>(); - for (Subspace candidate : candidates) { + for(Subspace candidate : candidates) { Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap); - if (LOG.isDebuggingFine()) { + if(LOG.isDebuggingFine()) { LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString()); } List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace); List<Cluster<Model>> clusters = new ArrayList<>(); - for (Cluster<Model> cluster : bestSubspaceClusters) { + for(Cluster<Model> cluster : bestSubspaceClusters) { List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate); - if (!candidateClusters.isEmpty()) { + if(!candidateClusters.isEmpty()) { clusters.addAll(candidateClusters); } } - if (LOG.isDebuggingFine()) { + if(LOG.isDebuggingFine()) { StringBuilder msg = new StringBuilder(); msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n"); - for (Cluster<Model> c : clusters) { + for(Cluster<Model> c : clusters) { msg.append(" " + c.getIDs() + "\n"); } LOG.debugFine(msg.toString()); } - if (!clusters.isEmpty()) { + if(!clusters.isEmpty()) { s_d.add(candidate); clusterMap.put(candidate, clusters); } } - if (!s_d.isEmpty()) { + if(!s_d.isEmpty()) { subspaceMap.put(d + 1, s_d); } } @@ -258,19 +260,17 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster // build result int numClusters = 1; result = new Clustering<>("SUBCLU clustering", "subclu-clustering"); - for (Subspace subspace : clusterMap.descendingKeySet()) { + for(Subspace subspace : clusterMap.descendingKeySet()) { List<Cluster<Model>> clusters = clusterMap.get(subspace); - for (Cluster<Model> cluster : clusters) { - Cluster<SubspaceModel<V>> newCluster = new Cluster<>(cluster.getIDs()); - newCluster.setModel(new SubspaceModel<>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation))); + for(Cluster<Model> cluster : clusters) { + Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs()); + newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()))); newCluster.setName("cluster_" + numClusters++); result.addToplevelCluster(newCluster); } } - if (stepprog != null) { - stepprog.setCompleted(LOG); - } + LOG.setCompleted(stepprog); return result; } @@ -279,7 +279,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster * * @return the result of the algorithm */ - public Clustering<SubspaceModel<V>> getResult() { + public Clustering<SubspaceModel> getResult() { return result; } @@ -300,7 +300,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster distanceFunction.setSelectedDimensions(subspace.getDimensions()); ProxyDatabase proxy; - if (ids == null) { + if(ids == null) { // TODO: in this case, we might want to use an index - the proxy below // will prevent this! ids = relation.getDBIDs(); @@ -308,9 +308,9 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster proxy = new ProxyDatabase(ids, relation); - DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts); + DBSCAN<V> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts); // run DBSCAN - if (LOG.isVerbose()) { + if(LOG.isVerbose()) { LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString()); } Clustering<Model> dbsres = dbscan.run(proxy); @@ -318,8 +318,8 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster // separate cluster and noise List<Cluster<Model>> clusterAndNoise = dbsres.getAllClusters(); List<Cluster<Model>> clusters = new ArrayList<>(); - for (Cluster<Model> c : clusterAndNoise) { - if (!c.isNoise()) { + for(Cluster<Model> c : clusterAndNoise) { + if(!c.isNoise()) { clusters.add(c); } } @@ -336,7 +336,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) { List<Subspace> candidates = new ArrayList<>(); - if (subspaces.isEmpty()) { + if(subspaces.isEmpty()) { return candidates; } @@ -344,46 +344,46 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster int d = subspaces.get(0).dimensionality(); StringBuilder msgFine = new StringBuilder("\n"); - if (LOG.isDebuggingFiner()) { + if(LOG.isDebuggingFiner()) { msgFine.append("subspaces ").append(subspaces).append('\n'); } - for (int i = 0; i < subspaces.size(); i++) { + for(int i = 0; i < subspaces.size(); i++) { Subspace s1 = subspaces.get(i); - for (int j = i + 1; j < subspaces.size(); j++) { + for(int j = i + 1; j < subspaces.size(); j++) { Subspace s2 = subspaces.get(j); Subspace candidate = s1.join(s2); - if (candidate != null) { - if (LOG.isDebuggingFiner()) { + if(candidate != null) { + if(LOG.isDebuggingFiner()) { msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n'); } // prune irrelevant candidate subspaces List<Subspace> lowerSubspaces = lowerSubspaces(candidate); - if (LOG.isDebuggingFiner()) { + if(LOG.isDebuggingFiner()) { msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n'); } boolean irrelevantCandidate = false; - for (Subspace s : lowerSubspaces) { - if (!subspaces.contains(s)) { + for(Subspace s : lowerSubspaces) { + if(!subspaces.contains(s)) { irrelevantCandidate = true; break; } } - if (!irrelevantCandidate) { + if(!irrelevantCandidate) { candidates.add(candidate); } } } } - if (LOG.isDebuggingFiner()) { + if(LOG.isDebuggingFiner()) { LOG.debugFiner(msgFine.toString()); } - if (LOG.isDebugging()) { + if(LOG.isDebugging()) { StringBuilder msg = new StringBuilder(); msg.append(d + 1).append("-dimensional candidate subspaces: "); - for (Subspace candidate : candidates) { + for(Subspace candidate : candidates) { msg.append(candidate.dimensonsToString()).append(' '); } LOG.debug(msg.toString()); @@ -401,16 +401,16 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster */ private List<Subspace> lowerSubspaces(Subspace subspace) { int dimensionality = subspace.dimensionality(); - if (dimensionality <= 1) { + if(dimensionality <= 1) { return null; } // order result according to the dimensions List<Subspace> result = new ArrayList<>(); - BitSet dimensions = subspace.getDimensions(); - for (int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) { - BitSet newDimensions = (BitSet) dimensions.clone(); - newDimensions.set(dim, false); + long[] dimensions = subspace.getDimensions(); + for(int dim = BitsUtil.nextSetBit(dimensions, 0); dim >= 0; dim = BitsUtil.nextSetBit(dimensions, dim + 1)) { + long[] newDimensions = dimensions.clone(); + BitsUtil.clearI(newDimensions, dim); result.add(new Subspace(newDimensions)); } @@ -432,14 +432,14 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) { Subspace bestSubspace = null; - for (Subspace subspace : subspaces) { + for(Subspace subspace : subspaces) { int min = Integer.MAX_VALUE; - if (subspace.isSubspace(candidate)) { + if(subspace.isSubspace(candidate)) { List<Cluster<Model>> clusters = clusterMap.get(subspace); - for (Cluster<Model> cluster : clusters) { + for(Cluster<Model> cluster : clusters) { int clusterSize = cluster.size(); - if (clusterSize < min) { + if(clusterSize < min) { min = clusterSize; bestSubspace = subspace; } @@ -467,29 +467,29 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster * * @apiviz.exclude */ - public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer { + public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer { protected int minpts = 0; - protected DoubleDistance epsilon = null; + protected double epsilon; - protected DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distance = null; + protected DimensionSelectingSubspaceDistanceFunction<V> distance = null; @Override protected void makeOptions(Parameterization config) { super.makeOptions(config); - ObjectParameter<DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance>> param = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DimensionSelectingSubspaceDistanceFunction.class, SubspaceEuclideanDistanceFunction.class); - if (config.grab(param)) { + ObjectParameter<DimensionSelectingSubspaceDistanceFunction<V>> param = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DimensionSelectingSubspaceDistanceFunction.class, SubspaceEuclideanDistanceFunction.class); + if(config.grab(param)) { distance = param.instantiateClass(config); } - DistanceParameter<DoubleDistance> epsilonP = new DistanceParameter<>(EPSILON_ID, distance); - if (config.grab(epsilonP)) { + DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID); + if(config.grab(epsilonP)) { epsilon = epsilonP.getValue(); } IntParameter minptsP = new IntParameter(MINPTS_ID); minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); - if (config.grab(minptsP)) { + if(config.grab(minptsP)) { minpts = minptsP.getValue(); } } |