summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java154
1 files changed, 77 insertions, 77 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index e6245f6e..d6cf40fd 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -24,7 +24,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
*/
import java.util.ArrayList;
-import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
@@ -45,10 +44,10 @@ import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionSelectingSubspaceDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.utilities.BitsUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -56,7 +55,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DistanceParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@@ -69,8 +68,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* </p>
* <p>
* Reference: <br>
- * K. Kailing, H.-P. Kriegel, P. Kroeger: Density connected Subspace Clustering
- * for High Dimensional Data. <br>
+ * K. Kailing, H.-P. Kriegel, P. Kröger:<br />
+ * Density connected Subspace Clustering for High Dimensional Data<br />
* In Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004.
* </p>
*
@@ -83,9 +82,14 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* @param <V> the type of FeatureVector handled by this Algorithm
*/
@Title("SUBCLU: Density connected Subspace Clustering")
-@Description("Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. SUBCLU delivers for each subspace the same clusters DBSCAN would have found, when applied to this subspace seperately.")
-@Reference(authors = "K. Kailing, H.-P. Kriegel, P. Kröger", title = "Density connected Subspace Clustering for High Dimensional Data. ", booktitle = "Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004")
-public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+@Description("Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. "//
+ + "SUBCLU delivers for each subspace the same clusters DBSCAN would have found, "//
+ + "when applied to this subspace seperately.")
+@Reference(authors = "K. Kailing, H.-P. Kriegel, P. Kröger", //
+title = "Density connected Subspace Clustering for High Dimensional Data", //
+booktitle = "Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004", //
+url = "http://www.siam.org/meetings/sdm04/proceedings/sdm04_023.pdf")
+public class SUBCLU<V extends NumberVector> extends AbstractAlgorithm<Clustering<SubspaceModel>> implements SubspaceClusteringAlgorithm<SubspaceModel> {
/**
* The logger for this class.
*/
@@ -125,12 +129,12 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* Holds the instance of the distance function specified by
* {@link #DISTANCE_FUNCTION_ID}.
*/
- private DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction;
+ private DimensionSelectingSubspaceDistanceFunction<V> distanceFunction;
/**
* Holds the value of {@link #EPSILON_ID}.
*/
- private DoubleDistance epsilon;
+ private double epsilon;
/**
* Holds the value of {@link #MINPTS_ID}.
@@ -140,7 +144,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
/**
* Holds the result;
*/
- private Clustering<SubspaceModel<V>> result;
+ private Clustering<SubspaceModel> result;
/**
* Constructor.
@@ -149,7 +153,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* @param epsilon Epsilon value
* @param minpts Minpts value
*/
- public SUBCLU(DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distanceFunction, DoubleDistance epsilon, int minpts) {
+ public SUBCLU(DimensionSelectingSubspaceDistanceFunction<V> distanceFunction, double epsilon, int minpts) {
super();
this.distanceFunction = distanceFunction;
this.epsilon = epsilon;
@@ -162,15 +166,13 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
* @param relation Relation to process
* @return Clustering result
*/
- public Clustering<SubspaceModel<V>> run(Relation<V> relation) {
+ public Clustering<SubspaceModel> run(Relation<V> relation) {
final int dimensionality = RelationUtil.dimensionality(relation);
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
// Generate all 1-dimensional clusters
- if (stepprog != null) {
- stepprog.beginStep(1, "Generate all 1-dimensional clusters.", LOG);
- }
+ LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
// mapping of dimensionality to set of subspaces
HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
@@ -182,35 +184,35 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
// mapping of subspaces to list of clusters
TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
- for (int d = 0; d < dimensionality; d++) {
+ for(int d = 0; d < dimensionality; d++) {
Subspace currentSubspace = new Subspace(d);
List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
- if (LOG.isDebuggingFiner()) {
+ if(LOG.isDebuggingFiner()) {
StringBuilder msg = new StringBuilder();
msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
- for (Cluster<Model> cluster : clusters) {
+ for(Cluster<Model> cluster : clusters) {
msg.append(" " + cluster.getIDs() + "\n");
}
LOG.debugFiner(msg.toString());
}
- if (!clusters.isEmpty()) {
+ if(!clusters.isEmpty()) {
s_1.add(currentSubspace);
clusterMap.put(currentSubspace, clusters);
}
}
// Generate (d+1)-dimensional clusters from d-dimensional clusters
- for (int d = 0; d < dimensionality - 1; d++) {
- if (stepprog != null) {
+ for(int d = 0; d < dimensionality - 1; d++) {
+ if(stepprog != null) {
stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
}
List<Subspace> subspaces = subspaceMap.get(d);
- if (subspaces == null || subspaces.isEmpty()) {
- if (stepprog != null) {
- for (int dim = d + 1; dim < dimensionality - 1; dim++) {
+ if(subspaces == null || subspaces.isEmpty()) {
+ if(stepprog != null) {
+ for(int dim = d + 1; dim < dimensionality - 1; dim++) {
stepprog.beginStep(dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
}
}
@@ -220,37 +222,37 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
List<Subspace> candidates = generateSubspaceCandidates(subspaces);
List<Subspace> s_d = new ArrayList<>();
- for (Subspace candidate : candidates) {
+ for(Subspace candidate : candidates) {
Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
- if (LOG.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
}
List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
List<Cluster<Model>> clusters = new ArrayList<>();
- for (Cluster<Model> cluster : bestSubspaceClusters) {
+ for(Cluster<Model> cluster : bestSubspaceClusters) {
List<Cluster<Model>> candidateClusters = runDBSCAN(relation, cluster.getIDs(), candidate);
- if (!candidateClusters.isEmpty()) {
+ if(!candidateClusters.isEmpty()) {
clusters.addAll(candidateClusters);
}
}
- if (LOG.isDebuggingFine()) {
+ if(LOG.isDebuggingFine()) {
StringBuilder msg = new StringBuilder();
msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n");
- for (Cluster<Model> c : clusters) {
+ for(Cluster<Model> c : clusters) {
msg.append(" " + c.getIDs() + "\n");
}
LOG.debugFine(msg.toString());
}
- if (!clusters.isEmpty()) {
+ if(!clusters.isEmpty()) {
s_d.add(candidate);
clusterMap.put(candidate, clusters);
}
}
- if (!s_d.isEmpty()) {
+ if(!s_d.isEmpty()) {
subspaceMap.put(d + 1, s_d);
}
}
@@ -258,19 +260,17 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
// build result
int numClusters = 1;
result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
- for (Subspace subspace : clusterMap.descendingKeySet()) {
+ for(Subspace subspace : clusterMap.descendingKeySet()) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
- for (Cluster<Model> cluster : clusters) {
- Cluster<SubspaceModel<V>> newCluster = new Cluster<>(cluster.getIDs());
- newCluster.setModel(new SubspaceModel<>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
+ for(Cluster<Model> cluster : clusters) {
+ Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
+ newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs())));
newCluster.setName("cluster_" + numClusters++);
result.addToplevelCluster(newCluster);
}
}
- if (stepprog != null) {
- stepprog.setCompleted(LOG);
- }
+ LOG.setCompleted(stepprog);
return result;
}
@@ -279,7 +279,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
*
* @return the result of the algorithm
*/
- public Clustering<SubspaceModel<V>> getResult() {
+ public Clustering<SubspaceModel> getResult() {
return result;
}
@@ -300,7 +300,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
distanceFunction.setSelectedDimensions(subspace.getDimensions());
ProxyDatabase proxy;
- if (ids == null) {
+ if(ids == null) {
// TODO: in this case, we might want to use an index - the proxy below
// will prevent this!
ids = relation.getDBIDs();
@@ -308,9 +308,9 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
proxy = new ProxyDatabase(ids, relation);
- DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts);
+ DBSCAN<V> dbscan = new DBSCAN<>(distanceFunction, epsilon, minpts);
// run DBSCAN
- if (LOG.isVerbose()) {
+ if(LOG.isVerbose()) {
LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
}
Clustering<Model> dbsres = dbscan.run(proxy);
@@ -318,8 +318,8 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
// separate cluster and noise
List<Cluster<Model>> clusterAndNoise = dbsres.getAllClusters();
List<Cluster<Model>> clusters = new ArrayList<>();
- for (Cluster<Model> c : clusterAndNoise) {
- if (!c.isNoise()) {
+ for(Cluster<Model> c : clusterAndNoise) {
+ if(!c.isNoise()) {
clusters.add(c);
}
}
@@ -336,7 +336,7 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
List<Subspace> candidates = new ArrayList<>();
- if (subspaces.isEmpty()) {
+ if(subspaces.isEmpty()) {
return candidates;
}
@@ -344,46 +344,46 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
int d = subspaces.get(0).dimensionality();
StringBuilder msgFine = new StringBuilder("\n");
- if (LOG.isDebuggingFiner()) {
+ if(LOG.isDebuggingFiner()) {
msgFine.append("subspaces ").append(subspaces).append('\n');
}
- for (int i = 0; i < subspaces.size(); i++) {
+ for(int i = 0; i < subspaces.size(); i++) {
Subspace s1 = subspaces.get(i);
- for (int j = i + 1; j < subspaces.size(); j++) {
+ for(int j = i + 1; j < subspaces.size(); j++) {
Subspace s2 = subspaces.get(j);
Subspace candidate = s1.join(s2);
- if (candidate != null) {
- if (LOG.isDebuggingFiner()) {
+ if(candidate != null) {
+ if(LOG.isDebuggingFiner()) {
msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n');
}
// prune irrelevant candidate subspaces
List<Subspace> lowerSubspaces = lowerSubspaces(candidate);
- if (LOG.isDebuggingFiner()) {
+ if(LOG.isDebuggingFiner()) {
msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n');
}
boolean irrelevantCandidate = false;
- for (Subspace s : lowerSubspaces) {
- if (!subspaces.contains(s)) {
+ for(Subspace s : lowerSubspaces) {
+ if(!subspaces.contains(s)) {
irrelevantCandidate = true;
break;
}
}
- if (!irrelevantCandidate) {
+ if(!irrelevantCandidate) {
candidates.add(candidate);
}
}
}
}
- if (LOG.isDebuggingFiner()) {
+ if(LOG.isDebuggingFiner()) {
LOG.debugFiner(msgFine.toString());
}
- if (LOG.isDebugging()) {
+ if(LOG.isDebugging()) {
StringBuilder msg = new StringBuilder();
msg.append(d + 1).append("-dimensional candidate subspaces: ");
- for (Subspace candidate : candidates) {
+ for(Subspace candidate : candidates) {
msg.append(candidate.dimensonsToString()).append(' ');
}
LOG.debug(msg.toString());
@@ -401,16 +401,16 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
*/
private List<Subspace> lowerSubspaces(Subspace subspace) {
int dimensionality = subspace.dimensionality();
- if (dimensionality <= 1) {
+ if(dimensionality <= 1) {
return null;
}
// order result according to the dimensions
List<Subspace> result = new ArrayList<>();
- BitSet dimensions = subspace.getDimensions();
- for (int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) {
- BitSet newDimensions = (BitSet) dimensions.clone();
- newDimensions.set(dim, false);
+ long[] dimensions = subspace.getDimensions();
+ for(int dim = BitsUtil.nextSetBit(dimensions, 0); dim >= 0; dim = BitsUtil.nextSetBit(dimensions, dim + 1)) {
+ long[] newDimensions = dimensions.clone();
+ BitsUtil.clearI(newDimensions, dim);
result.add(new Subspace(newDimensions));
}
@@ -432,14 +432,14 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
Subspace bestSubspace = null;
- for (Subspace subspace : subspaces) {
+ for(Subspace subspace : subspaces) {
int min = Integer.MAX_VALUE;
- if (subspace.isSubspace(candidate)) {
+ if(subspace.isSubspace(candidate)) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
- for (Cluster<Model> cluster : clusters) {
+ for(Cluster<Model> cluster : clusters) {
int clusterSize = cluster.size();
- if (clusterSize < min) {
+ if(clusterSize < min) {
min = clusterSize;
bestSubspace = subspace;
}
@@ -467,29 +467,29 @@ public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Cluster
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
protected int minpts = 0;
- protected DoubleDistance epsilon = null;
+ protected double epsilon;
- protected DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance> distance = null;
+ protected DimensionSelectingSubspaceDistanceFunction<V> distance = null;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<DimensionSelectingSubspaceDistanceFunction<V, DoubleDistance>> param = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DimensionSelectingSubspaceDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
- if (config.grab(param)) {
+ ObjectParameter<DimensionSelectingSubspaceDistanceFunction<V>> param = new ObjectParameter<>(DISTANCE_FUNCTION_ID, DimensionSelectingSubspaceDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
+ if(config.grab(param)) {
distance = param.instantiateClass(config);
}
- DistanceParameter<DoubleDistance> epsilonP = new DistanceParameter<>(EPSILON_ID, distance);
- if (config.grab(epsilonP)) {
+ DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID);
+ if(config.grab(epsilonP)) {
epsilon = epsilonP.getValue();
}
IntParameter minptsP = new IntParameter(MINPTS_ID);
minptsP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(minptsP)) {
+ if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
}