summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java156
1 files changed, 79 insertions, 77 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index 01a693e4..37b3eb57 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -43,13 +43,13 @@ import de.lmu.ifi.dbs.elki.data.Subspace;
import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -57,7 +57,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -96,11 +96,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("CLIQUE: Automatic Subspace Clustering of High Dimensional Data for Data Mining Applications")
@Description("Grid-based algorithm to identify dense clusters in subspaces of maximum dimensionality.")
@Reference(authors = "R. Agrawal, J. Gehrke, D. Gunopulos, P. Raghavan", title = "Automatic Subspace Clustering of High Dimensional Data for Data Mining Applications", booktitle = "Proc. SIGMOD Conference, Seattle, WA, 1998", url = "http://dx.doi.org/10.1145/276304.276314")
-public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CLIQUE.class);
+ private static final Logging LOG = Logging.getLogger(CLIQUE.class);
/**
* Parameter to specify the number of intervals (units) in each dimension,
@@ -109,7 +109,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.xsi}
* </p>
*/
- public static final OptionID XSI_ID = OptionID.getOrCreateOptionID("clique.xsi", "The number of intervals (units) in each dimension.");
+ public static final OptionID XSI_ID = new OptionID("clique.xsi", "The number of intervals (units) in each dimension.");
/**
* Parameter to specify the density threshold for the selectivity of a unit,
@@ -119,7 +119,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.tau}
* </p>
*/
- public static final OptionID TAU_ID = OptionID.getOrCreateOptionID("clique.tau", "The density threshold for the selectivity of a unit, where the selectivity is" + "the fraction of total feature vectors contained in this unit.");
+ public static final OptionID TAU_ID = new OptionID("clique.tau", "The density threshold for the selectivity of a unit, where the selectivity is" + "the fraction of total feature vectors contained in this unit.");
/**
* Flag to indicate that only subspaces with large coverage (i.e. the fraction
@@ -129,7 +129,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.prune}
* </p>
*/
- public static final OptionID PRUNE_ID = OptionID.getOrCreateOptionID("clique.prune", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
+ public static final OptionID PRUNE_ID = new OptionID("clique.prune", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
/**
* Holds the value of {@link #XSI_ID}.
@@ -169,53 +169,53 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
public Clustering<SubspaceModel<V>> run(Relation<V> relation) {
// 1. Identification of subspaces that contain clusters
// TODO: use step logging.
- if(logger.isVerbose()) {
- logger.verbose("*** 1. Identification of subspaces that contain clusters ***");
+ if(LOG.isVerbose()) {
+ LOG.verbose("*** 1. Identification of subspaces that contain clusters ***");
}
SortedMap<Integer, List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new TreeMap<Integer, List<CLIQUESubspace<V>>>();
List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
- dimensionToDenseSubspaces.put(0, denseSubspaces);
- if(logger.isVerbose()) {
- logger.verbose(" 1-dimensional dense subspaces: " + denseSubspaces.size());
+ dimensionToDenseSubspaces.put(Integer.valueOf(0), denseSubspaces);
+ if(LOG.isVerbose()) {
+ LOG.verbose(" 1-dimensional dense subspaces: " + denseSubspaces.size());
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
for(CLIQUESubspace<V> s : denseSubspaces) {
- logger.debug(s.toString(" "));
+ LOG.debug(s.toString(" "));
}
}
- int dimensionality = DatabaseUtil.dimensionality(relation);
+ int dimensionality = RelationUtil.dimensionality(relation);
for(int k = 2; k <= dimensionality && !denseSubspaces.isEmpty(); k++) {
denseSubspaces = findDenseSubspaces(relation, denseSubspaces);
- dimensionToDenseSubspaces.put(k - 1, denseSubspaces);
- if(logger.isVerbose()) {
- logger.verbose(" " + k + "-dimensional dense subspaces: " + denseSubspaces.size());
+ dimensionToDenseSubspaces.put(Integer.valueOf(k - 1), denseSubspaces);
+ if(LOG.isVerbose()) {
+ LOG.verbose(" " + k + "-dimensional dense subspaces: " + denseSubspaces.size());
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
for(CLIQUESubspace<V> s : denseSubspaces) {
- logger.debug(s.toString(" "));
+ LOG.debug(s.toString(" "));
}
}
}
// 2. Identification of clusters
- if(logger.isVerbose()) {
- logger.verbose("*** 2. Identification of clusters ***");
+ if(LOG.isVerbose()) {
+ LOG.verbose("*** 2. Identification of clusters ***");
}
// build result
int numClusters = 1;
Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("CLIQUE clustering", "clique-clustering");
for(Integer dim : dimensionToDenseSubspaces.keySet()) {
List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
- List<Pair<Subspace<V>, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
+ List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
- if(logger.isVerbose()) {
- logger.verbose(" " + (dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
+ if(LOG.isVerbose()) {
+ LOG.verbose(" " + (dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
}
- for(Pair<Subspace<V>, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
+ for(Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(modelAndCluster.second);
- newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, DatabaseUtil.centroid(relation, modelAndCluster.second)));
+ newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
result.addCluster(newCluster);
}
@@ -232,13 +232,13 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return the clusters in the specified dense subspaces and the corresponding
* cluster models
*/
- private List<Pair<Subspace<V>, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace<V>, ModifiableDBIDs>>();
+ private List<Pair<Subspace, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
for(CLIQUESubspace<V> subspace : denseSubspaces) {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
- if(logger.isDebugging()) {
- logger.debugFine("Subspace " + subspace + " clusters " + clustersInSubspace.size());
+ List<Pair<Subspace, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
+ if(LOG.isDebugging()) {
+ LOG.debugFine("Subspace " + subspace + " clusters " + clustersInSubspace.size());
}
clusters.addAll(clustersInSubspace);
}
@@ -289,7 +289,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return the created one dimensional units
*/
private Collection<CLIQUEUnit<V>> initOneDimensionalUnits(Relation<V> database) {
- int dimensionality = DatabaseUtil.dimensionality(database);
+ int dimensionality = RelationUtil.dimensionality(database);
// initialize minima and maxima
double[] minima = new double[dimensionality];
double[] maxima = new double[dimensionality];
@@ -312,12 +312,12 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
unit_lengths[d] = (maxima[d] - minima[d]) / xsi;
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" minima: ").append(FormatUtil.format(minima, ", ", 2));
msg.append("\n maxima: ").append(FormatUtil.format(maxima, ", ", 2));
msg.append("\n unit lengths: ").append(FormatUtil.format(unit_lengths, ", ", 2));
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// determine the boundaries of the units
@@ -332,10 +332,10 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" unit bounds ").append(FormatUtil.format(new Matrix(unit_bounds), " "));
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// build the 1 dimensional units
@@ -346,10 +346,10 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" total number of 1-dim units: ").append(units.size());
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
return units;
@@ -367,12 +367,12 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- if((featureVector.doubleValue(d)) > maxima[d - 1]) {
- maxima[d - 1] = (featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ if((featureVector.doubleValue(d)) > maxima[d]) {
+ maxima[d] = (featureVector.doubleValue(d));
}
- if((featureVector.doubleValue(d)) < minima[d - 1]) {
- minima[d - 1] = (featureVector.doubleValue(d));
+ if((featureVector.doubleValue(d)) < minima[d]) {
+ minima[d] = (featureVector.doubleValue(d));
}
}
}
@@ -387,38 +387,37 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*/
private List<CLIQUESubspace<V>> findOneDimensionalDenseSubspaceCandidates(Relation<V> database) {
Collection<CLIQUEUnit<V>> units = initOneDimensionalUnits(database);
- Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
- Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
-
// identify dense units
double total = database.size();
- for(DBIDIter it = database.iterDBIDs(); it.valid();) {
+ for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
V featureVector = database.get(it);
- final DBID id = it.getDBID();
- it.advance();
for(CLIQUEUnit<V> unit : units) {
- unit.addFeatureVector(id, featureVector);
- // unit is a dense unit
- // FIXME: why it.valid()?
- if(!it.valid() && unit.selectivity(total) >= tau) {
- denseUnits.add(unit);
- // add the dense unit to its subspace
- int dim = unit.getIntervals().iterator().next().getDimension();
- CLIQUESubspace<V> subspace_d = denseSubspaces.get(dim);
- if(subspace_d == null) {
- subspace_d = new CLIQUESubspace<V>(dim);
- denseSubspaces.put(dim, subspace_d);
- }
- subspace_d.addDenseUnit(unit);
+ unit.addFeatureVector(it, featureVector);
+ }
+ }
+
+ Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
+ Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
+ for(CLIQUEUnit<V> unit : units) {
+ // unit is a dense unit
+ if(unit.selectivity(total) >= tau) {
+ denseUnits.add(unit);
+ // add the dense unit to its subspace
+ int dim = unit.getIntervals().iterator().next().getDimension();
+ CLIQUESubspace<V> subspace_d = denseSubspaces.get(Integer.valueOf(dim));
+ if(subspace_d == null) {
+ subspace_d = new CLIQUESubspace<V>(dim);
+ denseSubspaces.put(Integer.valueOf(dim), subspace_d);
}
+ subspace_d.addDenseUnit(unit);
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" number of 1-dim dense units: ").append(denseUnits.size());
msg.append("\n number of 1-dim dense subspace candidates: ").append(denseSubspaces.size());
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
List<CLIQUESubspace<V>> subspaceCandidates = new ArrayList<CLIQUESubspace<V>>(denseSubspaces.values());
@@ -574,7 +573,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -584,7 +583,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected int xsi;
protected double tau;
@@ -594,19 +593,22 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter xsiP = new IntParameter(XSI_ID, new GreaterConstraint(0));
+ IntParameter xsiP = new IntParameter(XSI_ID);
+ xsiP.addConstraint(new GreaterConstraint(0));
if(config.grab(xsiP)) {
- xsi = xsiP.getValue();
+ xsi = xsiP.intValue();
}
- DoubleParameter tauP = new DoubleParameter(TAU_ID, new IntervalConstraint(0, IntervalConstraint.IntervalBoundary.OPEN, 1, IntervalConstraint.IntervalBoundary.OPEN));
+ DoubleParameter tauP = new DoubleParameter(TAU_ID);
+ tauP.addConstraint(new GreaterConstraint(0));
+ tauP.addConstraint(new LessConstraint(1));
if(config.grab(tauP)) {
- tau = tauP.getValue();
+ tau = tauP.doubleValue();
}
Flag pruneF = new Flag(PRUNE_ID);
if(config.grab(pruneF)) {
- prune = pruneF.getValue();
+ prune = pruneF.isTrue();
}
}