summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java156
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java259
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java17
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java310
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java8
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java132
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java18
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java22
8 files changed, 467 insertions, 455 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index 01a693e4..37b3eb57 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -43,13 +43,13 @@ import de.lmu.ifi.dbs.elki.data.Subspace;
import de.lmu.ifi.dbs.elki.data.model.SubspaceModel;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -57,7 +57,7 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.Flag;
@@ -96,11 +96,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("CLIQUE: Automatic Subspace Clustering of High Dimensional Data for Data Mining Applications")
@Description("Grid-based algorithm to identify dense clusters in subspaces of maximum dimensionality.")
@Reference(authors = "R. Agrawal, J. Gehrke, D. Gunopulos, P. Raghavan", title = "Automatic Subspace Clustering of High Dimensional Data for Data Mining Applications", booktitle = "Proc. SIGMOD Conference, Seattle, WA, 1998", url = "http://dx.doi.org/10.1145/276304.276314")
-public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class CLIQUE<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(CLIQUE.class);
+ private static final Logging LOG = Logging.getLogger(CLIQUE.class);
/**
* Parameter to specify the number of intervals (units) in each dimension,
@@ -109,7 +109,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.xsi}
* </p>
*/
- public static final OptionID XSI_ID = OptionID.getOrCreateOptionID("clique.xsi", "The number of intervals (units) in each dimension.");
+ public static final OptionID XSI_ID = new OptionID("clique.xsi", "The number of intervals (units) in each dimension.");
/**
* Parameter to specify the density threshold for the selectivity of a unit,
@@ -119,7 +119,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.tau}
* </p>
*/
- public static final OptionID TAU_ID = OptionID.getOrCreateOptionID("clique.tau", "The density threshold for the selectivity of a unit, where the selectivity is" + "the fraction of total feature vectors contained in this unit.");
+ public static final OptionID TAU_ID = new OptionID("clique.tau", "The density threshold for the selectivity of a unit, where the selectivity is" + "the fraction of total feature vectors contained in this unit.");
/**
* Flag to indicate that only subspaces with large coverage (i.e. the fraction
@@ -129,7 +129,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -clique.prune}
* </p>
*/
- public static final OptionID PRUNE_ID = OptionID.getOrCreateOptionID("clique.prune", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
+ public static final OptionID PRUNE_ID = new OptionID("clique.prune", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
/**
* Holds the value of {@link #XSI_ID}.
@@ -169,53 +169,53 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
public Clustering<SubspaceModel<V>> run(Relation<V> relation) {
// 1. Identification of subspaces that contain clusters
// TODO: use step logging.
- if(logger.isVerbose()) {
- logger.verbose("*** 1. Identification of subspaces that contain clusters ***");
+ if(LOG.isVerbose()) {
+ LOG.verbose("*** 1. Identification of subspaces that contain clusters ***");
}
SortedMap<Integer, List<CLIQUESubspace<V>>> dimensionToDenseSubspaces = new TreeMap<Integer, List<CLIQUESubspace<V>>>();
List<CLIQUESubspace<V>> denseSubspaces = findOneDimensionalDenseSubspaces(relation);
- dimensionToDenseSubspaces.put(0, denseSubspaces);
- if(logger.isVerbose()) {
- logger.verbose(" 1-dimensional dense subspaces: " + denseSubspaces.size());
+ dimensionToDenseSubspaces.put(Integer.valueOf(0), denseSubspaces);
+ if(LOG.isVerbose()) {
+ LOG.verbose(" 1-dimensional dense subspaces: " + denseSubspaces.size());
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
for(CLIQUESubspace<V> s : denseSubspaces) {
- logger.debug(s.toString(" "));
+ LOG.debug(s.toString(" "));
}
}
- int dimensionality = DatabaseUtil.dimensionality(relation);
+ int dimensionality = RelationUtil.dimensionality(relation);
for(int k = 2; k <= dimensionality && !denseSubspaces.isEmpty(); k++) {
denseSubspaces = findDenseSubspaces(relation, denseSubspaces);
- dimensionToDenseSubspaces.put(k - 1, denseSubspaces);
- if(logger.isVerbose()) {
- logger.verbose(" " + k + "-dimensional dense subspaces: " + denseSubspaces.size());
+ dimensionToDenseSubspaces.put(Integer.valueOf(k - 1), denseSubspaces);
+ if(LOG.isVerbose()) {
+ LOG.verbose(" " + k + "-dimensional dense subspaces: " + denseSubspaces.size());
}
- if(logger.isDebugging()) {
+ if(LOG.isDebugging()) {
for(CLIQUESubspace<V> s : denseSubspaces) {
- logger.debug(s.toString(" "));
+ LOG.debug(s.toString(" "));
}
}
}
// 2. Identification of clusters
- if(logger.isVerbose()) {
- logger.verbose("*** 2. Identification of clusters ***");
+ if(LOG.isVerbose()) {
+ LOG.verbose("*** 2. Identification of clusters ***");
}
// build result
int numClusters = 1;
Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("CLIQUE clustering", "clique-clustering");
for(Integer dim : dimensionToDenseSubspaces.keySet()) {
List<CLIQUESubspace<V>> subspaces = dimensionToDenseSubspaces.get(dim);
- List<Pair<Subspace<V>, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
+ List<Pair<Subspace, ModifiableDBIDs>> modelsAndClusters = determineClusters(subspaces);
- if(logger.isVerbose()) {
- logger.verbose(" " + (dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
+ if(LOG.isVerbose()) {
+ LOG.verbose(" " + (dim + 1) + "-dimensional clusters: " + modelsAndClusters.size());
}
- for(Pair<Subspace<V>, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
+ for(Pair<Subspace, ModifiableDBIDs> modelAndCluster : modelsAndClusters) {
Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(modelAndCluster.second);
- newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, DatabaseUtil.centroid(relation, modelAndCluster.second)));
+ newCluster.setModel(new SubspaceModel<V>(modelAndCluster.first, Centroid.make(relation, modelAndCluster.second).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
result.addCluster(newCluster);
}
@@ -232,13 +232,13 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return the clusters in the specified dense subspaces and the corresponding
* cluster models
*/
- private List<Pair<Subspace<V>, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace<V>, ModifiableDBIDs>>();
+ private List<Pair<Subspace, ModifiableDBIDs>> determineClusters(List<CLIQUESubspace<V>> denseSubspaces) {
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
for(CLIQUESubspace<V> subspace : denseSubspaces) {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
- if(logger.isDebugging()) {
- logger.debugFine("Subspace " + subspace + " clusters " + clustersInSubspace.size());
+ List<Pair<Subspace, ModifiableDBIDs>> clustersInSubspace = subspace.determineClusters();
+ if(LOG.isDebugging()) {
+ LOG.debugFine("Subspace " + subspace + " clusters " + clustersInSubspace.size());
}
clusters.addAll(clustersInSubspace);
}
@@ -289,7 +289,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return the created one dimensional units
*/
private Collection<CLIQUEUnit<V>> initOneDimensionalUnits(Relation<V> database) {
- int dimensionality = DatabaseUtil.dimensionality(database);
+ int dimensionality = RelationUtil.dimensionality(database);
// initialize minima and maxima
double[] minima = new double[dimensionality];
double[] maxima = new double[dimensionality];
@@ -312,12 +312,12 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
unit_lengths[d] = (maxima[d] - minima[d]) / xsi;
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" minima: ").append(FormatUtil.format(minima, ", ", 2));
msg.append("\n maxima: ").append(FormatUtil.format(maxima, ", ", 2));
msg.append("\n unit lengths: ").append(FormatUtil.format(unit_lengths, ", ", 2));
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// determine the boundaries of the units
@@ -332,10 +332,10 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" unit bounds ").append(FormatUtil.format(new Matrix(unit_bounds), " "));
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// build the 1 dimensional units
@@ -346,10 +346,10 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" total number of 1-dim units: ").append(units.size());
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
return units;
@@ -367,12 +367,12 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
if(minima.length != featureVector.getDimensionality()) {
throw new IllegalArgumentException("FeatureVectors differ in length.");
}
- for(int d = 1; d <= featureVector.getDimensionality(); d++) {
- if((featureVector.doubleValue(d)) > maxima[d - 1]) {
- maxima[d - 1] = (featureVector.doubleValue(d));
+ for(int d = 0; d < featureVector.getDimensionality(); d++) {
+ if((featureVector.doubleValue(d)) > maxima[d]) {
+ maxima[d] = (featureVector.doubleValue(d));
}
- if((featureVector.doubleValue(d)) < minima[d - 1]) {
- minima[d - 1] = (featureVector.doubleValue(d));
+ if((featureVector.doubleValue(d)) < minima[d]) {
+ minima[d] = (featureVector.doubleValue(d));
}
}
}
@@ -387,38 +387,37 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*/
private List<CLIQUESubspace<V>> findOneDimensionalDenseSubspaceCandidates(Relation<V> database) {
Collection<CLIQUEUnit<V>> units = initOneDimensionalUnits(database);
- Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
- Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
-
// identify dense units
double total = database.size();
- for(DBIDIter it = database.iterDBIDs(); it.valid();) {
+ for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
V featureVector = database.get(it);
- final DBID id = it.getDBID();
- it.advance();
for(CLIQUEUnit<V> unit : units) {
- unit.addFeatureVector(id, featureVector);
- // unit is a dense unit
- // FIXME: why it.valid()?
- if(!it.valid() && unit.selectivity(total) >= tau) {
- denseUnits.add(unit);
- // add the dense unit to its subspace
- int dim = unit.getIntervals().iterator().next().getDimension();
- CLIQUESubspace<V> subspace_d = denseSubspaces.get(dim);
- if(subspace_d == null) {
- subspace_d = new CLIQUESubspace<V>(dim);
- denseSubspaces.put(dim, subspace_d);
- }
- subspace_d.addDenseUnit(unit);
+ unit.addFeatureVector(it, featureVector);
+ }
+ }
+
+ Collection<CLIQUEUnit<V>> denseUnits = new ArrayList<CLIQUEUnit<V>>();
+ Map<Integer, CLIQUESubspace<V>> denseSubspaces = new HashMap<Integer, CLIQUESubspace<V>>();
+ for(CLIQUEUnit<V> unit : units) {
+ // unit is a dense unit
+ if(unit.selectivity(total) >= tau) {
+ denseUnits.add(unit);
+ // add the dense unit to its subspace
+ int dim = unit.getIntervals().iterator().next().getDimension();
+ CLIQUESubspace<V> subspace_d = denseSubspaces.get(Integer.valueOf(dim));
+ if(subspace_d == null) {
+ subspace_d = new CLIQUESubspace<V>(dim);
+ denseSubspaces.put(Integer.valueOf(dim), subspace_d);
}
+ subspace_d.addDenseUnit(unit);
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append(" number of 1-dim dense units: ").append(denseUnits.size());
msg.append("\n number of 1-dim dense subspace candidates: ").append(denseSubspaces.size());
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
List<CLIQUESubspace<V>> subspaceCandidates = new ArrayList<CLIQUESubspace<V>>(denseSubspaces.values());
@@ -574,7 +573,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -584,7 +583,7 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected int xsi;
protected double tau;
@@ -594,19 +593,22 @@ public class CLIQUE<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- IntParameter xsiP = new IntParameter(XSI_ID, new GreaterConstraint(0));
+ IntParameter xsiP = new IntParameter(XSI_ID);
+ xsiP.addConstraint(new GreaterConstraint(0));
if(config.grab(xsiP)) {
- xsi = xsiP.getValue();
+ xsi = xsiP.intValue();
}
- DoubleParameter tauP = new DoubleParameter(TAU_ID, new IntervalConstraint(0, IntervalConstraint.IntervalBoundary.OPEN, 1, IntervalConstraint.IntervalBoundary.OPEN));
+ DoubleParameter tauP = new DoubleParameter(TAU_ID);
+ tauP.addConstraint(new GreaterConstraint(0));
+ tauP.addConstraint(new LessConstraint(1));
if(config.grab(tauP)) {
- tau = tauP.getValue();
+ tau = tauP.doubleValue();
}
Flag pruneF = new Flag(PRUNE_ID);
if(config.grab(pruneF)) {
- prune = pruneF.getValue();
+ prune = pruneF.isTrue();
}
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index df3fe8b5..a3496a0e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -47,6 +47,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.IndexBasedDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.ProxyDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DiSHDistanceFunction;
@@ -55,10 +56,11 @@ import de.lmu.ifi.dbs.elki.distance.distancevalue.PreferenceVectorBasedCorrelati
import de.lmu.ifi.dbs.elki.index.preprocessed.preference.DiSHPreferenceVectorIndex;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.ProjectedCentroid;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyReferenceLists;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
@@ -99,11 +101,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("DiSH: Detecting Subspace cluster Hierarchies")
@Description("Algorithm to find hierarchical correlation clusters in subspaces.")
@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, I. Müller-Gorman, A. Zimek", title = "Detection and Visualization of Subspace Cluster Hierarchies", booktitle = "Proc. 12th International Conference on Database Systems for Advanced Applications (DASFAA), Bangkok, Thailand, 2007", url = "http://dx.doi.org/10.1007/978-3-540-71703-4_15")
-public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(DiSH.class);
+ private static final Logging LOG = Logging.getLogger(DiSH.class);
/**
* Parameter that specifies the maximum radius of the neighborhood to be
@@ -116,7 +118,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* Key: {@code -dish.epsilon}
* </p>
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("dish.epsilon", "The maximum radius of the neighborhood " + "to be considered in each dimension for determination of " + "the preference vector.");
+ public static final OptionID EPSILON_ID = new OptionID("dish.epsilon", "The maximum radius of the neighborhood " + "to be considered in each dimension for determination of " + "the preference vector.");
/**
* Parameter that specifies the a minimum number of points as a smoothing
@@ -128,7 +130,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* Key: {@code -dish.mu}
* </p>
*/
- public static final OptionID MU_ID = OptionID.getOrCreateOptionID("dish.mu", "The minimum number of points as a smoothing factor to avoid the single-link-effekt.");
+ public static final OptionID MU_ID = new OptionID("dish.mu", "The minimum number of points as a smoothing factor to avoid the single-link-effekt.");
/**
* Holds the value of {@link #EPSILON_ID}.
@@ -167,13 +169,13 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
*/
public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
// Instantiate DiSH distance (and thus run the preprocessor)
- if(logger.isVerbose()) {
- logger.verbose("*** Run DiSH preprocessor.");
+ if (LOG.isVerbose()) {
+ LOG.verbose("*** Run DiSH preprocessor.");
}
DiSHDistanceFunction.Instance<V> dishDistanceQuery = dishDistance.instantiate(relation);
// Configure and run OPTICS.
- if(logger.isVerbose()) {
- logger.verbose("*** Run OPTICS algorithm.");
+ if (LOG.isVerbose()) {
+ LOG.verbose("*** Run OPTICS algorithm.");
}
ListParameterization opticsconfig = new ListParameterization(opticsAlgorithmParameters);
opticsconfig.addParameter(OPTICS.DISTANCE_FUNCTION_ID, ProxyDistanceFunction.proxy(dishDistanceQuery));
@@ -183,8 +185,8 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
optics = opticsconfig.tryInstantiate(cls);
ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> opticsResult = optics.run(database, relation);
- if(logger.isVerbose()) {
- logger.verbose("*** Compute Clusters.");
+ if (LOG.isVerbose()) {
+ LOG.verbose("*** Compute Clusters.");
}
return computeClusters(relation, opticsResult, dishDistanceQuery);
}
@@ -197,64 +199,64 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @param distFunc Distance function
*/
private Clustering<SubspaceModel<V>> computeClusters(Relation<V> database, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder, DiSHDistanceFunction.Instance<V> distFunc) {
- int dimensionality = DatabaseUtil.dimensionality(database);
+ int dimensionality = RelationUtil.dimensionality(database);
int minpts = dishDistance.getMinpts();
// extract clusters
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = extractClusters(database, distFunc, clusterOrder);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 1: extract clusters");
- for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 1: extract clusters");
+ for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
}
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// check if there are clusters < minpts
checkClusters(database, distFunc, clustersMap, minpts);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 2: check clusters");
- for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 2: check clusters");
+ for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.first)).append(" ids ").append(c.second.size());
}
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// sort the clusters
List<Cluster<SubspaceModel<V>>> clusters = sortClusters(database, clustersMap);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 3: sort clusters");
- for(Cluster<SubspaceModel<V>> c : clusters) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.getModel().getSubspace().getDimensions())).append(" ids ").append(c.size());
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 3: sort clusters");
+ for (Cluster<SubspaceModel<V>> c : clusters) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getSubspace().getDimensions())).append(" ids ").append(c.size());
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// build the hierarchy
buildHierarchy(database, distFunc, clusters, dimensionality);
- if(logger.isVerbose()) {
- StringBuffer msg = new StringBuffer("Step 4: build hierarchy");
- for(Cluster<SubspaceModel<V>> c : clusters) {
- msg.append("\n").append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
- for(Cluster<SubspaceModel<V>> cluster : c.getParents()) {
+ if (LOG.isVerbose()) {
+ StringBuilder msg = new StringBuilder("Step 4: build hierarchy");
+ for (Cluster<SubspaceModel<V>> c : clusters) {
+ msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
+ for (Cluster<SubspaceModel<V>> cluster : c.getParents()) {
msg.append("\n parent ").append(cluster);
}
- for(Cluster<SubspaceModel<V>> cluster : c.getChildren()) {
+ for (Cluster<SubspaceModel<V>> cluster : c.getChildren()) {
msg.append("\n child ").append(cluster);
}
}
- logger.verbose(msg.toString());
+ LOG.verbose(msg.toString());
}
// build result
Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("DiSH clustering", "dish-clustering");
- for(Cluster<SubspaceModel<V>> c : clusters) {
- if(c.getParents() == null || c.getParents().isEmpty()) {
+ for (Cluster<SubspaceModel<V>> c : clusters) {
+ if (c.getParents() == null || c.getParents().isEmpty()) {
result.addCluster(c);
}
}
@@ -270,12 +272,12 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @return the extracted clusters
*/
private Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> extractClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder) {
- FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), logger) : null;
+ FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), LOG) : null;
int processed = 0;
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>>();
Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<DBID, Pair<BitSet, ArrayModifiableDBIDs>>();
- for(Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
+ for (Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next();
entryMap.put(entry.getID(), entry);
@@ -284,68 +286,68 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
// get the list of (parallel) clusters for the preference vector
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector);
- if(parallelClusters == null) {
+ if (parallelClusters == null) {
parallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
clustersMap.put(preferenceVector, parallelClusters);
}
// look for the proper cluster
Pair<BitSet, ArrayModifiableDBIDs> cluster = null;
- for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
- V c_centroid = DatabaseUtil.centroid(database, c.second, c.first);
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ V c_centroid = ProjectedCentroid.make(c.first, database, c.second).toVector(database);
PreferenceVectorBasedCorrelationDistance dist = distFunc.correlationDistance(object, c_centroid, preferenceVector, preferenceVector);
- if(dist.getCorrelationValue() == entry.getReachability().getCorrelationValue()) {
+ if (dist.getCorrelationValue() == entry.getReachability().getCorrelationValue()) {
double d = distFunc.weightedDistance(object, c_centroid, dist.getCommonPreferenceVector());
- if(d <= 2 * epsilon) {
+ if (d <= 2 * epsilon) {
cluster = c;
break;
}
}
}
- if(cluster == null) {
+ if (cluster == null) {
cluster = new Pair<BitSet, ArrayModifiableDBIDs>(preferenceVector, DBIDUtil.newArray());
parallelClusters.add(cluster);
}
cluster.second.add(entry.getID());
entryToClusterMap.put(entry.getID(), cluster);
- if(progress != null) {
- progress.setProcessed(++processed, logger);
+ if (progress != null) {
+ progress.setProcessed(++processed, LOG);
}
}
- if(progress != null) {
- progress.ensureCompleted(logger);
+ if (progress != null) {
+ progress.ensureCompleted(LOG);
}
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer("Step 0");
- for(List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
- for(Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
- msg.append("\n").append(FormatUtil.format(DatabaseUtil.dimensionality(database), c.first)).append(" ids ").append(c.second.size());
+ if (LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder("Step 0");
+ for (List<Pair<BitSet, ArrayModifiableDBIDs>> clusterList : clustersMap.values()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : clusterList) {
+ msg.append('\n').append(FormatUtil.format(RelationUtil.dimensionality(database), c.first)).append(" ids ").append(c.second.size());
}
}
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
// add the predecessor to the cluster
- for(BitSet pv : clustersMap.keySet()) {
+ for (BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for(Pair<BitSet, ArrayModifiableDBIDs> cluster : parallelClusters) {
- if(cluster.second.isEmpty()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> cluster : parallelClusters) {
+ if (cluster.second.isEmpty()) {
continue;
}
DBID firstID = cluster.second.get(0);
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = entryMap.get(firstID);
DBID predecessorID = entry.getPredecessorID();
- if(predecessorID == null) {
+ if (predecessorID == null) {
continue;
}
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> predecessor = entryMap.get(predecessorID);
// parallel cluster
- if(predecessor.getReachability().getCommonPreferenceVector().equals(entry.getReachability().getCommonPreferenceVector())) {
+ if (predecessor.getReachability().getCommonPreferenceVector().equals(entry.getReachability().getCommonPreferenceVector())) {
continue;
}
- if(predecessor.getReachability().compareTo(entry.getReachability()) < 0) {
+ if (predecessor.getReachability().compareTo(entry.getReachability()) < 0) {
continue;
}
@@ -369,22 +371,21 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @return a sorted list of the clusters
*/
private List<Cluster<SubspaceModel<V>>> sortClusters(Relation<V> database, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) {
- final int db_dim = DatabaseUtil.dimensionality(database);
+ final int db_dim = RelationUtil.dimensionality(database);
// int num = 1;
List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<Cluster<SubspaceModel<V>>>();
- for(BitSet pv : clustersMap.keySet()) {
+ for (BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for(int i = 0; i < parallelClusters.size(); i++) {
+ for (int i = 0; i < parallelClusters.size(); i++) {
Pair<BitSet, ArrayModifiableDBIDs> c = parallelClusters.get(i);
Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.second);
- cluster.setModel(new SubspaceModel<V>(new Subspace<V>(c.first), DatabaseUtil.centroid(database, c.second)));
+ cluster.setModel(new SubspaceModel<V>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
cluster.setHierarchy(new HierarchyReferenceLists<Cluster<SubspaceModel<V>>>(cluster, new ArrayList<Cluster<SubspaceModel<V>>>(), new ArrayList<Cluster<SubspaceModel<V>>>()));
// cluster.setName("Cluster_" + num++);
String subspace = FormatUtil.format(cluster.getModel().getSubspace().getDimensions(), db_dim, "");
- if(parallelClusters.size() > 1) {
+ if (parallelClusters.size() > 1) {
cluster.setName("Cluster_" + subspace + "_" + i);
- }
- else {
+ } else {
cluster.setName("Cluster_" + subspace);
}
clusters.add(cluster);
@@ -417,11 +418,11 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<BitSet, ArrayModifiableDBIDs>(new BitSet(), DBIDUtil.newArray());
- for(BitSet pv : clustersMap.keySet()) {
+ for (BitSet pv : clustersMap.keySet()) {
// noise
- if(pv.cardinality() == 0) {
+ if (pv.cardinality() == 0) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
noise.second.addDBIDs(c.second);
}
}
@@ -429,11 +430,10 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
else {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(parallelClusters.size());
- for(Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
- if(!pv.equals(new BitSet()) && c.second.size() < minpts) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
+ if (!pv.equals(new BitSet()) && c.second.size() < minpts) {
notAssigned.add(c);
- }
- else {
+ } else {
newParallelClusters.add(c);
}
}
@@ -444,15 +444,14 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
clustersMap.clear();
clustersMap.putAll(newClustersMap);
- for(Pair<BitSet, ArrayModifiableDBIDs> c : notAssigned) {
- if(c.second.isEmpty()) {
+ for (Pair<BitSet, ArrayModifiableDBIDs> c : notAssigned) {
+ if (c.second.isEmpty()) {
continue;
}
Pair<BitSet, ArrayModifiableDBIDs> parent = findParent(database, distFunc, c, clustersMap);
- if(parent != null) {
+ if (parent != null) {
parent.second.addDBIDs(c.second);
- }
- else {
+ } else {
noise.second.addDBIDs(c.second);
}
}
@@ -472,30 +471,30 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @return the parent of the specified cluster
*/
private Pair<BitSet, ArrayModifiableDBIDs> findParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Pair<BitSet, ArrayModifiableDBIDs> child, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) {
- V child_centroid = DatabaseUtil.centroid(database, child.second, child.first);
+ V child_centroid = ProjectedCentroid.make(child.first, database, child.second).toVector(database);
Pair<BitSet, ArrayModifiableDBIDs> result = null;
int resultCardinality = -1;
BitSet childPV = child.first;
int childCardinality = childPV.cardinality();
- for(BitSet parentPV : clustersMap.keySet()) {
+ for (BitSet parentPV : clustersMap.keySet()) {
int parentCardinality = parentPV.cardinality();
- if(parentCardinality >= childCardinality) {
+ if (parentCardinality >= childCardinality) {
continue;
}
- if(resultCardinality != -1 && parentCardinality <= resultCardinality) {
+ if (resultCardinality != -1 && parentCardinality <= resultCardinality) {
continue;
}
BitSet pv = (BitSet) childPV.clone();
pv.and(parentPV);
- if(pv.equals(parentPV)) {
+ if (pv.equals(parentPV)) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parentList = clustersMap.get(parentPV);
- for(Pair<BitSet, ArrayModifiableDBIDs> parent : parentList) {
- V parent_centroid = DatabaseUtil.centroid(database, parent.second, parentPV);
+ for (Pair<BitSet, ArrayModifiableDBIDs> parent : parentList) {
+ V parent_centroid = ProjectedCentroid.make(parentPV, database, parent.second).toVector(database);
double d = distFunc.weightedDistance(child_centroid, parent_centroid, parentPV);
- if(d <= 2 * epsilon) {
+ if (d <= 2 * epsilon) {
result = parent;
resultCardinality = parentCardinality;
break;
@@ -516,64 +515,62 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* @param database the database containing the data objects
*/
private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
- StringBuffer msg = new StringBuffer();
- final int db_dim = DatabaseUtil.dimensionality(database);
+ StringBuilder msg = new StringBuilder();
+ final int db_dim = RelationUtil.dimensionality(database);
- for(int i = 0; i < clusters.size() - 1; i++) {
+ for (int i = 0; i < clusters.size() - 1; i++) {
Cluster<SubspaceModel<V>> c_i = clusters.get(i);
int subspaceDim_i = dimensionality - c_i.getModel().getSubspace().dimensionality();
- V ci_centroid = DatabaseUtil.centroid(database, c_i.getIDs(), c_i.getModel().getDimensions());
+ V ci_centroid = ProjectedCentroid.make(c_i.getModel().getDimensions(), database, c_i.getIDs()).toVector(database);
- for(int j = i + 1; j < clusters.size(); j++) {
+ for (int j = i + 1; j < clusters.size(); j++) {
Cluster<SubspaceModel<V>> c_j = clusters.get(j);
int subspaceDim_j = dimensionality - c_j.getModel().getSubspace().dimensionality();
- if(subspaceDim_i < subspaceDim_j) {
- if(logger.isDebugging()) {
- msg.append("\n l_i=").append(subspaceDim_i).append(" pv_i=[").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions())).append("]");
- msg.append("\n l_j=").append(subspaceDim_j).append(" pv_j=[").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions())).append("]");
+ if (subspaceDim_i < subspaceDim_j) {
+ if (LOG.isDebugging()) {
+ msg.append("\n l_i=").append(subspaceDim_i).append(" pv_i=[").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions())).append(']');
+ msg.append("\n l_j=").append(subspaceDim_j).append(" pv_j=[").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions())).append(']');
}
// noise level reached
- if(c_j.getModel().getSubspace().dimensionality() == 0) {
+ if (c_j.getModel().getSubspace().dimensionality() == 0) {
- // no parent exists -> parent is noise
- if(c_i.getParents().isEmpty()) {
+ if (c_i.getParents().isEmpty()) {
c_j.getChildren().add(c_i);
c_i.getParents().add(c_j);
- if(logger.isDebugging()) {
+ if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
- msg.append("]");
+ msg.append(']');
}
}
- }
- else {
- V cj_centroid = DatabaseUtil.centroid(database, c_j.getIDs(), c_j.getModel().getDimensions());
+ } else {
+ V cj_centroid = ProjectedCentroid.make(c_j.getModel().getDimensions(), database, c_j.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(ci_centroid, cj_centroid, c_i.getModel().getSubspace().getDimensions(), c_j.getModel().getSubspace().getDimensions());
double d = distFunc.weightedDistance(ci_centroid, cj_centroid, distance.getCommonPreferenceVector());
- if(logger.isDebugging()) {
+ if (LOG.isDebugging()) {
msg.append("\n dist = ").append(distance.getCorrelationValue());
}
- if(distance.getCorrelationValue() == subspaceDim_j) {
- if(logger.isDebugging()) {
+ if (distance.getCorrelationValue() == subspaceDim_j) {
+ if (LOG.isDebugging()) {
msg.append("\n d = ").append(d);
}
- if(d <= 2 * epsilon) {
+ if (d <= 2 * epsilon) {
// no parent exists or c_j is not a parent of the already
// existing parents
- if(c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) {
+ if (c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) {
c_j.getChildren().add(c_i);
c_i.getParents().add(c_j);
- if(logger.isDebugging()) {
+ if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [");
msg.append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
- msg.append("]");
+ msg.append(']');
}
}
- }
- else {
+ } else {
throw new RuntimeException("Should never happen: d = " + d);
}
}
@@ -581,8 +578,8 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
}
}
}
- if(logger.isDebugging()) {
- logger.debug(msg.toString());
+ if (LOG.isDebugging()) {
+ LOG.debug(msg.toString());
}
}
@@ -599,14 +596,14 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
* the children clusters, false otherwise
*/
private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, List<Cluster<SubspaceModel<V>>> children) {
- V parent_centroid = DatabaseUtil.centroid(database, parent.getIDs(), parent.getModel().getDimensions());
- int dimensionality = DatabaseUtil.dimensionality(database);
+ V parent_centroid = ProjectedCentroid.make(parent.getModel().getDimensions(), database, parent.getIDs()).toVector(database);
+ int dimensionality = RelationUtil.dimensionality(database);
int subspaceDim_parent = dimensionality - parent.getModel().getSubspace().dimensionality();
- for(Cluster<SubspaceModel<V>> child : children) {
- V child_centroid = DatabaseUtil.centroid(database, child.getIDs(), child.getModel().getDimensions());
+ for (Cluster<SubspaceModel<V>> child : children) {
+ V child_centroid = ProjectedCentroid.make(child.getModel().getDimensions(), database, child.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(parent_centroid, child_centroid, parent.getModel().getSubspace().getDimensions(), child.getModel().getSubspace().getDimensions());
- if(distance.getCorrelationValue() == subspaceDim_parent) {
+ if (distance.getCorrelationValue() == subspaceDim_parent) {
return true;
}
}
@@ -621,7 +618,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -631,7 +628,7 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected double epsilon = 0.0;
protected int mu = 1;
@@ -644,14 +641,16 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID, new GreaterEqualConstraint(0), 0.001);
- if(config.grab(epsilonP)) {
- epsilon = epsilonP.getValue();
+ DoubleParameter epsilonP = new DoubleParameter(EPSILON_ID, 0.001);
+ epsilonP.addConstraint(new GreaterEqualConstraint(0));
+ if (config.grab(epsilonP)) {
+ epsilon = epsilonP.doubleValue();
}
- IntParameter muP = new IntParameter(MU_ID, new GreaterConstraint(0), 1);
- if(config.grab(muP)) {
- mu = muP.getValue();
+ IntParameter muP = new IntParameter(MU_ID, 1);
+ muP.addConstraint(new GreaterConstraint(0));
+ if (config.grab(muP)) {
+ mu = muP.intValue();
}
configDiSHDistance(config, epsilon, mu);
@@ -703,4 +702,4 @@ public class DiSH<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
return new DiSH<V>(epsilon, dishDistance, opticsO);
}
}
-} \ No newline at end of file
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
index 40ab60a8..58f3acef 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
@@ -34,7 +34,8 @@ import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.IntervalConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.LessConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ChainedParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.ListParameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
@@ -60,11 +61,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
@Title("Finding Hierarchies of Subspace Clusters")
@Description("Algorithm for detecting hierarchies of subspace clusters.")
@Reference(authors = "E. Achtert, C. Böhm, H.-P. Kriegel, P. Kröger, I. Müller-Gorman, A. Zimek", title = "Finding Hierarchies of Subspace Clusters", booktitle = "Proc. 10th Europ. Conf. on Principles and Practice of Knowledge Discovery in Databases (PKDD'06), Berlin, Germany, 2006", url = "http://www.dbs.ifi.lmu.de/Publikationen/Papers/PKDD06-HiSC.pdf")
-public class HiSC<V extends NumberVector<V, ?>> extends OPTICS<V, PreferenceVectorBasedCorrelationDistance> {
+public class HiSC<V extends NumberVector<?>> extends OPTICS<V, PreferenceVectorBasedCorrelationDistance> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(HiSC.class);
+ private static final Logging LOG = Logging.getLogger(HiSC.class);
/**
* Constructor.
@@ -77,7 +78,7 @@ public class HiSC<V extends NumberVector<V, ?>> extends OPTICS<V, PreferenceVect
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -87,16 +88,18 @@ public class HiSC<V extends NumberVector<V, ?>> extends OPTICS<V, PreferenceVect
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
HiSCDistanceFunction<V> distanceFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- DoubleParameter alphaP = new DoubleParameter(HiSCPreferenceVectorIndex.Factory.ALPHA_ID, new IntervalConstraint(0.0, IntervalConstraint.IntervalBoundary.OPEN, 1.0, IntervalConstraint.IntervalBoundary.OPEN), HiSCPreferenceVectorIndex.Factory.DEFAULT_ALPHA);
+ DoubleParameter alphaP = new DoubleParameter(HiSCPreferenceVectorIndex.Factory.ALPHA_ID, HiSCPreferenceVectorIndex.Factory.DEFAULT_ALPHA);
+ alphaP.addConstraint(new GreaterConstraint(0.0));
+ alphaP.addConstraint(new LessConstraint(1.0));
double alpha = 0.0;
if(config.grab(alphaP)) {
- alpha = alphaP.getValue();
+ alpha = alphaP.doubleValue();
}
// Configure HiSC distance function
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
index 4eedbecd..ef49ff10 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
@@ -23,15 +23,17 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+import gnu.trove.iterator.TIntIterator;
+import gnu.trove.set.TIntSet;
+import gnu.trove.set.hash.TIntHashSet;
+
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
-import java.util.Set;
import de.lmu.ifi.dbs.elki.algorithm.clustering.AbstractProjectedClustering;
import de.lmu.ifi.dbs.elki.data.Cluster;
@@ -47,16 +49,20 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DistanceDBIDPair;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.query.DistanceResultPair;
-import de.lmu.ifi.dbs.elki.database.query.GenericDistanceResultPair;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResult;
+import de.lmu.ifi.dbs.elki.distance.distanceresultlist.DistanceDBIDResultUtil;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.math.Mean;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
+import de.lmu.ifi.dbs.elki.utilities.RandomFactory;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -64,7 +70,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.RandomParameter;
import de.lmu.ifi.dbs.elki.utilities.pairs.CTriple;
import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@@ -90,11 +96,11 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
@Title("PROCLUS: PROjected CLUStering")
@Description("Algorithm to find subspace clusters in high dimensional spaces.")
@Reference(authors = "C. C. Aggarwal, C. Procopiuc, J. L. Wolf, P. S. Yu, J. S. Park", title = "Fast Algorithms for Projected Clustering", booktitle = "Proc. ACM SIGMOD Int. Conf. on Management of Data (SIGMOD '99)", url = "http://dx.doi.org/10.1145/304181.304188")
-public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClustering<Clustering<SubspaceModel<V>>, V> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class PROCLUS<V extends NumberVector<?>> extends AbstractProjectedClustering<Clustering<SubspaceModel<V>>, V> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(PROCLUS.class);
+ private static final Logging LOG = Logging.getLogger(PROCLUS.class);
/**
* Parameter to specify the multiplier for the initial number of medoids, must
@@ -106,12 +112,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* Key: {@code -proclus.mi}
* </p>
*/
- public static final OptionID M_I_ID = OptionID.getOrCreateOptionID("proclus.mi", "The multiplier for the initial number of medoids.");
-
- /**
- * Parameter to specify the random generator seed.
- */
- public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("proclus.seed", "The random number generator seed.");
+ public static final OptionID M_I_ID = new OptionID("proclus.mi", "The multiplier for the initial number of medoids.");
/**
* Holds the value of {@link #M_I_ID}.
@@ -119,9 +120,9 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
private int m_i;
/**
- * Holds the value of {@link #SEED_ID}.
+ * Random generator
*/
- private Long seed;
+ private RandomFactory rnd;
/**
* Java constructor.
@@ -130,12 +131,12 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param k_i k_i Parameter
* @param l l Parameter
* @param m_i m_i Parameter
- * @param seed Random generator seed
+ * @param rnd Random generator
*/
- public PROCLUS(int k, int k_i, int l, int m_i, Long seed) {
+ public PROCLUS(int k, int k_i, int l, int m_i, RandomFactory rnd) {
super(k, k_i, l);
this.m_i = m_i;
- this.seed = seed;
+ this.rnd = rnd;
}
/**
@@ -147,19 +148,16 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
public Clustering<SubspaceModel<V>> run(Database database, Relation<V> relation) {
DistanceQuery<V, DoubleDistance> distFunc = this.getDistanceQuery(database);
RangeQuery<V, DoubleDistance> rangeQuery = database.getRangeQuery(distFunc);
- final Random random = new Random();
- if(seed != null) {
- random.setSeed(seed);
- }
+ final Random random = rnd.getRandom();
- if(DatabaseUtil.dimensionality(relation) < l) {
- throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + DatabaseUtil.dimensionality(relation) + " < " + l + ")");
+ if(RelationUtil.dimensionality(relation) < l) {
+ throw new IllegalStateException("Dimensionality of data < parameter l! " + "(" + RelationUtil.dimensionality(relation) + " < " + l + ")");
}
// TODO: use a StepProgress!
// initialization phase
- if(logger.isVerbose()) {
- logger.verbose("1. Initialization phase...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("1. Initialization phase...");
}
int sampleSize = Math.min(relation.size(), k_i * k);
DBIDs sampleSet = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random.nextLong());
@@ -167,39 +165,39 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
int medoidSize = Math.min(relation.size(), m_i * k);
DBIDs medoids = greedy(distFunc, sampleSet, medoidSize, random);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("sampleSize ").append(sampleSize).append("\n");
- msg.append("sampleSet ").append(sampleSet).append("\n");
- msg.append("medoidSize ").append(medoidSize).append("\n");
- msg.append("m ").append(medoids).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("sampleSize ").append(sampleSize).append('\n');
+ msg.append("sampleSet ").append(sampleSet).append('\n');
+ msg.append("medoidSize ").append(medoidSize).append('\n');
+ msg.append("m ").append(medoids).append('\n');
+ LOG.debugFine(msg.toString());
}
// iterative phase
- if(logger.isVerbose()) {
- logger.verbose("2. Iterative phase...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("2. Iterative phase...");
}
double bestObjective = Double.POSITIVE_INFINITY;
ModifiableDBIDs m_best = null;
ModifiableDBIDs m_bad = null;
ModifiableDBIDs m_current = initialSet(medoids, k, random);
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("m_c ").append(m_current).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("m_c ").append(m_current).append('\n');
+ LOG.debugFine(msg.toString());
}
- IndefiniteProgress cprogress = logger.isVerbose() ? new IndefiniteProgress("Current number of clusters:", logger) : null;
+ IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Current number of clusters:", LOG) : null;
// TODO: Use DataStore and Trove for performance
Map<DBID, PROCLUSCluster> clusters = null;
int loops = 0;
while(loops < 10) {
- Map<DBID, Set<Integer>> dimensions = findDimensions(m_current, relation, distFunc, rangeQuery);
+ Map<DBID, TIntSet> dimensions = findDimensions(m_current, relation, distFunc, rangeQuery);
clusters = assignPoints(dimensions, relation);
double objectiveFunction = evaluateClusters(clusters, dimensions, relation);
@@ -214,20 +212,20 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
m_current = computeM_current(medoids, m_best, m_bad, random);
loops++;
if(cprogress != null) {
- cprogress.setProcessed(clusters.size(), logger);
+ cprogress.setProcessed(clusters.size(), LOG);
}
}
if(cprogress != null) {
- cprogress.setCompleted(logger);
+ cprogress.setCompleted(LOG);
}
// refinement phase
- if(logger.isVerbose()) {
- logger.verbose("3. Refinement phase...");
+ if(LOG.isVerbose()) {
+ LOG.verbose("3. Refinement phase...");
}
- List<Pair<V, Set<Integer>>> dimensions = findDimensions(new ArrayList<PROCLUSCluster>(clusters.values()), relation);
+ List<Pair<V, TIntSet>> dimensions = findDimensions(new ArrayList<PROCLUSCluster>(clusters.values()), relation);
List<PROCLUSCluster> finalClusters = finalAssignment(dimensions, relation);
// build result
@@ -235,7 +233,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("ProClus clustering", "proclus-clustering");
for(PROCLUSCluster c : finalClusters) {
Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.objectIDs);
- cluster.setModel(new SubspaceModel<V>(new Subspace<V>(c.getDimensions()), c.centroid));
+ cluster.setModel(new SubspaceModel<V>(new Subspace(c.getDimensions()), c.centroid));
cluster.setName("cluster_" + numClusters++);
result.addCluster(cluster);
@@ -259,40 +257,41 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
// m_1 is random point of S
DBID m_i = s.remove(random.nextInt(s.size()));
medoids.add(m_i);
- if(logger.isDebugging()) {
- logger.debugFiner("medoids " + medoids);
+ if(LOG.isDebugging()) {
+ LOG.debugFiner("medoids " + medoids);
}
// compute distances between each point in S and m_i
- Map<DBID, DistanceResultPair<DoubleDistance>> distances = new HashMap<DBID, DistanceResultPair<DoubleDistance>>();
+ // FIXME: don't use maps, so we can work with DBIDRef
+ Map<DBID, DistanceDBIDPair<DoubleDistance>> distances = new HashMap<DBID, DistanceDBIDPair<DoubleDistance>>();
for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
+ DBID id = DBIDUtil.deref(iter);
DoubleDistance dist = distFunc.distance(id, m_i);
- distances.put(id, new GenericDistanceResultPair<DoubleDistance>(dist, id));
+ distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
for(int i = 1; i < m; i++) {
// choose medoid m_i to be far from previous medoids
- List<DistanceResultPair<DoubleDistance>> d = new ArrayList<DistanceResultPair<DoubleDistance>>(distances.values());
- Collections.sort(d);
+ List<DistanceDBIDPair<DoubleDistance>> d = new ArrayList<DistanceDBIDPair<DoubleDistance>>(distances.values());
+ DistanceDBIDResultUtil.sortByDistance(d);
- m_i = d.get(d.size() - 1).getDBID();
+ m_i = DBIDUtil.deref(d.get(d.size() - 1));
medoids.add(m_i);
s.remove(m_i);
distances.remove(m_i);
// compute distances of each point to closest medoid
for(DBIDIter iter = s.iter(); iter.valid(); iter.advance()) {
- DBID id = iter.getDBID();
+ DBID id = DBIDUtil.deref(iter);
DoubleDistance dist_new = distFunc.distance(id, m_i);
DoubleDistance dist_old = distances.get(id).getDistance();
DoubleDistance dist = dist_new.compareTo(dist_old) < 0 ? dist_new : dist_old;
- distances.put(id, new GenericDistanceResultPair<DoubleDistance>(dist, id));
+ distances.put(id, DBIDUtil.newDistancePair(dist, id));
}
- if(logger.isDebugging()) {
- logger.debugFiner("medoids " + medoids);
+ if(LOG.isDebugging()) {
+ LOG.debugFiner("medoids " + medoids);
}
}
@@ -332,7 +331,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
ModifiableDBIDs m_current = DBIDUtil.newHashSet();
for(DBIDIter iter = m_best.iter(); iter.valid(); iter.advance()) {
- DBID m_i = iter.getDBID();
+ DBID m_i = DBIDUtil.deref(iter);
if(m_bad.contains(m_i)) {
int currentSize = m_current.size();
while(m_current.size() == currentSize) {
@@ -359,17 +358,17 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param distFunc the distance function
* @return a mapping of the medoid's id to its locality
*/
- private Map<DBID, List<DistanceResultPair<DoubleDistance>>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
- Map<DBID, List<DistanceResultPair<DoubleDistance>>> result = new HashMap<DBID, List<DistanceResultPair<DoubleDistance>>>();
+ private Map<DBID, DistanceDBIDResult<DoubleDistance>> getLocalities(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
+ Map<DBID, DistanceDBIDResult<DoubleDistance>> result = new HashMap<DBID, DistanceDBIDResult<DoubleDistance>>();
for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
- DBID m = iter.getDBID();
+ DBID m = DBIDUtil.deref(iter);
// determine minimum distance between current medoid m and any other
// medoid m_i
DoubleDistance minDist = null;
for(DBIDIter iter2 = medoids.iter(); iter2.valid(); iter2.advance()) {
- DBID m_i = iter2.getDBID();
- if(m_i == m) {
+ DBID m_i = DBIDUtil.deref(iter2);
+ if(DBIDUtil.equal(m_i, m)) {
continue;
}
DoubleDistance currentDist = distFunc.distance(m, m_i);
@@ -380,7 +379,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
// determine points in sphere centered at m with radius minDist
assert minDist != null;
- List<DistanceResultPair<DoubleDistance>> qr = rangeQuery.getRangeForDBID(m, minDist);
+ DistanceDBIDResult<DoubleDistance> qr = rangeQuery.getRangeForDBID(m, minDist);
result.put(m, qr);
}
@@ -397,23 +396,23 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @return the set of correlated dimensions for each medoid in the specified
* medoid set
*/
- private Map<DBID, Set<Integer>> findDimensions(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
+ private Map<DBID, TIntSet> findDimensions(DBIDs medoids, Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, RangeQuery<V, DoubleDistance> rangeQuery) {
// get localities
- Map<DBID, List<DistanceResultPair<DoubleDistance>>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
+ Map<DBID, DistanceDBIDResult<DoubleDistance>> localities = getLocalities(medoids, database, distFunc, rangeQuery);
// compute x_ij = avg distance from points in l_i to medoid m_i
- int dim = DatabaseUtil.dimensionality(database);
+ int dim = RelationUtil.dimensionality(database);
Map<DBID, double[]> averageDistances = new HashMap<DBID, double[]>();
for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
- DBID m_i = iter.getDBID();
+ DBID m_i = DBIDUtil.deref(iter);
V medoid_i = database.get(m_i);
- List<DistanceResultPair<DoubleDistance>> l_i = localities.get(m_i);
+ DistanceDBIDResult<DoubleDistance> l_i = localities.get(m_i);
double[] x_i = new double[dim];
- for(DistanceResultPair<DoubleDistance> qr : l_i) {
- V o = database.get(qr.getDBID());
+ for(DBIDIter qr = l_i.iter(); qr.valid(); qr.advance()) {
+ V o = database.get(qr);
for(int d = 0; d < dim; d++) {
- x_i[d] += Math.abs(medoid_i.doubleValue(d + 1) - o.doubleValue(d + 1));
+ x_i[d] += Math.abs(medoid_i.doubleValue(d) - o.doubleValue(d));
}
}
for(int d = 0; d < dim; d++) {
@@ -422,11 +421,11 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
averageDistances.put(m_i, x_i);
}
- Map<DBID, Set<Integer>> dimensionMap = new HashMap<DBID, Set<Integer>>();
+ Map<DBID, TIntSet> dimensionMap = new HashMap<DBID, TIntSet>();
List<CTriple<Double, DBID, Integer>> z_ijs = new ArrayList<CTriple<Double, DBID, Integer>>();
for(DBIDIter iter = medoids.iter(); iter.valid(); iter.advance()) {
- DBID m_i = iter.getDBID();
- Set<Integer> dims_i = new HashSet<Integer>();
+ DBID m_i = DBIDUtil.deref(iter);
+ TIntSet dims_i = new TIntHashSet();
dimensionMap.put(m_i, dims_i);
double[] x_i = averageDistances.get(m_i);
@@ -447,7 +446,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
sigma_i = Math.sqrt(sigma_i);
for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, DBID, Integer>((x_i[j] - y_i) / sigma_i, m_i, j + 1));
+ z_ijs.add(new CTriple<Double, DBID, Integer>((x_i[j] - y_i) / sigma_i, m_i, j));
}
}
Collections.sort(z_ijs);
@@ -455,15 +454,15 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
int max = Math.max(k * l, 2);
for(int m = 0; m < max; m++) {
CTriple<Double, DBID, Integer> z_ij = z_ijs.get(m);
- Set<Integer> dims_i = dimensionMap.get(z_ij.getSecond());
+ TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
dims_i.add(z_ij.getThird());
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("z_ij ").append(z_ij).append("\n");
- msg.append("D_i ").append(dims_i).append("\n");
- logger.debugFiner(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("z_ij ").append(z_ij).append('\n');
+ msg.append("D_i ").append(dims_i).append('\n');
+ LOG.debugFiner(msg.toString());
}
}
return dimensionMap;
@@ -478,9 +477,9 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @return the set of correlated dimensions for each specified cluster
* centroid
*/
- private List<Pair<V, Set<Integer>>> findDimensions(List<PROCLUSCluster> clusters, Relation<V> database) {
+ private List<Pair<V, TIntSet>> findDimensions(List<PROCLUSCluster> clusters, Relation<V> database) {
// compute x_ij = avg distance from points in c_i to c_i.centroid
- int dim = DatabaseUtil.dimensionality(database);
+ int dim = RelationUtil.dimensionality(database);
Map<Integer, double[]> averageDistances = new HashMap<Integer, double[]>();
for(int i = 0; i < clusters.size(); i++) {
@@ -489,7 +488,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
for(DBIDIter iter = c_i.objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
for(int d = 0; d < dim; d++) {
- x_i[d] += Math.abs(c_i.centroid.doubleValue(d + 1) - o.doubleValue(d + 1));
+ x_i[d] += Math.abs(c_i.centroid.doubleValue(d) - o.doubleValue(d));
}
}
for(int d = 0; d < dim; d++) {
@@ -518,38 +517,38 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
sigma_i = Math.sqrt(sigma_i);
for(int j = 0; j < dim; j++) {
- z_ijs.add(new CTriple<Double, Integer, Integer>((x_i[j] - y_i) / sigma_i, i, j + 1));
+ z_ijs.add(new CTriple<Double, Integer, Integer>((x_i[j] - y_i) / sigma_i, i, j));
}
}
Collections.sort(z_ijs);
// mapping cluster index -> dimensions
- Map<Integer, Set<Integer>> dimensionMap = new HashMap<Integer, Set<Integer>>();
+ Map<Integer, TIntSet> dimensionMap = new HashMap<Integer, TIntSet>();
int max = Math.max(k * l, 2);
for(int m = 0; m < max; m++) {
CTriple<Double, Integer, Integer> z_ij = z_ijs.get(m);
- Set<Integer> dims_i = dimensionMap.get(z_ij.getSecond());
+ TIntSet dims_i = dimensionMap.get(z_ij.getSecond());
if(dims_i == null) {
- dims_i = new HashSet<Integer>();
+ dims_i = new TIntHashSet();
dimensionMap.put(z_ij.getSecond(), dims_i);
}
dims_i.add(z_ij.getThird());
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("z_ij ").append(z_ij).append("\n");
- msg.append("D_i ").append(dims_i).append("\n");
- logger.debugFiner(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("z_ij ").append(z_ij).append('\n');
+ msg.append("D_i ").append(dims_i).append('\n');
+ LOG.debugFiner(msg.toString());
}
}
// mapping cluster -> dimensions
- List<Pair<V, Set<Integer>>> result = new ArrayList<Pair<V, Set<Integer>>>();
+ List<Pair<V, TIntSet>> result = new ArrayList<Pair<V, TIntSet>>();
for(int i : dimensionMap.keySet()) {
- Set<Integer> dims_i = dimensionMap.get(i);
+ TIntSet dims_i = dimensionMap.get(i);
PROCLUSCluster c_i = clusters.get(i);
- result.add(new Pair<V, Set<Integer>>(c_i.centroid, dims_i));
+ result.add(new Pair<V, TIntSet>(c_i.centroid, dims_i));
}
return result;
}
@@ -562,26 +561,26 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param database the database containing the objects
* @return the assignments of the object to the clusters
*/
- private Map<DBID, PROCLUSCluster> assignPoints(Map<DBID, Set<Integer>> dimensions, Relation<V> database) {
+ private Map<DBID, PROCLUSCluster> assignPoints(Map<DBID, TIntSet> dimensions, Relation<V> database) {
Map<DBID, ModifiableDBIDs> clusterIDs = new HashMap<DBID, ModifiableDBIDs>();
for(DBID m_i : dimensions.keySet()) {
clusterIDs.put(m_i, DBIDUtil.newHashSet());
}
for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
- DBID p_id = it.getDBID();
+ DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
- DistanceResultPair<DoubleDistance> minDist = null;
+ DistanceDBIDPair<DoubleDistance> minDist = null;
for(DBID m_i : dimensions.keySet()) {
V m = database.get(m_i);
- DistanceResultPair<DoubleDistance> currentDist = new GenericDistanceResultPair<DoubleDistance>(manhattanSegmentalDistance(p, m, dimensions.get(m_i)), m_i);
- if(minDist == null || currentDist.compareTo(minDist) < 0) {
+ DistanceDBIDPair<DoubleDistance> currentDist = DBIDUtil.newDistancePair(manhattanSegmentalDistance(p, m, dimensions.get(m_i)), m_i);
+ if(minDist == null || currentDist.compareByDistance(minDist) < 0) {
minDist = currentDist;
}
}
// add p to cluster with mindist
assert minDist != null;
- ModifiableDBIDs ids = clusterIDs.get(minDist.getDBID());
+ ModifiableDBIDs ids = clusterIDs.get(DBIDUtil.deref(minDist));
ids.add(p_id);
}
@@ -589,17 +588,17 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
for(DBID m_i : dimensions.keySet()) {
ModifiableDBIDs objectIDs = clusterIDs.get(m_i);
if(!objectIDs.isEmpty()) {
- Set<Integer> clusterDimensions = dimensions.get(m_i);
- V centroid = DatabaseUtil.centroid(database, objectIDs);
+ TIntSet clusterDimensions = dimensions.get(m_i);
+ V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.put(m_i, new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("clusters ").append(clusters).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("clusters ").append(clusters).append('\n');
+ LOG.debugFine(msg.toString());
}
return clusters;
}
@@ -612,20 +611,20 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param database the database containing the objects
* @return the assignments of the object to the clusters
*/
- private List<PROCLUSCluster> finalAssignment(List<Pair<V, Set<Integer>>> dimensions, Relation<V> database) {
+ private List<PROCLUSCluster> finalAssignment(List<Pair<V, TIntSet>> dimensions, Relation<V> database) {
Map<Integer, ModifiableDBIDs> clusterIDs = new HashMap<Integer, ModifiableDBIDs>();
for(int i = 0; i < dimensions.size(); i++) {
clusterIDs.put(i, DBIDUtil.newHashSet());
}
for(DBIDIter it = database.iterDBIDs(); it.valid(); it.advance()) {
- DBID p_id = it.getDBID();
+ DBID p_id = DBIDUtil.deref(it);
V p = database.get(p_id);
Pair<DoubleDistance, Integer> minDist = null;
for(int i = 0; i < dimensions.size(); i++) {
- Pair<V, Set<Integer>> pair_i = dimensions.get(i);
+ Pair<V, TIntSet> pair_i = dimensions.get(i);
V c_i = pair_i.first;
- Set<Integer> dimensions_i = pair_i.second;
+ TIntSet dimensions_i = pair_i.second;
DoubleDistance currentDist = manhattanSegmentalDistance(p, c_i, dimensions_i);
if(minDist == null || currentDist.compareTo(minDist.first) < 0) {
minDist = new Pair<DoubleDistance, Integer>(currentDist, i);
@@ -641,17 +640,17 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
for(int i = 0; i < dimensions.size(); i++) {
ModifiableDBIDs objectIDs = clusterIDs.get(i);
if(!objectIDs.isEmpty()) {
- Set<Integer> clusterDimensions = dimensions.get(i).second;
- V centroid = DatabaseUtil.centroid(database, objectIDs);
+ TIntSet clusterDimensions = dimensions.get(i).second;
+ V centroid = Centroid.make(database, objectIDs).toVector(database);
clusters.add(new PROCLUSCluster(objectIDs, clusterDimensions, centroid));
}
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n");
- msg.append("clusters ").append(clusters).append("\n");
- logger.debugFine(msg.toString());
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n');
+ msg.append("clusters ").append(clusters).append('\n');
+ LOG.debugFine(msg.toString());
}
return clusters;
}
@@ -666,9 +665,10 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @return the Manhattan segmental distance between o1 and o2 relative to the
* specified dimensions
*/
- private DoubleDistance manhattanSegmentalDistance(V o1, V o2, Set<Integer> dimensions) {
+ private DoubleDistance manhattanSegmentalDistance(V o1, V o2, TIntSet dimensions) {
double result = 0;
- for(Integer d : dimensions) {
+ for (TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ final int d = iter.next();
result += Math.abs(o1.doubleValue(d) - o2.doubleValue(d));
}
result /= dimensions.size();
@@ -683,15 +683,16 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param database the database holding the objects
* @return a measure for the cluster quality
*/
- private double evaluateClusters(Map<DBID, PROCLUSCluster> clusters, Map<DBID, Set<Integer>> dimensions, Relation<V> database) {
+ private double evaluateClusters(Map<DBID, PROCLUSCluster> clusters, Map<DBID, TIntSet> dimensions, Relation<V> database) {
double result = 0;
for(DBID m_i : clusters.keySet()) {
PROCLUSCluster c_i = clusters.get(m_i);
V centroid_i = c_i.centroid;
- Set<Integer> dims_i = dimensions.get(m_i);
+ TIntSet dims_i = dimensions.get(m_i);
double w_i = 0;
- for(Integer j : dims_i) {
+ for (TIntIterator iter = dims_i.iterator(); iter.hasNext(); ) {
+ final int j = iter.next();
w_i += avgDistance(centroid_i, c_i.objectIDs, database, j);
}
@@ -714,12 +715,12 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* specified dimension
*/
private double avgDistance(V centroid, DBIDs objectIDs, Relation<V> database, int dimension) {
- double avg = 0;
+ Mean avg = new Mean();
for(DBIDIter iter = objectIDs.iter(); iter.valid(); iter.advance()) {
V o = database.get(iter);
- avg += Math.abs(centroid.doubleValue(dimension) - o.doubleValue(dimension));
+ avg.put(Math.abs(centroid.doubleValue(dimension) - o.doubleValue(dimension)));
}
- return avg / objectIDs.size();
+ return avg.getMean();
}
/**
@@ -748,7 +749,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -765,7 +766,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
/**
* The correlated dimensions of this cluster.
*/
- Set<Integer> dimensions;
+ TIntSet dimensions;
/**
* The centroids of this cluster along each dimension.
@@ -779,7 +780,7 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
* @param dimensions the correlated dimensions of this cluster
* @param centroid the centroid of this cluster
*/
- public PROCLUSCluster(ModifiableDBIDs objectIDs, Set<Integer> dimensions, V centroid) {
+ public PROCLUSCluster(ModifiableDBIDs objectIDs, TIntSet dimensions, V centroid) {
this.objectIDs = objectIDs;
this.dimensions = dimensions;
this.centroid = centroid;
@@ -787,19 +788,19 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append("Dimensions: [");
boolean notFirst = false;
- for(Integer d : dimensions) {
+ for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
if(notFirst) {
- result.append(",");
+ result.append(',');
}
else {
notFirst = true;
}
- result.append(d);
+ result.append(iter.next());
}
- result.append("]");
+ result.append(']');
result.append("\nCentroid: ").append(centroid);
return result.toString();
@@ -812,8 +813,8 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
*/
public BitSet getDimensions() {
BitSet result = new BitSet();
- for(int d : dimensions) {
- result.set(d - 1);
+ for(TIntIterator iter = dimensions.iterator(); iter.hasNext(); ) {
+ result.set(iter.next());
}
return result;
}
@@ -826,10 +827,15 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractProjectedClustering.Parameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractProjectedClustering.Parameterizer {
+ /**
+ * Parameter to specify the random generator seed.
+ */
+ public static final OptionID SEED_ID = new OptionID("proclus.seed", "The random number generator seed.");
+
protected int m_i = -1;
- protected Long seed = null;
+ protected RandomFactory rnd;
@Override
protected void makeOptions(Parameterization config) {
@@ -845,15 +851,15 @@ public class PROCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClus
m_i = m_iP.getValue();
}
- LongParameter seedP = new LongParameter(SEED_ID, true);
- if(config.grab(seedP)) {
- seed = seedP.getValue();
+ RandomParameter rndP = new RandomParameter(SEED_ID);
+ if(config.grab(rndP)) {
+ rnd = rndP.getValue();
}
}
@Override
protected PROCLUS<V> makeInstance() {
- return new PROCLUS<V>(k, k_i, l, m_i, seed);
+ return new PROCLUS<V>(k, k_i, l, m_i, rnd);
}
}
} \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
index 4ca5a564..fc3228eb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
@@ -58,11 +58,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameteriz
@Title("PreDeCon: Subspace Preference weighted Density Connected Clustering")
@Description("PreDeCon computes clusters of subspace preference weighted connected points. " + "The algorithm searches for local subgroups of a set of feature vectors having " + "a low variance along one or more (but not all) attributes.")
@Reference(authors = "C. Böhm, K. Kailing, H.-P. Kriegel, P. Kröger", title = "Density Connected Clustering with Local Subspace Preferences", booktitle = "Proc. 4th IEEE Int. Conf. on Data Mining (ICDM'04), Brighton, UK, 2004", url = "http://dx.doi.org/10.1109/ICDM.2004.10087")
-public class PreDeCon<V extends NumberVector<V, ?>> extends AbstractProjectedDBSCAN<Clustering<Model>, V> {
+public class PreDeCon<V extends NumberVector<?>> extends AbstractProjectedDBSCAN<Clustering<Model>, V> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(PreDeCon.class);
+ private static final Logging LOG = Logging.getLogger(PreDeCon.class);
/**
* Constructor.
@@ -88,7 +88,7 @@ public class PreDeCon<V extends NumberVector<V, ?>> extends AbstractProjectedDBS
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -98,7 +98,7 @@ public class PreDeCon<V extends NumberVector<V, ?>> extends AbstractProjectedDBS
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractProjectedDBSCAN.Parameterizer<V, DoubleDistance> {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractProjectedDBSCAN.Parameterizer<V, DoubleDistance> {
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index c47c74b6..46c5f0b8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -42,12 +42,13 @@ import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.AbstractDimensionsSelectingDoubleDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -84,11 +85,11 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
@Title("SUBCLU: Density connected Subspace Clustering")
@Description("Algorithm to detect arbitrarily shaped and positioned clusters in subspaces. SUBCLU delivers for each subspace the same clusters DBSCAN would have found, when applied to this subspace seperately.")
@Reference(authors = "K. Kailing, H.-P. Kriegel, P. Kröger", title = "Density connected Subspace Clustering for High Dimensional Data. ", booktitle = "Proc. SIAM Int. Conf. on Data Mining (SDM'04), Lake Buena Vista, FL, 2004")
-public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
+public class SUBCLU<V extends NumberVector<?>> extends AbstractAlgorithm<Clustering<SubspaceModel<V>>> implements SubspaceClusteringAlgorithm<SubspaceModel<V>> {
/**
* The logger for this class.
*/
- private static final Logging logger = Logging.getLogger(SUBCLU.class);
+ private static final Logging LOG = Logging.getLogger(SUBCLU.class);
/**
* The distance function to determine the distance between database objects.
@@ -99,7 +100,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -subclu.distancefunction}
* </p>
*/
- public static final OptionID DISTANCE_FUNCTION_ID = OptionID.getOrCreateOptionID("subclu.distancefunction", "Distance function to determine the distance between database objects.");
+ public static final OptionID DISTANCE_FUNCTION_ID = new OptionID("subclu.distancefunction", "Distance function to determine the distance between database objects.");
/**
* Parameter to specify the maximum radius of the neighborhood to be
@@ -109,7 +110,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -subclu.epsilon}
* </p>
*/
- public static final OptionID EPSILON_ID = OptionID.getOrCreateOptionID("subclu.epsilon", "The maximum radius of the neighborhood to be considered.");
+ public static final OptionID EPSILON_ID = new OptionID("subclu.epsilon", "The maximum radius of the neighborhood to be considered.");
/**
* Parameter to specify the threshold for minimum number of points in the
@@ -118,7 +119,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* Key: {@code -subclu.minpts}
* </p>
*/
- public static final OptionID MINPTS_ID = OptionID.getOrCreateOptionID("subclu.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
+ public static final OptionID MINPTS_ID = new OptionID("subclu.minpts", "Threshold for minimum number of points in the epsilon-neighborhood of a point.");
/**
* Holds the instance of the distance function specified by
@@ -162,36 +163,36 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @return Clustering result
*/
public Clustering<SubspaceModel<V>> run(Relation<V> relation) {
- final int dimensionality = DatabaseUtil.dimensionality(relation);
+ final int dimensionality = RelationUtil.dimensionality(relation);
- StepProgress stepprog = logger.isVerbose() ? new StepProgress(dimensionality) : null;
+ StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;
// Generate all 1-dimensional clusters
if(stepprog != null) {
- stepprog.beginStep(1, "Generate all 1-dimensional clusters.", logger);
+ stepprog.beginStep(1, "Generate all 1-dimensional clusters.", LOG);
}
// mapping of dimensionality to set of subspaces
- HashMap<Integer, List<Subspace<V>>> subspaceMap = new HashMap<Integer, List<Subspace<V>>>();
+ HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<Integer, List<Subspace>>();
// list of 1-dimensional subspaces containing clusters
- List<Subspace<V>> s_1 = new ArrayList<Subspace<V>>();
+ List<Subspace> s_1 = new ArrayList<Subspace>();
subspaceMap.put(0, s_1);
// mapping of subspaces to list of clusters
- TreeMap<Subspace<V>, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace<V>, List<Cluster<Model>>>(new Subspace.DimensionComparator());
+ TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<Subspace, List<Cluster<Model>>>(new Subspace.DimensionComparator());
for(int d = 0; d < dimensionality; d++) {
- Subspace<V> currentSubspace = new Subspace<V>(d);
+ Subspace currentSubspace = new Subspace(d);
List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
- if(logger.isDebuggingFiner()) {
- StringBuffer msg = new StringBuffer();
- msg.append("\n").append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
+ if(LOG.isDebuggingFiner()) {
+ StringBuilder msg = new StringBuilder();
+ msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
for(Cluster<Model> cluster : clusters) {
msg.append(" " + cluster.getIDs() + "\n");
}
- logger.debugFiner(msg.toString());
+ LOG.debugFiner(msg.toString());
}
if(!clusters.isEmpty()) {
@@ -203,26 +204,26 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
// Generate (d+1)-dimensional clusters from d-dimensional clusters
for(int d = 0; d < dimensionality - 1; d++) {
if(stepprog != null) {
- stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", logger);
+ stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
}
- List<Subspace<V>> subspaces = subspaceMap.get(d);
+ List<Subspace> subspaces = subspaceMap.get(d);
if(subspaces == null || subspaces.isEmpty()) {
if(stepprog != null) {
for(int dim = d + 1; dim < dimensionality - 1; dim++) {
- stepprog.beginStep(dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", logger);
+ stepprog.beginStep(dim + 2, "Generation of" + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
}
}
break;
}
- List<Subspace<V>> candidates = generateSubspaceCandidates(subspaces);
- List<Subspace<V>> s_d = new ArrayList<Subspace<V>>();
+ List<Subspace> candidates = generateSubspaceCandidates(subspaces);
+ List<Subspace> s_d = new ArrayList<Subspace>();
- for(Subspace<V> candidate : candidates) {
- Subspace<V> bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
- if(logger.isDebuggingFine()) {
- logger.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
+ for(Subspace candidate : candidates) {
+ Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
+ if(LOG.isDebuggingFine()) {
+ LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
}
List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
@@ -234,13 +235,13 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
- if(logger.isDebuggingFine()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebuggingFine()) {
+ StringBuilder msg = new StringBuilder();
msg.append(clusters.size() + " cluster(s) in subspace " + candidate + ": \n");
for(Cluster<Model> c : clusters) {
msg.append(" " + c.getIDs() + "\n");
}
- logger.debugFine(msg.toString());
+ LOG.debugFine(msg.toString());
}
if(!clusters.isEmpty()) {
@@ -257,18 +258,18 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
// build result
int numClusters = 1;
result = new Clustering<SubspaceModel<V>>("SUBCLU clustering", "subclu-clustering");
- for(Subspace<V> subspace : clusterMap.descendingKeySet()) {
+ for(Subspace subspace : clusterMap.descendingKeySet()) {
List<Cluster<Model>> clusters = clusterMap.get(subspace);
for(Cluster<Model> cluster : clusters) {
Cluster<SubspaceModel<V>> newCluster = new Cluster<SubspaceModel<V>>(cluster.getIDs());
- newCluster.setModel(new SubspaceModel<V>(subspace, DatabaseUtil.centroid(relation, cluster.getIDs())));
+ newCluster.setModel(new SubspaceModel<V>(subspace, Centroid.make(relation, cluster.getIDs()).toVector(relation)));
newCluster.setName("cluster_" + numClusters++);
result.addCluster(newCluster);
}
}
if(stepprog != null) {
- stepprog.setCompleted(logger);
+ stepprog.setCompleted(LOG);
}
return result;
}
@@ -294,7 +295,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @param subspace the subspace to run DBSCAN on
* @return the clustering result of the DBSCAN run
*/
- private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace<V> subspace) {
+ private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace subspace) {
// distance function
distanceFunction.setSelectedDimensions(subspace.getDimensions());
@@ -309,8 +310,8 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
DBSCAN<V, DoubleDistance> dbscan = new DBSCAN<V, DoubleDistance>(distanceFunction, epsilon, minpts);
// run DBSCAN
- if(logger.isVerbose()) {
- logger.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
+ if(LOG.isVerbose()) {
+ LOG.verbose("\nRun DBSCAN on subspace " + subspace.dimensonsToString());
}
Clustering<Model> dbsres = dbscan.run(proxy);
@@ -332,8 +333,8 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @param subspaces the {@code d}-dimensional subspaces
* @return the {@code d+1}-dimensional subspace candidates
*/
- private List<Subspace<V>> generateSubspaceCandidates(List<Subspace<V>> subspaces) {
- List<Subspace<V>> candidates = new ArrayList<Subspace<V>>();
+ private List<Subspace> generateSubspaceCandidates(List<Subspace> subspaces) {
+ List<Subspace> candidates = new ArrayList<Subspace>();
if(subspaces.isEmpty()) {
return candidates;
@@ -342,28 +343,28 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
// Generate (d+1)-dimensional candidate subspaces
int d = subspaces.get(0).dimensionality();
- StringBuffer msgFine = new StringBuffer("\n");
- if(logger.isDebuggingFiner()) {
- msgFine.append("subspaces ").append(subspaces).append("\n");
+ StringBuilder msgFine = new StringBuilder("\n");
+ if(LOG.isDebuggingFiner()) {
+ msgFine.append("subspaces ").append(subspaces).append('\n');
}
for(int i = 0; i < subspaces.size(); i++) {
- Subspace<V> s1 = subspaces.get(i);
+ Subspace s1 = subspaces.get(i);
for(int j = i + 1; j < subspaces.size(); j++) {
- Subspace<V> s2 = subspaces.get(j);
- Subspace<V> candidate = s1.join(s2);
+ Subspace s2 = subspaces.get(j);
+ Subspace candidate = s1.join(s2);
if(candidate != null) {
- if(logger.isDebuggingFiner()) {
- msgFine.append("candidate: ").append(candidate.dimensonsToString()).append("\n");
+ if(LOG.isDebuggingFiner()) {
+ msgFine.append("candidate: ").append(candidate.dimensonsToString()).append('\n');
}
// prune irrelevant candidate subspaces
- List<Subspace<V>> lowerSubspaces = lowerSubspaces(candidate);
- if(logger.isDebuggingFiner()) {
- msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append("\n");
+ List<Subspace> lowerSubspaces = lowerSubspaces(candidate);
+ if(LOG.isDebuggingFiner()) {
+ msgFine.append("lowerSubspaces: ").append(lowerSubspaces).append('\n');
}
boolean irrelevantCandidate = false;
- for(Subspace<V> s : lowerSubspaces) {
+ for(Subspace s : lowerSubspaces) {
if(!subspaces.contains(s)) {
irrelevantCandidate = true;
break;
@@ -376,16 +377,16 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
}
}
- if(logger.isDebuggingFiner()) {
- logger.debugFiner(msgFine.toString());
+ if(LOG.isDebuggingFiner()) {
+ LOG.debugFiner(msgFine.toString());
}
- if(logger.isDebugging()) {
- StringBuffer msg = new StringBuffer();
+ if(LOG.isDebugging()) {
+ StringBuilder msg = new StringBuilder();
msg.append(d + 1).append("-dimensional candidate subspaces: ");
- for(Subspace<V> candidate : candidates) {
- msg.append(candidate.dimensonsToString()).append(" ");
+ for(Subspace candidate : candidates) {
+ msg.append(candidate.dimensonsToString()).append(' ');
}
- logger.debug(msg.toString());
+ LOG.debug(msg.toString());
}
return candidates;
@@ -398,19 +399,19 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* @param subspace the {@code d}-dimensional subspace
* @return a list of all {@code (d-1)}-dimensional subspaces
*/
- private List<Subspace<V>> lowerSubspaces(Subspace<V> subspace) {
+ private List<Subspace> lowerSubspaces(Subspace subspace) {
int dimensionality = subspace.dimensionality();
if(dimensionality <= 1) {
return null;
}
// order result according to the dimensions
- List<Subspace<V>> result = new ArrayList<Subspace<V>>();
+ List<Subspace> result = new ArrayList<Subspace>();
BitSet dimensions = subspace.getDimensions();
for(int dim = dimensions.nextSetBit(0); dim >= 0; dim = dimensions.nextSetBit(dim + 1)) {
BitSet newDimensions = (BitSet) dimensions.clone();
newDimensions.set(dim, false);
- result.add(new Subspace<V>(newDimensions));
+ result.add(new Subspace(newDimensions));
}
return result;
@@ -428,10 +429,10 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
* -dimensional candidate with minimal number of objects in the
* cluster
*/
- private Subspace<V> bestSubspace(List<Subspace<V>> subspaces, Subspace<V> candidate, TreeMap<Subspace<V>, List<Cluster<Model>>> clusterMap) {
- Subspace<V> bestSubspace = null;
+ private Subspace bestSubspace(List<Subspace> subspaces, Subspace candidate, TreeMap<Subspace, List<Cluster<Model>>> clusterMap) {
+ Subspace bestSubspace = null;
- for(Subspace<V> subspace : subspaces) {
+ for(Subspace subspace : subspaces) {
int min = Integer.MAX_VALUE;
if(subspace.isSubspace(candidate)) {
@@ -456,7 +457,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
@Override
protected Logging getLogger() {
- return logger;
+ return LOG;
}
/**
@@ -466,7 +467,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
protected int minpts = 0;
protected DoubleDistance epsilon = null;
@@ -486,7 +487,8 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
epsilon = epsilonP.getValue();
}
- IntParameter minptsP = new IntParameter(MINPTS_ID, new GreaterConstraint(0));
+ IntParameter minptsP = new IntParameter(MINPTS_ID);
+ minptsP.addConstraint(new GreaterConstraint(0));
if(config.grab(minptsP)) {
minpts = minptsP.getValue();
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
index eff71a35..6b22b233 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
*
* @param <V> the type of NumberVector this subspace contains
*/
-public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
+public class CLIQUESubspace<V extends NumberVector<?>> extends Subspace {
/**
* The dense units belonging to this subspace.
*/
@@ -103,14 +103,14 @@ public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
*
* @return the clusters in this subspace and the corresponding cluster models
*/
- public List<Pair<Subspace<V>, ModifiableDBIDs>> determineClusters() {
- List<Pair<Subspace<V>, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace<V>, ModifiableDBIDs>>();
+ public List<Pair<Subspace, ModifiableDBIDs>> determineClusters() {
+ List<Pair<Subspace, ModifiableDBIDs>> clusters = new ArrayList<Pair<Subspace, ModifiableDBIDs>>();
for(CLIQUEUnit<V> unit : getDenseUnits()) {
if(!unit.isAssigned()) {
ModifiableDBIDs cluster = DBIDUtil.newHashSet();
CLIQUESubspace<V> model = new CLIQUESubspace<V>(getDimensions());
- clusters.add(new Pair<Subspace<V>, ModifiableDBIDs>(model, cluster));
+ clusters.add(new Pair<Subspace, ModifiableDBIDs>(model, cluster));
dfs(unit, cluster, model);
}
}
@@ -151,8 +151,8 @@ public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
* @param dim the dimension
* @return the left neighbor of the given unit in the specified dimension
*/
- public CLIQUEUnit<V> leftNeighbor(CLIQUEUnit<V> unit, Integer dim) {
- Interval i = unit.getInterval(dim);
+ public CLIQUEUnit<V> leftNeighbor(CLIQUEUnit<V> unit, int dim) {
+ Interval i = unit.getInterval(Integer.valueOf(dim));
for(CLIQUEUnit<V> u : getDenseUnits()) {
if(u.containsLeftNeighbor(i)) {
@@ -238,10 +238,10 @@ public class CLIQUESubspace<V extends NumberVector<V, ?>> extends Subspace<V> {
*/
@Override
public String toString(String pre) {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
result.append(super.toString(pre));
- result.append("\n").append(pre).append("Coverage: ").append(coverage);
- result.append("\n").append(pre).append("Units: " + "\n");
+ result.append('\n').append(pre).append("Coverage: ").append(coverage);
+ result.append('\n').append(pre).append("Units: " + "\n");
for(CLIQUEUnit<V> denseUnit : getDenseUnits()) {
result.append(pre).append(" ").append(denseUnit.toString()).append(" ").append(denseUnit.getIds().size()).append(" objects\n");
}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
index db687567..70f251c9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
@@ -23,15 +23,15 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-import java.util.HashMap;
+import gnu.trove.map.hash.TIntObjectHashMap;
+
import java.util.Iterator;
-import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import de.lmu.ifi.dbs.elki.data.Interval;
import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
@@ -46,7 +46,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
*
* @param <V> the type of NumberVector this unit contains
*/
-public class CLIQUEUnit<V extends NumberVector<V, ?>> {
+public class CLIQUEUnit<V extends NumberVector<?>> {
/**
* The one-dimensional intervals of which this unit is build.
*/
@@ -56,7 +56,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
* Provides a mapping of particular dimensions to the intervals of which this
* unit is build.
*/
- private Map<Integer, Interval> dimensionToInterval;
+ private TIntObjectHashMap<Interval> dimensionToInterval;
/**
* The ids of the feature vectors this unit contains.
@@ -77,7 +77,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
public CLIQUEUnit(SortedSet<Interval> intervals, ModifiableDBIDs ids) {
this.intervals = intervals;
- dimensionToInterval = new HashMap<Integer, Interval>();
+ dimensionToInterval = new TIntObjectHashMap<Interval>();
for(Interval interval : intervals) {
dimensionToInterval.put(interval.getDimension(), interval);
}
@@ -96,7 +96,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
intervals = new TreeSet<Interval>();
intervals.add(interval);
- dimensionToInterval = new HashMap<Integer, Interval>();
+ dimensionToInterval = new TIntObjectHashMap<Interval>();
dimensionToInterval.put(interval.getDimension(), interval);
ids = DBIDUtil.newHashSet();
@@ -114,7 +114,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
*/
public boolean contains(V vector) {
for(Interval interval : intervals) {
- double value = vector.doubleValue(interval.getDimension() + 1);
+ final double value = vector.doubleValue(interval.getDimension());
if(interval.getMin() > value || value >= interval.getMax()) {
return false;
}
@@ -131,7 +131,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
* @return true, if this unit contains the specified feature vector, false
* otherwise
*/
- public boolean addFeatureVector(DBID id, V vector) {
+ public boolean addFeatureVector(DBIDRef id, V vector) {
if(contains(vector)) {
ids.add(id);
return true;
@@ -284,9 +284,9 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
*/
@Override
public String toString() {
- StringBuffer result = new StringBuffer();
+ StringBuilder result = new StringBuilder();
for(Interval interval : intervals) {
- result.append(interval).append(" ");
+ result.append(interval).append(' ');
}
return result.toString();