summaryrefslogtreecommitdiff
path: root/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java')
-rw-r--r--src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java74
1 files changed, 37 insertions, 37 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index a3496a0e..b17ebebb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2012
+ Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -62,7 +62,8 @@ import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry;
import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult;
import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
import de.lmu.ifi.dbs.elki.utilities.FormatUtil;
-import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyReferenceLists;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy;
+import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
@@ -238,29 +239,29 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
// build the hierarchy
- buildHierarchy(database, distFunc, clusters, dimensionality);
+ Clustering<SubspaceModel<V>> clustering = new Clustering<>("DiSH clustering", "dish-clustering");
+ buildHierarchy(database, distFunc, clustering, clusters, dimensionality);
if (LOG.isVerbose()) {
StringBuilder msg = new StringBuilder("Step 4: build hierarchy");
for (Cluster<SubspaceModel<V>> c : clusters) {
msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size());
- for (Cluster<SubspaceModel<V>> cluster : c.getParents()) {
- msg.append("\n parent ").append(cluster);
+ for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterParents(c); iter.valid(); iter.advance()) {
+ msg.append("\n parent ").append(iter.get());
}
- for (Cluster<SubspaceModel<V>> cluster : c.getChildren()) {
- msg.append("\n child ").append(cluster);
+ for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(c); iter.valid(); iter.advance()) {
+ msg.append("\n child ").append(iter.get());
}
}
LOG.verbose(msg.toString());
}
// build result
- Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("DiSH clustering", "dish-clustering");
for (Cluster<SubspaceModel<V>> c : clusters) {
- if (c.getParents() == null || c.getParents().isEmpty()) {
- result.addCluster(c);
+ if (clustering.getClusterHierarchy().numParents(c) == 0) {
+ clustering.addToplevelCluster(c);
}
}
- return result;
+ return clustering;
}
/**
@@ -274,9 +275,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> extractClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder) {
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), LOG) : null;
int processed = 0;
- Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
- Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>>();
- Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<DBID, Pair<BitSet, ArrayModifiableDBIDs>>();
+ Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<>();
+ Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<>();
+ Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<>();
for (Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) {
ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next();
entryMap.put(entry.getID(), entry);
@@ -287,7 +288,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// get the list of (parallel) clusters for the preference vector
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector);
if (parallelClusters == null) {
- parallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
+ parallelClusters = new ArrayList<>();
clustersMap.put(preferenceVector, parallelClusters);
}
@@ -305,7 +306,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
if (cluster == null) {
- cluster = new Pair<BitSet, ArrayModifiableDBIDs>(preferenceVector, DBIDUtil.newArray());
+ cluster = new Pair<>(preferenceVector, DBIDUtil.newArray());
parallelClusters.add(cluster);
}
cluster.second.add(entry.getID());
@@ -373,15 +374,13 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private List<Cluster<SubspaceModel<V>>> sortClusters(Relation<V> database, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) {
final int db_dim = RelationUtil.dimensionality(database);
// int num = 1;
- List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<Cluster<SubspaceModel<V>>>();
+ List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<>();
for (BitSet pv : clustersMap.keySet()) {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
for (int i = 0; i < parallelClusters.size(); i++) {
Pair<BitSet, ArrayModifiableDBIDs> c = parallelClusters.get(i);
- Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.second);
- cluster.setModel(new SubspaceModel<V>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
- cluster.setHierarchy(new HierarchyReferenceLists<Cluster<SubspaceModel<V>>>(cluster, new ArrayList<Cluster<SubspaceModel<V>>>(), new ArrayList<Cluster<SubspaceModel<V>>>()));
- // cluster.setName("Cluster_" + num++);
+ Cluster<SubspaceModel<V>> cluster = new Cluster<>(c.second);
+ cluster.setModel(new SubspaceModel<>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database)));
String subspace = FormatUtil.format(cluster.getModel().getSubspace().getDimensions(), db_dim, "");
if (parallelClusters.size() > 1) {
cluster.setName("Cluster_" + subspace + "_" + i);
@@ -415,9 +414,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
private void checkClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap, int minpts) {
// check if there are clusters < minpts
// and add them to not assigned
- List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>();
- Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>();
- Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<BitSet, ArrayModifiableDBIDs>(new BitSet(), DBIDUtil.newArray());
+ List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<>();
+ Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<>();
+ Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<>(new BitSet(), DBIDUtil.newArray());
for (BitSet pv : clustersMap.keySet()) {
// noise
if (pv.cardinality() == 0) {
@@ -429,7 +428,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// clusters
else {
List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv);
- List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(parallelClusters.size());
+ List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<>(parallelClusters.size());
for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) {
if (!pv.equals(new BitSet()) && c.second.size() < minpts) {
notAssigned.add(c);
@@ -456,7 +455,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
}
}
- List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(1);
+ List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<>(1);
noiseList.add(noise);
clustersMap.put(noise.first, noiseList);
}
@@ -510,13 +509,15 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* Builds the cluster hierarchy.
*
* @param distFunc the distance function
+ * @param clustering Clustering we process
* @param clusters the sorted list of clusters
* @param dimensionality the dimensionality of the data
* @param database the database containing the data objects
*/
- private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
+ private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Clustering<SubspaceModel<V>> clustering, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) {
StringBuilder msg = new StringBuilder();
final int db_dim = RelationUtil.dimensionality(database);
+ Hierarchy<Cluster<SubspaceModel<V>>> hier = clustering.getClusterHierarchy();
for (int i = 0; i < clusters.size() - 1; i++) {
Cluster<SubspaceModel<V>> c_i = clusters.get(i);
@@ -536,9 +537,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
// noise level reached
if (c_j.getModel().getSubspace().dimensionality() == 0) {
// no parents exists -> parent is noise
- if (c_i.getParents().isEmpty()) {
- c_j.getChildren().add(c_i);
- c_i.getParents().add(c_j);
+ if (hier.numParents(c_i) == 0) {
+ clustering.addChildCluster(c_j, c_i);
if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions()));
@@ -560,9 +560,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
if (d <= 2 * epsilon) {
// no parent exists or c_j is not a parent of the already
// existing parents
- if (c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) {
- c_j.getChildren().add(c_i);
- c_i.getParents().add(c_j);
+ if (hier.numParents(c_i) == 0 || !isParent(database, distFunc, c_j, hier.iterParents(c_i))) {
+ clustering.addChildCluster(c_j, c_i);
if (LOG.isDebugging()) {
msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions()));
msg.append("] is parent of [");
@@ -591,16 +590,17 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
* @param distFunc the distance function for distance computation between the
* clusters
* @param parent the parent to be tested
- * @param children the list of children to be tested
+ * @param iter the list of children to be tested
* @return true, if the specified parent cluster is a parent of one child of
* the children clusters, false otherwise
*/
- private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, List<Cluster<SubspaceModel<V>>> children) {
+ private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, Iter<Cluster<SubspaceModel<V>>> iter) {
V parent_centroid = ProjectedCentroid.make(parent.getModel().getDimensions(), database, parent.getIDs()).toVector(database);
int dimensionality = RelationUtil.dimensionality(database);
int subspaceDim_parent = dimensionality - parent.getModel().getSubspace().dimensionality();
- for (Cluster<SubspaceModel<V>> child : children) {
+ for (; iter.valid(); iter.advance()) {
+ Cluster<SubspaceModel<V>> child = iter.get();
V child_centroid = ProjectedCentroid.make(child.getModel().getDimensions(), database, child.getIDs()).toVector(database);
PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(parent_centroid, child_centroid, parent.getModel().getSubspace().getDimensions(), child.getModel().getSubspace().getDimensions());
if (distance.getCorrelationValue() == subspaceDim_parent) {
@@ -699,7 +699,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin
@Override
protected DiSH<V> makeInstance() {
- return new DiSH<V>(epsilon, dishDistance, opticsO);
+ return new DiSH<>(epsilon, dishDistance, opticsO);
}
}
}