diff options
Diffstat (limited to 'src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java')
-rw-r--r-- | src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java | 74 |
1 files changed, 37 insertions, 37 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java index a3496a0e..b17ebebb 100644 --- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java +++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java @@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace; This file is part of ELKI: Environment for Developing KDD-Applications Supported by Index-Structures - Copyright (C) 2012 + Copyright (C) 2013 Ludwig-Maximilians-Universität München Lehr- und Forschungseinheit für Datenbanksysteme ELKI Development Team @@ -62,7 +62,8 @@ import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderEntry; import de.lmu.ifi.dbs.elki.result.optics.ClusterOrderResult; import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil; import de.lmu.ifi.dbs.elki.utilities.FormatUtil; -import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.HierarchyReferenceLists; +import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy; +import de.lmu.ifi.dbs.elki.utilities.datastructures.hierarchy.Hierarchy.Iter; import de.lmu.ifi.dbs.elki.utilities.documentation.Description; import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; import de.lmu.ifi.dbs.elki.utilities.documentation.Title; @@ -238,29 +239,29 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin } // build the hierarchy - buildHierarchy(database, distFunc, clusters, dimensionality); + Clustering<SubspaceModel<V>> clustering = new Clustering<>("DiSH clustering", "dish-clustering"); + buildHierarchy(database, distFunc, clustering, clusters, dimensionality); if (LOG.isVerbose()) { StringBuilder msg = new StringBuilder("Step 4: build hierarchy"); for (Cluster<SubspaceModel<V>> c : clusters) { msg.append('\n').append(FormatUtil.format(dimensionality, c.getModel().getDimensions())).append(" ids ").append(c.size()); - for (Cluster<SubspaceModel<V>> cluster : c.getParents()) { - msg.append("\n parent ").append(cluster); + for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterParents(c); iter.valid(); iter.advance()) { + msg.append("\n parent ").append(iter.get()); } - for (Cluster<SubspaceModel<V>> cluster : c.getChildren()) { - msg.append("\n child ").append(cluster); + for (Iter<Cluster<SubspaceModel<V>>> iter = clustering.getClusterHierarchy().iterChildren(c); iter.valid(); iter.advance()) { + msg.append("\n child ").append(iter.get()); } } LOG.verbose(msg.toString()); } // build result - Clustering<SubspaceModel<V>> result = new Clustering<SubspaceModel<V>>("DiSH clustering", "dish-clustering"); for (Cluster<SubspaceModel<V>> c : clusters) { - if (c.getParents() == null || c.getParents().isEmpty()) { - result.addCluster(c); + if (clustering.getClusterHierarchy().numParents(c) == 0) { + clustering.addToplevelCluster(c); } } - return result; + return clustering; } /** @@ -274,9 +275,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin private Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> extractClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, ClusterOrderResult<PreferenceVectorBasedCorrelationDistance> clusterOrder) { FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Extract Clusters", database.size(), LOG) : null; int processed = 0; - Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>(); - Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>>(); - Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<DBID, Pair<BitSet, ArrayModifiableDBIDs>>(); + Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap = new HashMap<>(); + Map<DBID, ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> entryMap = new HashMap<>(); + Map<DBID, Pair<BitSet, ArrayModifiableDBIDs>> entryToClusterMap = new HashMap<>(); for (Iterator<ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance>> it = clusterOrder.iterator(); it.hasNext();) { ClusterOrderEntry<PreferenceVectorBasedCorrelationDistance> entry = it.next(); entryMap.put(entry.getID(), entry); @@ -287,7 +288,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin // get the list of (parallel) clusters for the preference vector List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(preferenceVector); if (parallelClusters == null) { - parallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(); + parallelClusters = new ArrayList<>(); clustersMap.put(preferenceVector, parallelClusters); } @@ -305,7 +306,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin } } if (cluster == null) { - cluster = new Pair<BitSet, ArrayModifiableDBIDs>(preferenceVector, DBIDUtil.newArray()); + cluster = new Pair<>(preferenceVector, DBIDUtil.newArray()); parallelClusters.add(cluster); } cluster.second.add(entry.getID()); @@ -373,15 +374,13 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin private List<Cluster<SubspaceModel<V>>> sortClusters(Relation<V> database, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap) { final int db_dim = RelationUtil.dimensionality(database); // int num = 1; - List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<Cluster<SubspaceModel<V>>>(); + List<Cluster<SubspaceModel<V>>> clusters = new ArrayList<>(); for (BitSet pv : clustersMap.keySet()) { List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv); for (int i = 0; i < parallelClusters.size(); i++) { Pair<BitSet, ArrayModifiableDBIDs> c = parallelClusters.get(i); - Cluster<SubspaceModel<V>> cluster = new Cluster<SubspaceModel<V>>(c.second); - cluster.setModel(new SubspaceModel<V>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database))); - cluster.setHierarchy(new HierarchyReferenceLists<Cluster<SubspaceModel<V>>>(cluster, new ArrayList<Cluster<SubspaceModel<V>>>(), new ArrayList<Cluster<SubspaceModel<V>>>())); - // cluster.setName("Cluster_" + num++); + Cluster<SubspaceModel<V>> cluster = new Cluster<>(c.second); + cluster.setModel(new SubspaceModel<>(new Subspace(c.first), Centroid.make(database, c.second).toVector(database))); String subspace = FormatUtil.format(cluster.getModel().getSubspace().getDimensions(), db_dim, ""); if (parallelClusters.size() > 1) { cluster.setName("Cluster_" + subspace + "_" + i); @@ -415,9 +414,9 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin private void checkClusters(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> clustersMap, int minpts) { // check if there are clusters < minpts // and add them to not assigned - List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(); - Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>>(); - Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<BitSet, ArrayModifiableDBIDs>(new BitSet(), DBIDUtil.newArray()); + List<Pair<BitSet, ArrayModifiableDBIDs>> notAssigned = new ArrayList<>(); + Map<BitSet, List<Pair<BitSet, ArrayModifiableDBIDs>>> newClustersMap = new HashMap<>(); + Pair<BitSet, ArrayModifiableDBIDs> noise = new Pair<>(new BitSet(), DBIDUtil.newArray()); for (BitSet pv : clustersMap.keySet()) { // noise if (pv.cardinality() == 0) { @@ -429,7 +428,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin // clusters else { List<Pair<BitSet, ArrayModifiableDBIDs>> parallelClusters = clustersMap.get(pv); - List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(parallelClusters.size()); + List<Pair<BitSet, ArrayModifiableDBIDs>> newParallelClusters = new ArrayList<>(parallelClusters.size()); for (Pair<BitSet, ArrayModifiableDBIDs> c : parallelClusters) { if (!pv.equals(new BitSet()) && c.second.size() < minpts) { notAssigned.add(c); @@ -456,7 +455,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin } } - List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<Pair<BitSet, ArrayModifiableDBIDs>>(1); + List<Pair<BitSet, ArrayModifiableDBIDs>> noiseList = new ArrayList<>(1); noiseList.add(noise); clustersMap.put(noise.first, noiseList); } @@ -510,13 +509,15 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin * Builds the cluster hierarchy. * * @param distFunc the distance function + * @param clustering Clustering we process * @param clusters the sorted list of clusters * @param dimensionality the dimensionality of the data * @param database the database containing the data objects */ - private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) { + private void buildHierarchy(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Clustering<SubspaceModel<V>> clustering, List<Cluster<SubspaceModel<V>>> clusters, int dimensionality) { StringBuilder msg = new StringBuilder(); final int db_dim = RelationUtil.dimensionality(database); + Hierarchy<Cluster<SubspaceModel<V>>> hier = clustering.getClusterHierarchy(); for (int i = 0; i < clusters.size() - 1; i++) { Cluster<SubspaceModel<V>> c_i = clusters.get(i); @@ -536,9 +537,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin // noise level reached if (c_j.getModel().getSubspace().dimensionality() == 0) { // no parents exists -> parent is noise - if (c_i.getParents().isEmpty()) { - c_j.getChildren().add(c_i); - c_i.getParents().add(c_j); + if (hier.numParents(c_i) == 0) { + clustering.addChildCluster(c_j, c_i); if (LOG.isDebugging()) { msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions())); msg.append("] is parent of [").append(FormatUtil.format(db_dim, c_i.getModel().getSubspace().getDimensions())); @@ -560,9 +560,8 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin if (d <= 2 * epsilon) { // no parent exists or c_j is not a parent of the already // existing parents - if (c_i.getParents().isEmpty() || !isParent(database, distFunc, c_j, c_i.getParents())) { - c_j.getChildren().add(c_i); - c_i.getParents().add(c_j); + if (hier.numParents(c_i) == 0 || !isParent(database, distFunc, c_j, hier.iterParents(c_i))) { + clustering.addChildCluster(c_j, c_i); if (LOG.isDebugging()) { msg.append("\n [").append(FormatUtil.format(db_dim, c_j.getModel().getSubspace().getDimensions())); msg.append("] is parent of ["); @@ -591,16 +590,17 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin * @param distFunc the distance function for distance computation between the * clusters * @param parent the parent to be tested - * @param children the list of children to be tested + * @param iter the list of children to be tested * @return true, if the specified parent cluster is a parent of one child of * the children clusters, false otherwise */ - private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, List<Cluster<SubspaceModel<V>>> children) { + private boolean isParent(Relation<V> database, DiSHDistanceFunction.Instance<V> distFunc, Cluster<SubspaceModel<V>> parent, Iter<Cluster<SubspaceModel<V>>> iter) { V parent_centroid = ProjectedCentroid.make(parent.getModel().getDimensions(), database, parent.getIDs()).toVector(database); int dimensionality = RelationUtil.dimensionality(database); int subspaceDim_parent = dimensionality - parent.getModel().getSubspace().dimensionality(); - for (Cluster<SubspaceModel<V>> child : children) { + for (; iter.valid(); iter.advance()) { + Cluster<SubspaceModel<V>> child = iter.get(); V child_centroid = ProjectedCentroid.make(child.getModel().getDimensions(), database, child.getIDs()).toVector(database); PreferenceVectorBasedCorrelationDistance distance = distFunc.correlationDistance(parent_centroid, child_centroid, parent.getModel().getSubspace().getDimensions(), child.getModel().getSubspace().getDimensions()); if (distance.getCorrelationValue() == subspaceDim_parent) { @@ -699,7 +699,7 @@ public class DiSH<V extends NumberVector<?>> extends AbstractAlgorithm<Clusterin @Override protected DiSH<V> makeInstance() { - return new DiSH<V>(epsilon, dishDistance, opticsO); + return new DiSH<>(epsilon, dishDistance, opticsO); } } } |