diff options
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java')
-rw-r--r-- | elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java | 27 |
1 files changed, 18 insertions, 9 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java index 7d802fa4..ba6cf77d 100644 --- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java +++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java @@ -34,12 +34,13 @@ import de.lmu.ifi.dbs.elki.data.model.Model; import de.lmu.ifi.dbs.elki.data.type.TypeInformation; import de.lmu.ifi.dbs.elki.data.type.TypeUtil; import de.lmu.ifi.dbs.elki.database.QueryUtil; +import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.ids.DBIDIter; import de.lmu.ifi.dbs.elki.database.ids.DBIDRef; import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil; import de.lmu.ifi.dbs.elki.database.ids.DBIDVar; import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList; -import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs; +import de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter; import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs; import de.lmu.ifi.dbs.elki.database.query.range.RangeQuery; import de.lmu.ifi.dbs.elki.database.relation.Relation; @@ -70,6 +71,8 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; * </p> * * @author Arthur Zimek + * @author Erich Schubert + * @since 0.2 * @param <O> the type of Object the algorithm is applied to */ @Title("DBSCAN: Density-Based Clustering of Applications with Noise") @@ -172,9 +175,10 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode IndefiniteProgress clusprog = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters", LOG) : null; processedIDs = DBIDUtil.newHashSet(size); + ArrayModifiableDBIDs seeds = DBIDUtil.newArray(); for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) { if(!processedIDs.contains(iditer)) { - expandCluster(relation, rangeQuery, iditer, objprog, clusprog); + expandCluster(relation, rangeQuery, iditer, seeds, objprog, clusprog); } if(objprog != null && clusprog != null) { objprog.setProcessed(processedIDs.size(), LOG); @@ -197,9 +201,11 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode * @param relation Database relation to run on * @param rangeQuery Range query to use * @param startObjectID potential seed of a new potential cluster - * @param objprog the progress object for logging the current status + * @param seeds Array to store the current seeds + * @param objprog Number of objects processed (may be {@code null}) + * @param clusprog Number of clusters found (may be {@code null}) */ - protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) { + protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, ArrayModifiableDBIDs seeds, FiniteProgress objprog, IndefiniteProgress clusprog) { DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon); ncounter += neighbors.size(); @@ -218,13 +224,13 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode processedIDs.add(startObjectID); // try to expand the cluster - HashSetModifiableDBIDs seeds = DBIDUtil.newHashSet(); + assert(seeds.size() == 0); + seeds.clear(); processNeighbors(neighbors.iter(), currentCluster, seeds); DBIDVar o = DBIDUtil.newVar(); while(!seeds.isEmpty()) { - seeds.pop(o); - neighbors = rangeQuery.getRangeForDBID(o, epsilon); + neighbors = rangeQuery.getRangeForDBID(seeds.pop(o), epsilon); ncounter += neighbors.size(); if(neighbors.size() >= minpts) { @@ -248,10 +254,13 @@ public class DBSCAN<O> extends AbstractDistanceBasedAlgorithm<O, Clustering<Mode * @param currentCluster Current cluster * @param seeds Seed set */ - private void processNeighbors(DBIDIter neighbor, ModifiableDBIDs currentCluster, HashSetModifiableDBIDs seeds) { + private void processNeighbors(DoubleDBIDListIter neighbor, ModifiableDBIDs currentCluster, ArrayModifiableDBIDs seeds) { + final boolean ismetric = getDistanceFunction().isMetric(); for(; neighbor.valid(); neighbor.advance()) { if(processedIDs.add(neighbor)) { - seeds.add(neighbor); + if(!ismetric || neighbor.doubleValue() > 0.) { + seeds.add(neighbor); + } } else if(!noise.remove(neighbor)) { continue; |