summaryrefslogtreecommitdiff
path: root/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java
diff options
context:
space:
mode:
Diffstat (limited to 'elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java')
-rw-r--r--elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java87
1 files changed, 36 insertions, 51 deletions
diff --git a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java
index 41de1cac..484c83b7 100644
--- a/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java
+++ b/elki/src/main/java/de/lmu/ifi/dbs/elki/algorithm/clustering/hierarchical/AGNES.java
@@ -27,14 +27,8 @@ import de.lmu.ifi.dbs.elki.algorithm.AbstractDistanceBasedAlgorithm;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
-import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
@@ -57,7 +51,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* This is the naive O(n^3) algorithm. See {@link SLINK} for a much faster
* algorithm (however, only for single-linkage).
- *
+ *
* This implementation uses the pointer-based representation used by SLINK, so
* that the extraction algorithms we have can be used with either of them.
*
@@ -92,8 +86,10 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
* </p>
*
* @author Erich Schubert
+ * @since 0.6.0
*
* @apiviz.composedOf LinkageMethod
+ * @apiviz.composedOf PointerHierarchyRepresentationBuilder
*
* @param <O> Object type
*/
@@ -116,7 +112,7 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
/**
* Constructor.
- *
+ *
* @param distanceFunction Distance function to use
* @param linkage Linkage method
*/
@@ -136,7 +132,7 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
/**
* Run the algorithm
- *
+ *
* @param db Database
* @param relation Relation
* @return Clustering hierarchy
@@ -162,21 +158,16 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
initializeDistanceMatrix(scratch, dq, ix, iy, square);
// Initialize space for result:
- WritableDBIDDataStore pi = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- WritableDoubleDataStore lambda = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, Double.POSITIVE_INFINITY);
- WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 1);
- for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
- pi.put(it, it);
- }
+ PointerHierarchyRepresentationBuilder builder = new PointerHierarchyRepresentationBuilder(ids);
// Repeat until everything merged into 1 cluster
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
int wsize = size;
for(int i = 1; i < size; i++) {
- int x = findMerge(wsize, scratch, ix, iy, pi, lambda, csize);
+ int x = findMerge(wsize, scratch, ix, iy, builder);
if(x == wsize - 1) {
--wsize;
- for(ix.seek(wsize - 1); lambda.doubleValue(ix) < Double.POSITIVE_INFINITY; ix.retract()) {
+ for(ix.seek(wsize - 1); builder.isLinked(ix); ix.retract()) {
--wsize;
}
}
@@ -184,12 +175,12 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
}
LOG.ensureCompleted(prog);
- return new PointerHierarchyRepresentationResult(ids, pi, lambda);
+ return builder.complete();
}
/**
* Compute the size of a complete x by x triangle (minus diagonal)
- *
+ *
* @param x Offset
* @return Size of complete triangle
*/
@@ -199,7 +190,7 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
/**
* Initialize a distance matrix.
- *
+ *
* @param scratch Scratch space to be used.
* @param dq Distance query
* @param ix Data iterator
@@ -221,29 +212,27 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
/**
* Perform the next merge step in AGNES.
- *
+ *
* @param size Data set size
* @param scratch Scratch space.
* @param ix First iterator
* @param iy Second iterator
- * @param pi Parent storage
- * @param lambda Lambda (join distance) storage
- * @param csize Cluster sizes
+ * @param builder Pointer representation builder
* @return x, for shrinking the working set.
*/
- protected int findMerge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableIntegerDataStore csize) {
+ protected int findMerge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, PointerHierarchyRepresentationBuilder builder) {
double mindist = Double.POSITIVE_INFINITY;
int x = -1, y = -1;
// Find minimum:
for(int ox = 0, xbase = 0; ox < size; xbase += ox++) {
// Skip if object has already joined a cluster:
- if(lambda.doubleValue(ix.seek(ox)) < Double.POSITIVE_INFINITY) {
+ if(builder.isLinked(ix.seek(ox))) {
continue;
}
assert(xbase == triangleSize(ox));
for(int oy = 0; oy < ox; oy++) {
// Skip if object has already joined a cluster:
- if(lambda.doubleValue(iy.seek(oy)) < Double.POSITIVE_INFINITY) {
+ if(builder.isLinked(iy.seek(oy))) {
continue;
}
final int idx = xbase + oy;
@@ -255,25 +244,23 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
}
}
assert(x >= 0 && y >= 0);
- merge(size, scratch, ix, iy, pi, lambda, csize, mindist, x, y);
+ merge(size, scratch, ix, iy, builder, mindist, x, y);
return x;
}
/**
* Execute the cluster merge.
- *
+ *
* @param size Data set size
* @param scratch Scratch space.
* @param ix First iterator
* @param iy Second iterator
- * @param pi Parent storage
- * @param lambda Lambda (join distance) storage
- * @param csize Cluster sizes
+ * @param builder Hierarchy builder
* @param mindist Distance that was used for merging
* @param x First matrix position
* @param y Second matrix position
*/
- protected void merge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, WritableDBIDDataStore pi, WritableDoubleDataStore lambda, WritableIntegerDataStore csize, double mindist, int x, int y) {
+ protected void merge(int size, double[] scratch, DBIDArrayIter ix, DBIDArrayIter iy, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y) {
// Avoid allocating memory, by reusing existing iterators:
ix.seek(x);
iy.seek(y);
@@ -283,41 +270,39 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
// Perform merge in data structure: x -> y
assert(y < x);
// Since y < x, prefer keeping y, dropping x.
- lambda.put(ix, mindist);
- pi.put(ix, iy);
+ builder.add(ix, mindist, iy);
// Update cluster size for y:
- final int sizex = csize.intValue(ix), sizey = csize.intValue(iy);
- csize.put(iy, sizex + sizey);
+ final int sizex = builder.getSize(ix), sizey = builder.getSize(iy);
+ builder.setSize(iy, sizex + sizey);
// Note: this changes iy.
- updateMatrix(size, scratch, iy, lambda, csize, mindist, x, y, sizex, sizey);
+ updateMatrix(size, scratch, iy, builder, mindist, x, y, sizex, sizey);
}
/**
* Update the scratch distance matrix.
- *
+ *
* @param size Data set size
* @param scratch Scratch matrix.
* @param ij Iterator to reuse
- * @param lambda Lambda (join distance) storage
- * @param csize Cluster sizes
+ * @param builder Hierarchy builder
* @param mindist Distance that was used for merging
* @param x First matrix position
* @param y Second matrix position
* @param sizex Old size of first cluster
* @param sizey Old size of second cluster
*/
- protected void updateMatrix(int size, double[] scratch, DBIDArrayIter ij, WritableDoubleDataStore lambda, WritableIntegerDataStore csize, double mindist, int x, int y, final int sizex, final int sizey) {
+ protected void updateMatrix(int size, double[] scratch, DBIDArrayIter ij, PointerHierarchyRepresentationBuilder builder, double mindist, int x, int y, final int sizex, final int sizey) {
// Update distance matrix. Note: miny < minx
final int xbase = triangleSize(x), ybase = triangleSize(y);
// Write to (y, j), with j < y
int j = 0;
for(; j < y; j++) {
- if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) {
+ if(builder.isLinked(ij.seek(j))) {
continue;
}
- final int sizej = csize.intValue(ij);
+ final int sizej = builder.getSize(ij);
final int yb = ybase + j;
scratch[yb] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[yb], sizej, mindist);
}
@@ -325,20 +310,20 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
// Write to (j, y), with y < j < x
int jbase = triangleSize(j);
for(; j < x; jbase += j++) {
- if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) {
+ if(builder.isLinked(ij.seek(j))) {
continue;
}
- final int sizej = csize.intValue(ij);
+ final int sizej = builder.getSize(ij);
final int jb = jbase + y;
scratch[jb] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[jb], sizej, mindist);
}
jbase += j++; // Skip x
// Write to (j, y), with y < x < j
for(; j < size; jbase += j++) {
- if(lambda.doubleValue(ij.seek(j)) < Double.POSITIVE_INFINITY) {
+ if(builder.isLinked(ij.seek(j))) {
continue;
}
- final int sizej = csize.intValue(ij);
+ final int sizej = builder.getSize(ij);
scratch[jbase + y] = linkage.combine(sizex, scratch[jbase + x], sizey, scratch[jbase + y], sizej, mindist);
}
}
@@ -356,11 +341,11 @@ public class AGNES<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchy
/**
* Parameterization class
- *
+ *
* @author Erich Schubert
- *
+ *
* @apiviz.exclude
- *
+ *
* @param <O> Object type
*/
public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {