summaryrefslogtreecommitdiff
path: root/src/tutorial/clustering
diff options
context:
space:
mode:
Diffstat (limited to 'src/tutorial/clustering')
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java59
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java26
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java82
-rw-r--r--src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java80
-rw-r--r--src/tutorial/clustering/SameSizeKMeansAlgorithm.java51
-rw-r--r--src/tutorial/clustering/package-info.java2
6 files changed, 140 insertions, 160 deletions
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
index 7477c064..b1406274 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering1.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,7 +46,6 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -69,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* @param <O> Object type
*/
-public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+public class NaiveAgglomerativeHierarchicalClustering1<O> extends AbstractDistanceBasedAlgorithm<O, Result> {
/**
* Class logger
*/
@@ -86,7 +85,7 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
* @param distanceFunction Distance function to use
* @param numclusters Number of clusters
*/
- public NaiveAgglomerativeHierarchicalClustering1(DistanceFunction<? super O, D> distanceFunction, int numclusters) {
+ public NaiveAgglomerativeHierarchicalClustering1(DistanceFunction<? super O> distanceFunction, int numclusters) {
super(distanceFunction);
this.numclusters = numclusters;
}
@@ -99,7 +98,7 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
* @return Clustering hierarchy
*/
public Result run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
@@ -108,10 +107,10 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
// Compute the initial distance matrix.
double[][] matrix = new double[size][size];
DBIDArrayIter ix = ids.iter(), iy = ids.iter();
- for (int x = 0; ix.valid(); x++, ix.advance()) {
+ for(int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
- for (int y = 0; y < x; y++, iy.advance()) {
- final double dist = dq.distance(ix, iy).doubleValue();
+ for(int y = 0; y < x; y++, iy.advance()) {
+ final double dist = dq.distance(ix, iy);
matrix[x][y] = dist;
matrix[y][x] = dist;
}
@@ -129,18 +128,18 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
// Repeat until everything merged, except the desired number of clusters:
final int stop = size - numclusters;
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
- for (int i = 0; i < stop; i++) {
+ for(int i = 0; i < stop; i++) {
double min = Double.POSITIVE_INFINITY;
int minx = -1, miny = -1;
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
continue;
}
- for (int y = 0; y < x; y++) {
- if (height[y] < Double.POSITIVE_INFINITY) {
+ for(int y = 0; y < x; y++) {
+ if(height[y] < Double.POSITIVE_INFINITY) {
continue;
}
- if (matrix[x][y] < min) {
+ if(matrix[x][y] < min) {
min = matrix[x][y];
minx = x;
miny = y;
@@ -158,36 +157,33 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
// Merge into cluster
ModifiableDBIDs cx = clusters.get(minx);
ModifiableDBIDs cy = clusters.get(miny);
- if (cy == null) {
+ if(cy == null) {
cy = DBIDUtil.newHashSet();
cy.add(iy);
}
- if (cx == null) {
+ if(cx == null) {
cy.add(ix);
- } else {
+ }
+ else {
cy.addDBIDs(cx);
clusters.remove(minx);
}
clusters.put(miny, cy);
// Update distance matrix for y:
- for (int j = 0; j < size; j++) {
+ for(int j = 0; j < size; j++) {
matrix[j][miny] = Math.min(matrix[j][minx], matrix[j][miny]);
matrix[miny][j] = Math.min(matrix[minx][j], matrix[miny][j]);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
// Build the clustering result
final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
DBIDs cids = clusters.get(x);
- if (cids == null) {
+ if(cids == null) {
ix.seek(x);
cids = DBIDUtil.deref(ix);
}
@@ -214,10 +210,11 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Desired number of clusters.
*/
@@ -228,13 +225,13 @@ public class NaiveAgglomerativeHierarchicalClustering1<O, D extends NumberDistan
super.makeOptions(config);
IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID);
numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(numclustersP)) {
+ if(config.grab(numclustersP)) {
numclusters = numclustersP.intValue();
}
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering1<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering1<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering1<>(distanceFunction, numclusters);
}
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
index d33ca768..0081a845 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering2.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -46,7 +46,6 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -69,7 +68,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
*
* @param <O> Object type
*/
-public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+public class NaiveAgglomerativeHierarchicalClustering2<O> extends AbstractDistanceBasedAlgorithm<O, Result> {
/**
* Class logger
*/
@@ -86,7 +85,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
* @param distanceFunction Distance function to use
* @param numclusters Number of clusters
*/
- public NaiveAgglomerativeHierarchicalClustering2(DistanceFunction<? super O, D> distanceFunction, int numclusters) {
+ public NaiveAgglomerativeHierarchicalClustering2(DistanceFunction<? super O> distanceFunction, int numclusters) {
super(distanceFunction);
this.numclusters = numclusters;
}
@@ -99,7 +98,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
* @return Clustering hierarchy
*/
public Result run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
@@ -116,7 +115,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
for (int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
for (int y = 0; y < x; y++, iy.advance()) {
- scratch[pos] = dq.distance(ix, iy).doubleValue();
+ scratch[pos] = dq.distance(ix, iy);
pos++;
}
}
@@ -200,13 +199,9 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
final int jbase = triangleSize(j);
scratch[jbase + miny] = Math.min(scratch[jbase + minx], scratch[jbase + miny]);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
// Build the clustering result
final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
@@ -251,10 +246,11 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Desired number of clusters.
*/
@@ -271,7 +267,7 @@ public class NaiveAgglomerativeHierarchicalClustering2<O, D extends NumberDistan
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering2<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering2<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering2<>(distanceFunction, numclusters);
}
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
index d3312115..0b1590bd 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering3.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -47,7 +47,6 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.result.Result;
@@ -81,7 +80,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
* @param <O> Object type
*/
@Reference(title = "A Review of Classification", authors = "R. M. Cormack", booktitle = "Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3", url = "http://www.jstor.org/stable/2344237")
-public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, Result> {
+public class NaiveAgglomerativeHierarchicalClustering3<O> extends AbstractDistanceBasedAlgorithm<O, Result> {
/**
* Class logger
*/
@@ -94,6 +93,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
* R. M. Cormack, A Review of Classification
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static enum Linkage {//
SINGLE {
@@ -168,7 +169,7 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
* @param numclusters Number of clusters
* @param linkage Linkage strategy
*/
- public NaiveAgglomerativeHierarchicalClustering3(DistanceFunction<? super O, D> distanceFunction, int numclusters, Linkage linkage) {
+ public NaiveAgglomerativeHierarchicalClustering3(DistanceFunction<? super O> distanceFunction, int numclusters, Linkage linkage) {
super(distanceFunction);
this.numclusters = numclusters;
this.linkage = linkage;
@@ -182,14 +183,14 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
* @return Clustering hierarchy
*/
public Result run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
- if (size > 0x10000) {
+ if(size > 0x10000) {
throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
}
- if (Linkage.SINGLE.equals(linkage)) {
+ if(Linkage.SINGLE.equals(linkage)) {
LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
}
@@ -199,12 +200,12 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
// Position counter - must agree with computeOffset!
int pos = 0;
boolean square = Linkage.WARD.equals(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction()));
- for (int x = 0; ix.valid(); x++, ix.advance()) {
+ for(int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
- for (int y = 0; y < x; y++, iy.advance()) {
- scratch[pos] = dq.distance(ix, iy).doubleValue();
+ for(int y = 0; y < x; y++, iy.advance()) {
+ scratch[pos] = dq.distance(ix, iy);
// Ward uses variances -- i.e. squared values
- if (square) {
+ if(square) {
scratch[pos] *= scratch[pos];
}
pos++;
@@ -223,20 +224,20 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
// Repeat until everything merged, except the desired number of clusters:
final int stop = size - numclusters;
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
- for (int i = 0; i < stop; i++) {
+ for(int i = 0; i < stop; i++) {
double min = Double.POSITIVE_INFINITY;
int minx = -1, miny = -1;
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
continue;
}
final int xbase = triangleSize(x);
- for (int y = 0; y < x; y++) {
- if (height[y] < Double.POSITIVE_INFINITY) {
+ for(int y = 0; y < x; y++) {
+ if(height[y] < Double.POSITIVE_INFINITY) {
continue;
}
final int idx = xbase + y;
- if (scratch[idx] < min) {
+ if(scratch[idx] < min) {
min = scratch[idx];
minx = x;
miny = y;
@@ -255,15 +256,17 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
ModifiableDBIDs cx = clusters.get(minx);
ModifiableDBIDs cy = clusters.get(miny);
int sizex = 1, sizey = 1; // cluster sizes, for averaging
- if (cy == null) {
+ if(cy == null) {
cy = DBIDUtil.newHashSet();
cy.add(iy);
- } else {
+ }
+ else {
sizey = cy.size();
}
- if (cx == null) {
+ if(cx == null) {
cy.add(ix);
- } else {
+ }
+ else {
sizex = cx.size();
cy.addDBIDs(cx);
clusters.remove(minx);
@@ -276,8 +279,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
// hashmap lookup.
final int xbase = triangleSize(minx), ybase = triangleSize(miny);
// Write to (y, j), with j < y
- for (int j = 0; j < miny; j++) {
- if (height[j] < Double.POSITIVE_INFINITY) {
+ for(int j = 0; j < miny; j++) {
+ if(height[j] < Double.POSITIVE_INFINITY) {
continue;
}
final DBIDs idsj = clusters.get(j);
@@ -285,8 +288,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
scratch[ybase + j] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[ybase + j], sizej, min);
}
// Write to (j, y), with y < j < x
- for (int j = miny + 1; j < minx; j++) {
- if (height[j] < Double.POSITIVE_INFINITY) {
+ for(int j = miny + 1; j < minx; j++) {
+ if(height[j] < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(j);
@@ -295,8 +298,8 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + j], sizey, scratch[jbase + miny], sizej, min);
}
// Write to (j, y), with y < x < j
- for (int j = minx + 1; j < size; j++) {
- if (height[j] < Double.POSITIVE_INFINITY) {
+ for(int j = minx + 1; j < size; j++) {
+ if(height[j] < Double.POSITIVE_INFINITY) {
continue;
}
final DBIDs idsj = clusters.get(j);
@@ -304,20 +307,16 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
final int jbase = triangleSize(j);
scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
// Build the clustering result
final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
- for (int x = 0; x < size; x++) {
- if (height[x] < Double.POSITIVE_INFINITY) {
+ for(int x = 0; x < size; x++) {
+ if(height[x] < Double.POSITIVE_INFINITY) {
DBIDs cids = clusters.get(x);
- if (cids == null) {
+ if(cids == null) {
ix.seek(x);
cids = DBIDUtil.deref(ix);
}
@@ -355,10 +354,11 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for linkage parameter.
*/
@@ -379,19 +379,19 @@ public class NaiveAgglomerativeHierarchicalClustering3<O, D extends NumberDistan
super.makeOptions(config);
IntParameter numclustersP = new IntParameter(ExtractFlatClusteringFromHierarchy.Parameterizer.MINCLUSTERS_ID);
numclustersP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
- if (config.grab(numclustersP)) {
+ if(config.grab(numclustersP)) {
numclusters = numclustersP.intValue();
}
EnumParameter<Linkage> linkageP = new EnumParameter<>(LINKAGE_ID, Linkage.class);
linkageP.setDefaultValue(Linkage.WARD);
- if (config.grab(linkageP)) {
+ if(config.grab(linkageP)) {
linkage = linkageP.getValue();
}
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering3<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering3<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering3<>(distanceFunction, numclusters, linkage);
}
}
diff --git a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java
index 23678ccc..997b657a 100644
--- a/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java
+++ b/src/tutorial/clustering/NaiveAgglomerativeHierarchicalClustering4.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -33,7 +33,7 @@ import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDBIDDataStore;
-import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDistanceDataStore;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore;
import de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore;
import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter;
@@ -43,8 +43,6 @@ import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.DistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
@@ -75,7 +73,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.EnumParameter;
* @param <O> Object type
*/
@Reference(title = "A Review of Classification", authors = "R. M. Cormack", booktitle = "Journal of the Royal Statistical Society. Series A, Vol. 134, No. 3", url = "http://www.jstor.org/stable/2344237")
-public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm<O, D, PointerHierarchyRepresentationResult<DoubleDistance>> implements HierarchicalClusteringAlgorithm<DoubleDistance> {
+public class NaiveAgglomerativeHierarchicalClustering4<O> extends AbstractDistanceBasedAlgorithm<O, PointerHierarchyRepresentationResult> implements HierarchicalClusteringAlgorithm {
/**
* Class logger
*/
@@ -88,6 +86,8 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
* R. M. Cormack, A Review of Classification
*
* @author Erich Schubert
+ *
+ * @apiviz.exclude
*/
public static enum Linkage {//
SINGLE {
@@ -156,7 +156,7 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
* @param distanceFunction Distance function to use
* @param linkage Linkage strategy
*/
- public NaiveAgglomerativeHierarchicalClustering4(DistanceFunction<? super O, D> distanceFunction, Linkage linkage) {
+ public NaiveAgglomerativeHierarchicalClustering4(DistanceFunction<? super O> distanceFunction, Linkage linkage) {
super(distanceFunction);
this.linkage = linkage;
}
@@ -168,15 +168,15 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
* @param relation Relation
* @return Clustering hierarchy
*/
- public PointerHierarchyRepresentationResult<DoubleDistance> run(Database db, Relation<O> relation) {
- DistanceQuery<O, D> dq = db.getDistanceQuery(relation, getDistanceFunction());
+ public PointerHierarchyRepresentationResult run(Database db, Relation<O> relation) {
+ DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
final int size = ids.size();
- if (size > 0x10000) {
+ if(size > 0x10000) {
throw new AbortException("This implementation does not scale to data sets larger than " + 0x10000 + " instances (~17 GB RAM), which results in an integer overflow.");
}
- if (Linkage.SINGLE.equals(linkage)) {
+ if(Linkage.SINGLE.equals(linkage)) {
LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");
}
@@ -186,12 +186,12 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Position counter - must agree with computeOffset!
int pos = 0;
boolean square = Linkage.WARD.equals(linkage) && !(SquaredEuclideanDistanceFunction.class.isInstance(getDistanceFunction()));
- for (int x = 0; ix.valid(); x++, ix.advance()) {
+ for(int x = 0; ix.valid(); x++, ix.advance()) {
iy.seek(0);
- for (int y = 0; y < x; y++, iy.advance()) {
- scratch[pos] = dq.distance(ix, iy).doubleValue();
+ for(int y = 0; y < x; y++, iy.advance()) {
+ scratch[pos] = dq.distance(ix, iy);
// Ward uses variances -- i.e. squared values
- if (square) {
+ if(square) {
scratch[pos] *= scratch[pos];
}
pos++;
@@ -200,9 +200,9 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Initialize space for result:
WritableDBIDDataStore parent = DataStoreUtil.makeDBIDStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
- WritableDoubleDistanceDataStore height = DataStoreUtil.makeDoubleDistanceStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
+ WritableDoubleDataStore height = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
WritableIntegerDataStore csize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
- for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
+ for(DBIDIter it = ids.iter(); it.valid(); it.advance()) {
parent.put(it, it);
height.put(it, Double.POSITIVE_INFINITY);
csize.put(it, 1);
@@ -210,20 +210,20 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Repeat until everything merged, except the desired number of clusters:
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", size - 1, LOG) : null;
- for (int i = 1; i < size; i++) {
+ for(int i = 1; i < size; i++) {
double min = Double.POSITIVE_INFINITY;
int minx = -1, miny = -1;
- for (ix.seek(0); ix.valid(); ix.advance()) {
- if (height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
+ for(ix.seek(0); ix.valid(); ix.advance()) {
+ if(height.doubleValue(ix) < Double.POSITIVE_INFINITY) {
continue;
}
final int xbase = triangleSize(ix.getOffset());
- for (iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
- if (height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
+ for(iy.seek(0); iy.getOffset() < ix.getOffset(); iy.advance()) {
+ if(height.doubleValue(iy) < Double.POSITIVE_INFINITY) {
continue;
}
final int idx = xbase + iy.getOffset();
- if (scratch[idx] <= min) {
+ if(scratch[idx] <= min) {
min = scratch[idx];
minx = ix.getOffset();
miny = iy.getOffset();
@@ -244,16 +244,16 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
// Update distance matrix. Note: miny < minx
final int xbase = triangleSize(minx), ybase = triangleSize(miny);
// Write to (y, j), with j < y
- for (ij.seek(0); ij.getOffset() < miny; ij.advance()) {
- if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ for(ij.seek(0); ij.getOffset() < miny; ij.advance()) {
+ if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int sizej = csize.intValue(ij);
scratch[ybase + ij.getOffset()] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[ybase + ij.getOffset()], sizej, min);
}
// Write to (j, y), with y < j < x
- for (ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
- if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ for(ij.seek(miny + 1); ij.getOffset() < minx; ij.advance()) {
+ if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(ij.getOffset());
@@ -261,23 +261,19 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
scratch[jbase + miny] = linkage.combine(sizex, scratch[xbase + ij.getOffset()], sizey, scratch[jbase + miny], sizej, min);
}
// Write to (j, y), with y < x < j
- for (ij.seek(minx + 1); ij.valid(); ij.advance()) {
- if (height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
+ for(ij.seek(minx + 1); ij.valid(); ij.advance()) {
+ if(height.doubleValue(ij) < Double.POSITIVE_INFINITY) {
continue;
}
final int jbase = triangleSize(ij.getOffset());
final int sizej = csize.intValue(ij);
scratch[jbase + miny] = linkage.combine(sizex, scratch[jbase + minx], sizey, scratch[jbase + miny], sizej, min);
}
- if (prog != null) {
- prog.incrementProcessed(LOG);
- }
- }
- if (prog != null) {
- prog.ensureCompleted(LOG);
+ LOG.incrementProcessed(prog);
}
+ LOG.ensureCompleted(prog);
- return new PointerHierarchyRepresentationResult<>(ids, parent, height);
+ return new PointerHierarchyRepresentationResult(ids, parent, height);
}
/**
@@ -301,20 +297,16 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
return LOG;
}
- @Override
- public DoubleDistance getDistanceFactory() {
- return DoubleDistance.FACTORY;
- }
-
/**
* Parameterization class
*
* @author Erich Schubert
*
+ * @apiviz.exclude
+ *
* @param <O> Object type
- * @param <D> Distance type
*/
- public static class Parameterizer<O, D extends NumberDistance<D, ?>> extends AbstractDistanceBasedAlgorithm.Parameterizer<O, D> {
+ public static class Parameterizer<O> extends AbstractDistanceBasedAlgorithm.Parameterizer<O> {
/**
* Option ID for linkage parameter.
*/
@@ -330,13 +322,13 @@ public class NaiveAgglomerativeHierarchicalClustering4<O, D extends NumberDistan
super.makeOptions(config);
EnumParameter<Linkage> linkageP = new EnumParameter<>(LINKAGE_ID, Linkage.class);
linkageP.setDefaultValue(Linkage.WARD);
- if (config.grab(linkageP)) {
+ if(config.grab(linkageP)) {
linkage = linkageP.getValue();
}
}
@Override
- protected NaiveAgglomerativeHierarchicalClustering4<O, D> makeInstance() {
+ protected NaiveAgglomerativeHierarchicalClustering4<O> makeInstance() {
return new NaiveAgglomerativeHierarchicalClustering4<>(distanceFunction, linkage);
}
}
diff --git a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
index 91da44c5..ad57885e 100644
--- a/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
+++ b/src/tutorial/clustering/SameSizeKMeansAlgorithm.java
@@ -4,7 +4,7 @@ package tutorial.clustering;
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
@@ -29,8 +29,8 @@ import java.util.Comparator;
import java.util.List;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.AbstractKMeans;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansInitialization;
-import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansPlusPlusInitialMeans;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansPlusPlusInitialMeans;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -48,12 +48,11 @@ import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.EuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerArrayQuickSort;
import de.lmu.ifi.dbs.elki.utilities.datastructures.arrays.IntegerComparator;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
@@ -78,7 +77,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
*
* @param <V> Vector type
*/
-public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends AbstractKMeans<V, DoubleDistance, MeanModel<V>> {
+public class SameSizeKMeansAlgorithm<V extends NumberVector> extends AbstractKMeans<V, MeanModel> {
/**
* Class logger
*/
@@ -92,7 +91,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param maxiter Maximum number of iterations
* @param initializer
*/
- public SameSizeKMeansAlgorithm(PrimitiveDoubleDistanceFunction<? super NumberVector<?>> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+ public SameSizeKMeansAlgorithm(PrimitiveDistanceFunction<? super NumberVector> distanceFunction, int k, int maxiter, KMeansInitialization<? super V> initializer) {
super(distanceFunction, k, maxiter, initializer);
}
@@ -104,11 +103,11 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @return result
*/
@Override
- public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) {
+ public Clustering<MeanModel> run(Database database, Relation<V> relation) {
// Database objects to process
final DBIDs ids = relation.getDBIDs();
// Choose initial means
- List<? extends NumberVector<?>> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
+ List<Vector> means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction(), Vector.FACTORY);
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for(int i = 0; i < k; i++) {
@@ -125,11 +124,10 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
means = refineResult(relation, means, clusters, metas, tids);
// Wrap result
- Clustering<MeanModel<V>> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
- final NumberVector.Factory<V, ?> factory = RelationUtil.getNumberVectorFactory(relation);
+ Clustering<MeanModel> result = new Clustering<>("k-Means Samesize Clustering", "kmeans-samesize-clustering");
for(int i = 0; i < clusters.size(); i++) {
- V mean = factory.newNumberVector(means.get(i).getColumnVector().getArrayRef());
- result.addToplevelCluster(new Cluster<>(clusters.get(i), new MeanModel<>(mean)));
+ Vector mean = means.get(i).getColumnVector();
+ result.addToplevelCluster(new Cluster<>(clusters.get(i), new MeanModel(mean)));
}
return result;
}
@@ -141,10 +139,8 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param means Mean vectors
* @return Initialized storage
*/
- protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, List<? extends NumberVector<?>> means) {
- // This is a safe cast - see constructor.
- @SuppressWarnings("unchecked")
- PrimitiveDoubleDistanceFunction<NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) getDistanceFunction();
+ protected WritableDataStore<Meta> initializeMeta(Relation<V> relation, List<? extends NumberVector> means) {
+ PrimitiveDistanceFunction<? super NumberVector> df = getDistanceFunction();
// The actual storage
final WritableDataStore<Meta> metas = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Meta.class);
// Build the metadata, track the two nearest cluster centers.
@@ -152,7 +148,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
Meta c = new Meta(k);
V fv = relation.get(id);
for(int i = 0; i < k; i++) {
- c.dists[i] = df.doubleDistance(fv, means.get(i));
+ c.dists[i] = df.distance(fv, means.get(i));
if(i > 0) {
if(c.dists[i] < c.dists[c.primary]) {
c.primary = i;
@@ -231,14 +227,14 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param metas Metadata storage
* @param df Distance function
*/
- protected void updateDistances(Relation<V> relation, List<? extends NumberVector<?>> means, final WritableDataStore<Meta> metas, PrimitiveDoubleDistanceFunction<NumberVector<?>> df) {
+ protected void updateDistances(Relation<V> relation, List<Vector> means, final WritableDataStore<Meta> metas, PrimitiveDistanceFunction<? super NumberVector> df) {
for(DBIDIter id = relation.iterDBIDs(); id.valid(); id.advance()) {
Meta c = metas.get(id);
V fv = relation.get(id);
// Update distances to means.
c.secondary = -1;
for(int i = 0; i < k; i++) {
- c.dists[i] = df.doubleDistance(fv, means.get(i));
+ c.dists[i] = df.distance(fv, means.get(i));
if(c.primary != i) {
if(c.secondary < 0 || c.dists[i] < c.dists[c.secondary]) {
c.secondary = i;
@@ -259,10 +255,9 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
* @param tids DBIDs array
* @return final means
*/
- protected List<? extends NumberVector<?>> refineResult(Relation<V> relation, List<? extends NumberVector<?>> means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
+ protected List<Vector> refineResult(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters, final WritableDataStore<Meta> metas, ArrayModifiableDBIDs tids) {
// This is a safe cast - see constructor.
- @SuppressWarnings("unchecked")
- PrimitiveDoubleDistanceFunction<NumberVector<?>> df = (PrimitiveDoubleDistanceFunction<NumberVector<?>>) getDistanceFunction();
+ PrimitiveDistanceFunction<? super NumberVector> df = getDistanceFunction();
// Our desired cluster size:
final int minsize = tids.size() / k; // rounded down
final int maxsize = (tids.size() + k - 1) / k; // rounded up
@@ -289,7 +284,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
transfers[i] = DBIDUtil.newArray();
}
- for(int iter = 0; maxiter < 0 || iter < maxiter; iter++) {
+ for(int iter = 0; maxiter <= 0 || iter < maxiter; iter++) {
updateDistances(relation, means, metas, df);
tids.sort(comp);
int active = 0; // Track if anything has changed
@@ -461,7 +456,7 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
*
* @apiviz.exclude
*/
- public static class Parameterizer<V extends NumberVector<?>> extends AbstractParameterizer {
+ public static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
/**
* k Parameter.
*/
@@ -480,12 +475,12 @@ public class SameSizeKMeansAlgorithm<V extends NumberVector<?>> extends Abstract
/**
* Distance function
*/
- protected PrimitiveDoubleDistanceFunction<? super NumberVector<?>> distanceFunction;
+ protected PrimitiveDistanceFunction<? super NumberVector> distanceFunction;
@Override
protected void makeOptions(Parameterization config) {
super.makeOptions(config);
- ObjectParameter<PrimitiveDoubleDistanceFunction<? super NumberVector<?>>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDoubleDistanceFunction.class);
+ ObjectParameter<PrimitiveDistanceFunction<? super NumberVector>> distanceFunctionP = makeParameterDistanceFunction(SquaredEuclideanDistanceFunction.class, PrimitiveDistanceFunction.class);
if(config.grab(distanceFunctionP)) {
distanceFunction = distanceFunctionP.instantiateClass(config);
if(!(distanceFunction instanceof EuclideanDistanceFunction) && !(distanceFunction instanceof SquaredEuclideanDistanceFunction)) {
diff --git a/src/tutorial/clustering/package-info.java b/src/tutorial/clustering/package-info.java
index ac7ab950..e2971ef8 100644
--- a/src/tutorial/clustering/package-info.java
+++ b/src/tutorial/clustering/package-info.java
@@ -5,7 +5,7 @@
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
- Copyright (C) 2013
+ Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team