10 files changed, 1261 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
new file mode 100644
index 00000000..d3c73b53
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -0,0 +1,310 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Abstract base class for k-means implementations.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?, ?>, D, Clustering<MeanModel<V>>> {
+  /**
+   * Parameter to specify the number of clusters to find, must be an integer
+   * greater than 0.
+   */
+  public static final OptionID K_ID = OptionID.getOrCreateOptionID("kmeans.k", "The number of clusters to find.");
+
+  /**
+   * Parameter to specify the number of clusters to find, must be an integer
+   * greater or equal to 0, where 0 means no limit.
+   */
+  public static final OptionID MAXITER_ID = OptionID.getOrCreateOptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
+
+  /**
+   * Parameter to specify the random generator seed.
+   */
+  public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("kmeans.seed", "The random number generator seed.");
+
+  /**
+   * Parameter to specify the initialization method
+   */
+  public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+
+  /**
+   * Holds the value of {@link #K_ID}.
+   */
+  protected int k;
+
+  /**
+   * Holds the value of {@link #MAXITER_ID}.
+   */
+  protected int maxiter;
+
+  /**
+   * Method to choose initial means.
+   */
+  protected KMeansInitialization<V> initializer;
+
+  /**
+   * Constructor.
+   * 
+   * @param distanceFunction distance function
+   * @param k k parameter
+   * @param maxiter Maxiter parameter
+   */
+  public AbstractKMeans(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+    super(distanceFunction);
+    this.k = k;
+    this.maxiter = maxiter;
+    this.initializer = initializer;
+  }
+
+  /**
+   * Returns a list of clusters. The k<sup>th</sup> cluster contains the ids of
+   * those FeatureVectors, that are nearest to the k<sup>th</sup> mean.
+   * 
+   * @param relation the database to cluster
+   * @param means a list of k means
+   * @param clusters cluster assignment
+   * @return true when the object was reassigned
+   */
+  protected boolean assignToNearestCluster(Relation<V> relation, List<Vector> means, List<? extends ModifiableDBIDs> clusters) {
+    boolean changed = false;
+
+    if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+      @SuppressWarnings("unchecked")
+      final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+      for(DBID id : relation.iterDBIDs()) {
+        double mindist = Double.POSITIVE_INFINITY;
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          double dist = df.doubleDistance(fv, means.get(i));
+          if(dist < mindist) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        if(clusters.get(minIndex).add(id)) {
+          changed = true;
+          // Remove from previous cluster
+          // TODO: keep a list of cluster assignments to save this search?
+          for(int i = 0; i < k; i++) {
+            if(i != minIndex) {
+              if(clusters.get(i).remove(id)) {
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+    else {
+      final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+      for(DBID id : relation.iterDBIDs()) {
+        D mindist = df.getDistanceFactory().infiniteDistance();
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          D dist = df.distance(fv, means.get(i));
+          if(dist.compareTo(mindist) < 0) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        if(clusters.get(minIndex).add(id)) {
+          changed = true;
+          // Remove from previous cluster
+          // TODO: keep a list of cluster assignments to save this search?
+          for(int i = 0; i < k; i++) {
+            if(i != minIndex) {
+              if(clusters.get(i).remove(id)) {
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+    return changed;
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+  }
+
+  /**
+   * Returns the mean vectors of the given clusters in the given database.
+   * 
+   * @param clusters the clusters to compute the means
+   * @param means the recent means
+   * @param database the database containing the vectors
+   * @return the mean vectors of the given clusters in the given database
+   */
+  protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<Vector> means, Relation<V> database) {
+    List<Vector> newMeans = new ArrayList<Vector>(k);
+    for(int i = 0; i < k; i++) {
+      ModifiableDBIDs list = clusters.get(i);
+      Vector mean = null;
+      for(Iterator<DBID> clusterIter = list.iterator(); clusterIter.hasNext();) {
+        if(mean == null) {
+          mean = database.get(clusterIter.next()).getColumnVector();
+        }
+        else {
+          mean.plusEquals(database.get(clusterIter.next()).getColumnVector());
+        }
+      }
+      if(list.size() > 0) {
+        assert mean != null;
+        mean.timesEquals(1.0 / list.size());
+      }
+      else {
+        mean = means.get(i);
+      }
+      newMeans.add(mean);
+    }
+    return newMeans;
+  }
+
+  /**
+   * Compute an incremental update for the mean
+   * 
+   * @param mean Mean to update
+   * @param vec Object vector
+   * @param newsize (New) size of cluster
+   * @param op Cluster size change / Weight change
+   */
+  protected void incrementalUpdateMean(Vector mean, V vec, int newsize, double op) {
+    if(newsize == 0) {
+      return; // Keep old mean
+    }
+    Vector delta = vec.getColumnVector();
+    // Compute difference from mean
+    delta.minusEquals(mean);
+    delta.timesEquals(op / newsize);
+    mean.plusEquals(delta);
+  }
+
+  /**
+   * Perform a MacQueen style iteration.
+   * 
+   * @param relation Relation
+   * @param means Means
+   * @param clusters Clusters
+   * @return true when the means have changed
+   */
+  protected boolean macQueenIterate(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters) {
+    boolean changed = false;
+  
+    if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+      // Raw distance function
+      @SuppressWarnings("unchecked")
+      final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+  
+      // Incremental update
+      for(DBID id : relation.iterDBIDs()) {
+        double mindist = Double.POSITIVE_INFINITY;
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          double dist = df.doubleDistance(fv, means.get(i));
+          if(dist < mindist) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        // Update the cluster mean incrementally:
+        for(int i = 0; i < k; i++) {
+          ModifiableDBIDs ci = clusters.get(i);
+          if(i == minIndex) {
+            if(ci.add(id)) {
+              incrementalUpdateMean(means.get(i), relation.get(id), ci.size(), +1);
+              changed = true;
+            }
+          }
+          else if(ci.remove(id)) {
+            incrementalUpdateMean(means.get(i), relation.get(id), ci.size() + 1, -1);
+            changed = true;
+          }
+        }
+      }
+    }
+    else {
+      // Raw distance function
+      final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+  
+      // Incremental update
+      for(DBID id : relation.iterDBIDs()) {
+        D mindist = df.getDistanceFactory().infiniteDistance();
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          D dist = df.distance(fv, means.get(i));
+          if(dist.compareTo(mindist) < 0) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        // Update the cluster mean incrementally:
+        for(int i = 0; i < k; i++) {
+          ModifiableDBIDs ci = clusters.get(i);
+          if(i == minIndex) {
+            if(ci.add(id)) {
+              incrementalUpdateMean(means.get(i), relation.get(id), ci.size(), +1);
+              changed = true;
+            }
+          }
+          else if(ci.remove(id)) {
+            incrementalUpdateMean(means.get(i), relation.get(id), ci.size() + 1, -1);
+            changed = true;
+          }
+        }
+      }
+    }
+    return changed;
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
new file mode 100644
index 00000000..b5f088fb
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
@@ -0,0 +1,71 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+
+/**
+ * Abstract base class for common k-means initializations.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public abstract class AbstractKMeansInitialization<V extends NumberVector<V, ?>> implements KMeansInitialization<V> {
+  /**
+   * Holds the value of {@link KMeansLloyd#SEED_ID}.
+   */
+  protected Long seed;
+
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public AbstractKMeansInitialization(Long seed) {
+    this.seed = seed;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public abstract static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+    protected Long seed;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      LongParameter seedP = new LongParameter(AbstractKMeans.SEED_ID, true);
+      if(config.grab(seedP)) {
+        seed = seedP.getValue();
+      }
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
new file mode 100644
index 00000000..78ccd426
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
@@ -0,0 +1,74 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Initialize K-means by using the first k objects as initial means.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public class FirstKInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   */
+  public FirstKInitialMeans() {
+    super(null);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    Iterator<DBID> iter = relation.iterDBIDs();
+    List<Vector> means = new ArrayList<Vector>(k);
+    for(int i = 0; i < k && iter.hasNext(); i++) {
+      means.add(relation.get(iter.next()).getColumnVector());
+    }
+    return means;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+    @Override
+    protected FirstKInitialMeans<V> makeInstance() {
+      return new FirstKInitialMeans<V>();
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
new file mode 100644
index 00000000..f4c0d9c7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Interface for initializing K-Means
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public interface KMeansInitialization<V extends NumberVector<V, ?>> {
+  /**
+   * Choose initial means
+   * 
+   * @param relation Relation
+   * @param k Parameter k
+   * @param distanceFunction Distance function 
+   * @return List of chosen means for k-means
+   */
+  public abstract List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction);
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
new file mode 100644
index 00000000..fda1d6c0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -0,0 +1,176 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Provides the k-means algorithm, using Lloyd-style bulk iterations.
+ * 
+ * <p>
+ * Reference:<br />
+ * S. Lloyd<br/>
+ * Least squares quantization in PCM<br/>
+ * IEEE Transactions on Information Theory 28 (2)<br/>
+ * previously published as Bell Telephone Laboratories Paper
+ * </p>
+ * 
+ * @author Arthur Zimek
+ * 
+ * @apiviz.has MeanModel
+ * 
+ * @param <V> vector datatype
+ * @param <D> distance value type
+ */
+@Title("K-Means")
+@Description("Finds a partitioning into k clusters.")
+@Reference(authors = "S. Lloyd", title = "Least squares quantization in PCM", booktitle = "IEEE Transactions on Information Theory 28 (2): 129–137.", url = "http://dx.doi.org/10.1109/TIT.1982.1056489")
+public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(KMeansLloyd.class);
+
+  /**
+   * Constructor.
+   * 
+   * @param distanceFunction distance function
+   * @param k k parameter
+   * @param maxiter Maxiter parameter
+   */
+  public KMeansLloyd(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+    super(distanceFunction, k, maxiter, initializer);
+  }
+
+  /**
+   * Run k-means
+   * 
+   * @param database Database
+   * @param relation relation to use
+   * @return result
+   * @throws IllegalStateException
+   */
+  public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
+    if(relation.size() <= 0) {
+      return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    }
+    // Choose initial means
+    List<Vector> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+    // Setup cluster assignment store
+    List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+    for(int i = 0; i < k; i++) {
+      clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+    }
+
+    for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+      if(logger.isVerbose()) {
+        logger.verbose("K-Means iteration " + (iteration + 1));
+      }
+      boolean changed = assignToNearestCluster(relation, means, clusters);
+      // Stop if no cluster assignment changed.
+      if(!changed) {
+        break;
+      }
+      // Recompute means.
+      means = means(clusters, means, relation);
+    }
+    // Wrap result
+    final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+    Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    for(int i = 0; i < clusters.size(); i++) {
+      MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
+      result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
+    }
+    return result;
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return logger;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+    protected int k;
+
+    protected int maxiter;
+
+    protected KMeansInitialization<V> initializer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
+      if(config.grab(kP)) {
+        k = kP.getValue();
+      }
+
+      ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+      if(config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+
+      IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
+      if(config.grab(maxiterP)) {
+        maxiter = maxiterP.getValue();
+      }
+    }
+
+    @Override
+    protected AbstractKMeans<V, D> makeInstance() {
+      return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
new file mode 100644
index 00000000..56492dd0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -0,0 +1,177 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Provides the k-means algorithm, using MacQueen style incremental updates.
+ * 
+ * <p>
+ * Reference:<br />
+ * J. MacQueen: Some Methods for Classification and Analysis of Multivariate
+ * Observations. <br />
+ * In 5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297.
+ * </p>
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.has MeanModel
+ * 
+ * @param <V> vector type to use
+ * @param <D> distance function value type
+ */
+@Title("K-Means")
+@Description("Finds a partitioning into k clusters.")
+@Reference(authors = "J. MacQueen", title = "Some Methods for Classification and Analysis of Multivariate Observations", booktitle = "5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297", url = "http://projecteuclid.org/euclid.bsmsp/1200512992")
+public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(KMeansMacQueen.class);
+
+  /**
+   * Constructor.
+   * 
+   * @param distanceFunction distance function
+   * @param k k parameter
+   * @param maxiter Maxiter parameter
+   */
+  public KMeansMacQueen(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+    super(distanceFunction, k, maxiter, initializer);
+  }
+
+  /**
+   * Run k-means
+   * 
+   * @param database Database
+   * @param relation relation to use
+   * @return result
+   * @throws IllegalStateException
+   */
+  public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
+    if(relation.size() <= 0) {
+      return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    }
+    // Choose initial means
+    List<Vector> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+    // Initialize cluster and assign objects
+    List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+    for(int i = 0; i < k; i++) {
+      clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+    }
+    assignToNearestCluster(relation, means, clusters);
+    // Initial recomputation of the means.
+    means = means(clusters, means, relation);
+
+    // Refine result
+    for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+      if(logger.isVerbose()) {
+        logger.verbose("K-Means iteration " + (iteration + 1));
+      }
+      boolean changed = macQueenIterate(relation, means, clusters);
+      if(!changed) {
+        break;
+      }
+    }
+    final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+    Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    for(int i = 0; i < clusters.size(); i++) {
+      DBIDs ids = clusters.get(i);
+      MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
+      result.addCluster(new Cluster<MeanModel<V>>(ids, model));
+    }
+    return result;
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return logger;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+    protected int k;
+
+    protected int maxiter;
+
+    protected KMeansInitialization<V> initializer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
+      if(config.grab(kP)) {
+        k = kP.getValue();
+      }
+
+      ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+      if(config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+
+      IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
+      if(config.grab(maxiterP)) {
+        maxiter = maxiterP.getValue();
+      }
+    }
+
+    @Override
+    protected AbstractKMeans<V, D> makeInstance() {
+      return new KMeansMacQueen<V, D>(distanceFunction, k, maxiter, initializer);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
new file mode 100644
index 00000000..c7a2fa1d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -0,0 +1,213 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/**
+ * K-Means++ initialization for k-means.
+ * 
+ * Reference:
+ * <p>
+ * D. Arthur, S. Vassilvitskii<br />
+ * k-means++: the advantages of careful seeding<br />
+ * In: Proc. of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms,
+ * SODA 2007
+ * </p>
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+@Reference(authors = "D. Arthur, S. Vassilvitskii", title = "k-means++: the advantages of careful seeding", booktitle = "Proc. of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2007", url = "http://dx.doi.org/10.1145/1283383.1283494")
+public class KMeansPlusPlusInitialMeans<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public KMeansPlusPlusInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    // Get a distance query
+    if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
+      throw new AbortException("K-Means++ initialization can only be used with numerical distances.");
+    }
+    @SuppressWarnings("unchecked")
+    final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
+    DistanceQuery<V, D> distQ = relation.getDatabase().getDistanceQuery(relation, distF);
+
+    // Chose first mean
+    List<Vector> means = new ArrayList<Vector>(k);
+
+    Random random = (seed != null) ? new Random(seed) : new Random();
+    DBID first = DBIDUtil.randomSample(relation.getDBIDs(), 1, random.nextLong()).iterator().next();
+    means.add(relation.get(first).getColumnVector());
+
+    ModifiableDBIDs chosen = DBIDUtil.newHashSet(k);
+    chosen.add(first);
+    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+    // Initialize weights
+    double[] weights = new double[ids.size()];
+    double weightsum = initialWeights(weights, ids, first, distQ);
+    while(means.size() < k) {
+      if(weightsum > Double.MAX_VALUE) {
+        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
+      }
+      if(weightsum < Double.MIN_NORMAL) {
+        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
+      }
+      double r = random.nextDouble() * weightsum;
+      int pos = 0;
+      while(r > 0 && pos < weights.length) {
+        r -= weights[pos];
+        pos++;
+      }
+      // Add new mean:
+      DBID newmean = ids.get(pos);
+      means.add(relation.get(newmean).getColumnVector());
+      chosen.add(newmean);
+      // Update weights:
+      weights[pos] = 0.0;
+      // Choose optimized version for double distances, if applicable.
+      if (distF instanceof PrimitiveDoubleDistanceFunction) {
+        @SuppressWarnings("unchecked")
+        PrimitiveDoubleDistanceFunction<V> ddist = (PrimitiveDoubleDistanceFunction<V>) distF;
+        weightsum = updateWeights(weights, ids, newmean, ddist, relation);
+      } else {
+        weightsum = updateWeights(weights, ids, newmean, distQ);
+      }
+    }
+
+    return means;
+  }
+
+  /**
+   * Initialize the weight list.
+   * 
+   * @param weights Weight list
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distQ Distance query
+   * @return Weight sum
+   */
+  protected double initialWeights(double[] weights, ArrayDBIDs ids, DBID latest, DistanceQuery<V, D> distQ) {
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      if(latest.equals(id)) {
+        weights[i] = 0.0;
+      }
+      else {
+        double d = distQ.distance(latest, id).doubleValue();
+        weights[i] = d * d;
+      }
+      weightsum += weights[i];
+    }
+    return weightsum;
+  }
+
+  /**
+   * Update the weight list.
+   * 
+   * @param weights Weight list
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distQ Distance query
+   * @return Weight sum
+   */
+  protected double updateWeights(double[] weights, ArrayDBIDs ids, DBID latest, DistanceQuery<V, D> distQ) {
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      if(weights[i] > 0.0) {
+        double d = distQ.distance(latest, id).doubleValue();
+        weights[i] = Math.min(weights[i], d * d);
+        weightsum += weights[i];
+      }
+    }
+    return weightsum;
+  }
+
+  /**
+   * Update the weight list.
+   * 
+   * @param weights Weight list
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distF Distance function
+   * @return Weight sum
+   */
+  protected double updateWeights(double[] weights, ArrayDBIDs ids, DBID latest, PrimitiveDoubleDistanceFunction<V> distF, Relation<V> rel) {
+    final V lv = rel.get(latest);
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      if(weights[i] > 0.0) {
+        double d = distF.doubleDistance(lv, rel.get(id));
+        weights[i] = Math.min(weights[i], d * d);
+        weightsum += weights[i];
+      }
+    }
+    return weightsum;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+    @Override
+    protected KMeansPlusPlusInitialMeans<V, D> makeInstance() {
+      return new KMeansPlusPlusInitialMeans<V, D>(seed);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
new file mode 100644
index 00000000..30e59453
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
@@ -0,0 +1,78 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Initialize K-means by randomly choosing k exsiting elements as cluster
+ * centers.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public class RandomlyChosenInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public RandomlyChosenInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, seed);
+    List<Vector> means = new ArrayList<Vector>(k);
+    for(DBID id : ids) {
+      means.add(relation.get(id).getColumnVector());
+    }
+    return means;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+
+    @Override
+    protected RandomlyChosenInitialMeans<V> makeInstance() {
+      return new RandomlyChosenInitialMeans<V>(seed);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
new file mode 100644
index 00000000..e8a466dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -0,0 +1,87 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Initialize k-means by generating random vectors (within the data sets value
+ * range).
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public class RandomlyGeneratedInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public RandomlyGeneratedInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    final int dim = DatabaseUtil.dimensionality(relation);
+    Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
+    List<Vector> means = new ArrayList<Vector>(k);
+    final Random random = (this.seed != null) ? new Random(this.seed) : new Random();
+    for(int i = 0; i < k; i++) {
+      double[] r = MathUtil.randomDoubleArray(dim, random);
+      // Rescale
+      for(int d = 0; d < dim; d++) {
+        r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
+      }
+      means.add(new Vector(r));
+    }
+    return means;
+  }
+
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+
+    @Override
+    protected RandomlyGeneratedInitialMeans<V> makeInstance() {
+      return new RandomlyGeneratedInitialMeans<V>(seed);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
new file mode 100644
index 00000000..2ce625b0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>K-means clustering and variations.</p>
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+\ No newline at end of file