50 files changed, 2124 insertions, 481 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
index 5712d814..ea441655 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
index 95d88b93..108ba0ed 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/AbstractProjectedDBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -340,9 +340,9 @@ public abstract class AbstractProjectedDBSCAN<R extends Clustering<Model>, V ext
         }
       }
 
-      if(processedIDs.size() == distFunc.getRelation().size() && noise.size() == 0) {
+      /* if(processedIDs.size() == relation.size() && noise.size() == 0) {
         break;
-      }
+      } */
     }
 
     if(currentCluster.size() >= minpts) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
index e28dbff3..5ec59777 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/ClusteringAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
index 2576c5f6..b59af555 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DBSCAN.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -33,7 +33,6 @@ import de.lmu.ifi.dbs.elki.data.model.ClusterModel;
 import de.lmu.ifi.dbs.elki.data.model.Model;
 import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
 import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
 import de.lmu.ifi.dbs.elki.database.QueryUtil;
 import de.lmu.ifi.dbs.elki.database.ids.DBID;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
@@ -132,7 +131,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
   /**
    * Performs the DBSCAN algorithm on the given database.
    */
-  public Clustering<Model> run(Database database, Relation<O> relation) {
+  public Clustering<Model> run(Relation<O> relation) {
     RangeQuery<O, D> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction());
     final int size = relation.size();
 
@@ -142,9 +141,9 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
     noise = DBIDUtil.newHashSet();
     processedIDs = DBIDUtil.newHashSet(size);
     if(size >= minpts) {
-      for(DBID id : rangeQuery.getRelation().iterDBIDs()) {
+      for(DBID id : relation.iterDBIDs()) {
         if(!processedIDs.contains(id)) {
-          expandCluster(database, rangeQuery, id, objprog, clusprog);
+          expandCluster(relation, rangeQuery, id, objprog, clusprog);
         }
         if(objprog != null && clusprog != null) {
           objprog.setProcessed(processedIDs.size(), logger);
@@ -156,7 +155,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
       }
     }
     else {
-      for(DBID id : rangeQuery.getRelation().iterDBIDs()) {
+      for(DBID id : relation.iterDBIDs()) {
         noise.add(id);
         if(objprog != null && clusprog != null) {
           objprog.setProcessed(noise.size(), logger);
@@ -189,12 +188,12 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
    * <p/>
    * Border-Objects become members of the first possible cluster.
    * 
-   * @param database the database on which the algorithm is run
+   * @param relation Database relation to run on
    * @param rangeQuery Range query to use
    * @param startObjectID potential seed of a new potential cluster
    * @param objprog the progress object for logging the current status
    */
-  protected void expandCluster(Database database, RangeQuery<O, D> rangeQuery, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
+  protected void expandCluster(Relation<O> relation, RangeQuery<O, D> rangeQuery, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
     List<DistanceResultPair<D>> seeds = rangeQuery.getRangeForDBID(startObjectID, epsilon);
 
     // startObject is no core-object
@@ -245,7 +244,7 @@ public class DBSCAN<O, D extends Distance<D>> extends AbstractDistanceBasedAlgor
         }
       }
 
-      if(processedIDs.size() == rangeQuery.getRelation().size() && noise.size() == 0) {
+      if(processedIDs.size() == relation.size() && noise.size() == 0) {
         break;
       }
 
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
index ca401ddc..f1e6c945 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/DeLiClu.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
index c1285659..a70a3f6f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/EM.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -25,9 +25,10 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
 
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Random;
 
 import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.RandomlyGeneratedInitialMeans;
 import de.lmu.ifi.dbs.elki.data.Cluster;
 import de.lmu.ifi.dbs.elki.data.Clustering;
 import de.lmu.ifi.dbs.elki.data.NumberVector;
@@ -42,6 +43,7 @@ import de.lmu.ifi.dbs.elki.database.ids.DBID;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
 import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
 import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.EuclideanDistanceFunction;
 import de.lmu.ifi.dbs.elki.logging.Logging;
 import de.lmu.ifi.dbs.elki.math.MathUtil;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
@@ -58,8 +60,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualCons
 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
 
 /**
  * Provides the EM algorithm (clustering by expectation maximization).
@@ -113,6 +114,11 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
    */
   public static final OptionID DELTA_ID = OptionID.getOrCreateOptionID("em.delta", "The termination criterion for maximization of E(M): " + "E(M) - E(M') < em.delta");
 
+  /**
+   * Parameter to specify the initialization method
+   */
+  public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+
   private static final double MIN_LOGLIKELIHOOD = -100000;
 
   /**
@@ -121,32 +127,27 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
   private double delta;
 
   /**
-   * Parameter to specify the random generator seed.
+   * Store the individual probabilities, for use by EMOutlierDetection etc.
    */
-  public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("em.seed", "The random number generator seed.");
+  private WritableDataStore<double[]> probClusterIGivenX;
 
   /**
-   * Holds the value of {@link #SEED_ID}.
+   * Class to choose the initial means
    */
-  private Long seed;
-
-  /**
-   * Store the individual probabilities, for use by EMOutlierDetection etc.
-   */
-  private WritableDataStore<double[]> probClusterIGivenX;
+  private KMeansInitialization<V> initializer;
 
   /**
    * Constructor.
    * 
    * @param k k parameter
    * @param delta delta parameter
-   * @param seed Seed parameter
+   * @param initializer Class to choose the initial means
    */
-  public EM(int k, double delta, Long seed) {
+  public EM(int k, double delta, KMeansInitialization<V> initializer) {
     super();
     this.k = k;
     this.delta = delta;
-    this.seed = seed;
+    this.initializer = initializer;
   }
 
   /**
@@ -169,14 +170,14 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
     if(logger.isVerbose()) {
       logger.verbose("initializing " + k + " models");
     }
-    List<V> means = initialMeans(relation);
+    List<Vector> means = initializer.chooseInitialMeans(relation, k, EuclideanDistanceFunction.STATIC);
     List<Matrix> covarianceMatrices = new ArrayList<Matrix>(k);
     List<Double> normDistrFactor = new ArrayList<Double>(k);
     List<Matrix> invCovMatr = new ArrayList<Matrix>(k);
     List<Double> clusterWeights = new ArrayList<Double>(k);
     probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
 
-    int dimensionality = means.get(0).getDimensionality();
+    final int dimensionality = means.get(0).getDimensionality();
     for(int i = 0; i < k; i++) {
       Matrix m = Matrix.identity(dimensionality, dimensionality);
       covarianceMatrices.add(m);
@@ -211,12 +212,12 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
       em = emNew;
 
       // recompute models
-      List<V> meanSums = new ArrayList<V>(k);
+      List<Vector> meanSums = new ArrayList<Vector>(k);
       double[] sumOfClusterProbabilities = new double[k];
 
       for(int i = 0; i < k; i++) {
         clusterWeights.set(i, 0.0);
-        meanSums.add(means.get(i).nullVector());
+        meanSums.add(new Vector(dimensionality));
         covarianceMatrices.set(i, Matrix.zeroMatrix(dimensionality));
       }
 
@@ -226,24 +227,23 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
 
         for(int i = 0; i < k; i++) {
           sumOfClusterProbabilities[i] += clusterProbabilities[i];
-          V summand = relation.get(id).multiplicate(clusterProbabilities[i]);
-          V currentMeanSum = meanSums.get(i).plus(summand);
-          meanSums.set(i, currentMeanSum);
+          Vector summand = relation.get(id).getColumnVector().timesEquals(clusterProbabilities[i]);
+          meanSums.get(i).plusEquals(summand);
         }
       }
       final int n = relation.size();
       for(int i = 0; i < k; i++) {
         clusterWeights.set(i, sumOfClusterProbabilities[i] / n);
-        V newMean = meanSums.get(i).multiplicate(1 / sumOfClusterProbabilities[i]);
+        Vector newMean = meanSums.get(i).timesEquals(1 / sumOfClusterProbabilities[i]);
         means.set(i, newMean);
       }
       // covariance matrices
       for(DBID id : relation.iterDBIDs()) {
         double[] clusterProbabilities = probClusterIGivenX.get(id);
-        V instance = relation.get(id);
+        Vector instance = relation.get(id).getColumnVector();
         for(int i = 0; i < k; i++) {
-          V difference = instance.minus(means.get(i));
-          covarianceMatrices.get(i).plusEquals(difference.getColumnVector().times(difference.getRowVector()).times(clusterProbabilities[i]));
+          Vector difference = instance.minus(means.get(i));
+          covarianceMatrices.get(i).plusEquals(difference.timesTranspose(difference).timesEquals(clusterProbabilities[i]));
         }
       }
       for(int i = 0; i < k; i++) {
@@ -281,13 +281,14 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
       }
       hardClusters.get(maxIndex).add(id);
     }
+    final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
     Clustering<EMModel<V>> result = new Clustering<EMModel<V>>("EM Clustering", "em-clustering");
     // provide models within the result
     for(int i = 0; i < k; i++) {
       // TODO: re-do labeling.
       // SimpleClassLabel label = new SimpleClassLabel();
       // label.init(result.canonicalClusterLabel(i));
-      Cluster<EMModel<V>> model = new Cluster<EMModel<V>>(hardClusters.get(i), new EMModel<V>(means.get(i), covarianceMatrices.get(i)));
+      Cluster<EMModel<V>> model = new Cluster<EMModel<V>>(hardClusters.get(i), new EMModel<V>(factory.newNumberVector(means.get(i).getArrayRef()), covarianceMatrices.get(i)));
       result.addCluster(model);
     }
     return result;
@@ -308,24 +309,20 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
    * @param clusterWeights the weights of the current clusters
    * @return the expectation value of the current mixture of distributions
    */
-  protected double assignProbabilitiesToInstances(Relation<V> database, List<Double> normDistrFactor, List<V> means, List<Matrix> invCovMatr, List<Double> clusterWeights, WritableDataStore<double[]> probClusterIGivenX) {
+  protected double assignProbabilitiesToInstances(Relation<V> database, List<Double> normDistrFactor, List<Vector> means, List<Matrix> invCovMatr, List<Double> clusterWeights, WritableDataStore<double[]> probClusterIGivenX) {
     double emSum = 0.0;
 
     for(DBID id : database.iterDBIDs()) {
-      V x = database.get(id);
+      Vector x = database.get(id).getColumnVector();
       List<Double> probabilities = new ArrayList<Double>(k);
       for(int i = 0; i < k; i++) {
-        V difference = x.minus(means.get(i));
-        Matrix differenceRow = difference.getRowVector();
-        Vector differenceCol = difference.getColumnVector();
-        Matrix rowTimesCov = differenceRow.times(invCovMatr.get(i));
-        Vector rowTimesCovTimesCol = rowTimesCov.times(differenceCol);
-        double power = rowTimesCovTimesCol.get(0, 0) / 2.0;
+        Vector difference = x.minus(means.get(i));
+        double rowTimesCovTimesCol = difference.transposeTimesTimes(invCovMatr.get(i), difference);
+        double power = rowTimesCovTimesCol / 2.0;
         double prob = normDistrFactor.get(i) * Math.exp(-power);
         if(logger.isDebuggingFinest()) {
-          logger.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " differenceRow:\n" + FormatUtil.format(differenceRow, "    ") + "\n" + " differenceCol:\n" + FormatUtil.format(differenceCol, "    ") + "\n" + " rowTimesCov:\n" + FormatUtil.format(rowTimesCov, "    ") + "\n" + " rowTimesCovTimesCol:\n" + FormatUtil.format(rowTimesCovTimesCol, "    ") + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), "     "));
+          logger.debugFinest(" difference vector= ( " + difference.toString() + " )\n" + " difference:\n" + FormatUtil.format(difference, "    ") + "\n" + " rowTimesCovTimesCol:\n" + rowTimesCovTimesCol + "\n" + " power= " + power + "\n" + " prob=" + prob + "\n" + " inv cov matrix: \n" + FormatUtil.format(invCovMatr.get(i), "     "));
         }
-
         probabilities.add(prob);
       }
       double priorProbability = 0.0;
@@ -356,48 +353,6 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
   }
 
   /**
-   * Creates {@link #k k} random points distributed uniformly within the
-   * attribute ranges of the given database.
-   * 
-   * @param relation the database must contain enough points in order to
-   *        ascertain the range of attribute values. Less than two points would
-   *        make no sense. The content of the database is not touched otherwise.
-   * @return a list of {@link #k k} random points distributed uniformly within
-   *         the attribute ranges of the given database
-   */
-  protected List<V> initialMeans(Relation<V> relation) {
-    final Random random;
-    if(this.seed != null) {
-      random = new Random(this.seed);
-    }
-    else {
-      random = new Random();
-    }
-    if(relation.size() > 0) {
-      final int dim = DatabaseUtil.dimensionality(relation);
-      Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
-      List<V> means = new ArrayList<V>(k);
-      if(logger.isVerbose()) {
-        logger.verbose("initializing random vectors");
-      }
-      for(int i = 0; i < k; i++) {
-        double[] r = MathUtil.randomDoubleArray(dim, random);
-        // Rescale
-        for (int d = 0; d < dim; d++) {
-          r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
-        }
-        // Instantiate
-        V randomVector = minmax.first.newInstance(r);
-        means.add(randomVector);
-      }
-      return means;
-    }
-    else {
-      return new ArrayList<V>(0);
-    }
-  }
-
-  /**
    * Get the probabilities for a given point.
    * 
    * @param index Point ID
@@ -429,7 +384,7 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
 
     protected double delta;
 
-    protected Long seed;
+    protected KMeansInitialization<V> initializer;
 
     @Override
     protected void makeOptions(Parameterization config) {
@@ -439,20 +394,20 @@ public class EM<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clusteri
         k = kP.getValue();
       }
 
+      ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+      if(config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+
       DoubleParameter deltaP = new DoubleParameter(DELTA_ID, new GreaterEqualConstraint(0.0), 0.0);
       if(config.grab(deltaP)) {
         delta = deltaP.getValue();
       }
-
-      LongParameter seedP = new LongParameter(SEED_ID, true);
-      if(config.grab(seedP)) {
-        seed = seedP.getValue();
-      }
     }
 
     @Override
     protected EM<V> makeInstance() {
-      return new EM<V>(k, delta, seed);
+      return new EM<V>(k, delta, initializer);
     }
   }
 }
 \ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/KMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/KMeans.java
deleted file mode 100644
index 38ea89c2..00000000
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/KMeans.java
+++ /dev/null
@@ -1,307 +0,0 @@
-package de.lmu.ifi.dbs.elki.algorithm.clustering;
-
-/*
- This file is part of ELKI:
- Environment for Developing KDD-Applications Supported by Index-Structures
-
- Copyright (C) 2011
- Ludwig-Maximilians-Universität München
- Lehr- und Forschungseinheit für Datenbanksysteme
- ELKI Development Team
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Random;
-
-import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
-import de.lmu.ifi.dbs.elki.data.Cluster;
-import de.lmu.ifi.dbs.elki.data.Clustering;
-import de.lmu.ifi.dbs.elki.data.NumberVector;
-import de.lmu.ifi.dbs.elki.data.model.MeanModel;
-import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
-import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
-import de.lmu.ifi.dbs.elki.database.Database;
-import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.DBID;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
-import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
-import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
-import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
-import de.lmu.ifi.dbs.elki.logging.Logging;
-import de.lmu.ifi.dbs.elki.math.MathUtil;
-import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
-import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
-import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
-import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
-
-/**
- * Provides the k-means algorithm.
- * <p>
- * Reference: J. MacQueen: Some Methods for Classification and Analysis of
- * Multivariate Observations. <br>
- * In 5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297.
- * </p>
- * 
- * @author Arthur Zimek
- * 
- * @apiviz.has MeanModel
- * 
- * @param <D> a type of {@link Distance} as returned by the used distance
- *        function
- * @param <V> a type of {@link NumberVector} as a suitable datatype for this
- *        algorithm
- */
-@Title("K-Means")
-@Description("Finds a partitioning into k clusters.")
-@Reference(authors = "J. MacQueen", title = "Some Methods for Classification and Analysis of Multivariate Observations", booktitle = "5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297", url = "http://projecteuclid.org/euclid.bsmsp/1200512992")
-public class KMeans<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<V, D, Clustering<MeanModel<V>>> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
-  /**
-   * The logger for this class.
-   */
-  private static final Logging logger = Logging.getLogger(KMeans.class);
-
-  /**
-   * Parameter to specify the number of clusters to find, must be an integer
-   * greater than 0.
-   */
-  public static final OptionID K_ID = OptionID.getOrCreateOptionID("kmeans.k", "The number of clusters to find.");
-
-  /**
-   * Parameter to specify the number of clusters to find, must be an integer
-   * greater or equal to 0, where 0 means no limit.
-   */
-  public static final OptionID MAXITER_ID = OptionID.getOrCreateOptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
-
-  /**
-   * Parameter to specify the random generator seed.
-   */
-  public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("kmeans.seed", "The random number generator seed.");
-
-  /**
-   * Holds the value of {@link #K_ID}.
-   */
-  private int k;
-
-  /**
-   * Holds the value of {@link #MAXITER_ID}.
-   */
-  private int maxiter;
-
-  /**
-   * Holds the value of {@link #SEED_ID}.
-   */
-  private Long seed;
-
-  /**
-   * Constructor.
-   * 
-   * @param distanceFunction distance function
-   * @param k k parameter
-   * @param maxiter Maxiter parameter
-   * @param seed Random generator seed
-   */
-  public KMeans(PrimitiveDistanceFunction<? super V, D> distanceFunction, int k, int maxiter, Long seed) {
-    super(distanceFunction);
-    this.k = k;
-    this.maxiter = maxiter;
-    this.seed = seed;
-  }
-
-  /**
-   * Run k-means
-   * 
-   * @param database Database
-   * @param relation relation to use
-   * @return result
-   * @throws IllegalStateException
-   */
-  public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
-    final Random random = (this.seed != null) ? new Random(this.seed) : new Random();
-    if(relation.size() > 0) {
-      final int dim = DatabaseUtil.dimensionality(relation);
-      Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
-      List<V> means = new ArrayList<V>(k);
-      List<V> oldMeans;
-      if(logger.isVerbose()) {
-        logger.verbose("initializing random vectors");
-      }
-      for(int i = 0; i < k; i++) {
-        double[] r = MathUtil.randomDoubleArray(dim, random);
-        // Rescale
-        for (int d = 0; d < dim; d++) {
-          r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
-        }
-        // Instantiate
-        V randomVector = minmax.first.newInstance(r);
-        means.add(randomVector);
-      }
-      List<? extends ModifiableDBIDs> clusters;
-      clusters = sort(means, relation);
-      boolean changed = true;
-      int iteration = 1;
-      while(changed) {
-        if(logger.isVerbose()) {
-          logger.verbose("iteration " + iteration);
-        }
-        oldMeans = new ArrayList<V>(means);
-        means = means(clusters, means, relation);
-        clusters = sort(means, relation);
-        changed = !means.equals(oldMeans);
-        iteration++;
-
-        if(maxiter > 0 && iteration > maxiter) {
-          break;
-        }
-      }
-      Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
-      for(int i = 0; i < clusters.size(); i++) {
-        DBIDs ids = clusters.get(i);
-        MeanModel<V> model = new MeanModel<V>(means.get(i));
-        result.addCluster(new Cluster<MeanModel<V>>(ids, model));
-      }
-      return result;
-    }
-    else {
-      return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
-    }
-  }
-
-  /**
-   * Returns the mean vectors of the given clusters in the given database.
-   * 
-   * @param clusters the clusters to compute the means
-   * @param means the recent means
-   * @param database the database containing the vectors
-   * @return the mean vectors of the given clusters in the given database
-   */
-  protected List<V> means(List<? extends ModifiableDBIDs> clusters, List<V> means, Relation<V> database) {
-    List<V> newMeans = new ArrayList<V>(k);
-    for(int i = 0; i < k; i++) {
-      ModifiableDBIDs list = clusters.get(i);
-      V mean = null;
-      for(Iterator<DBID> clusterIter = list.iterator(); clusterIter.hasNext();) {
-        if(mean == null) {
-          mean = database.get(clusterIter.next());
-        }
-        else {
-          mean = mean.plus(database.get(clusterIter.next()));
-        }
-      }
-      if(list.size() > 0) {
-        assert mean != null;
-        mean = mean.multiplicate(1.0 / list.size());
-      }
-      else {
-        mean = means.get(i);
-      }
-      newMeans.add(mean);
-    }
-    return newMeans;
-  }
-
-  /**
-   * Returns a list of clusters. The k<sup>th</sup> cluster contains the ids of
-   * those FeatureVectors, that are nearest to the k<sup>th</sup> mean.
-   * 
-   * @param means a list of k means
-   * @param database the database to cluster
-   * @return list of k clusters
-   */
-  protected List<? extends ModifiableDBIDs> sort(List<V> means, Relation<V> database) {
-    List<ArrayModifiableDBIDs> clusters = new ArrayList<ArrayModifiableDBIDs>(k);
-    for(int i = 0; i < k; i++) {
-      clusters.add(DBIDUtil.newArray());
-    }
-
-    for(DBID id : database.iterDBIDs()) {
-      List<D> distances = new ArrayList<D>(k);
-      V fv = database.get(id);
-      int minIndex = 0;
-      for(int d = 0; d < k; d++) {
-        distances.add(getDistanceFunction().distance(fv, means.get(d)));
-        if(distances.get(d).compareTo(distances.get(minIndex)) < 0) {
-          minIndex = d;
-        }
-      }
-      clusters.get(minIndex).add(id);
-    }
-    for(ArrayModifiableDBIDs cluster : clusters) {
-      Collections.sort(cluster);
-    }
-    return clusters;
-  }
-
-  @Override
-  public TypeInformation[] getInputTypeRestriction() {
-    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
-  }
-
-  @Override
-  protected Logging getLogger() {
-    return logger;
-  }
-
-  /**
-   * Parameterization class.
-   * 
-   * @author Erich Schubert
-   * 
-   * @apiviz.exclude
-   */
-  public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<V, D> {
-    protected int k;
-
-    protected int maxiter;
-
-    protected Long seed;
-
-    @Override
-    protected void makeOptions(Parameterization config) {
-      super.makeOptions(config);
-      IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
-      if(config.grab(kP)) {
-        k = kP.getValue();
-      }
-
-      IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
-      if(config.grab(maxiterP)) {
-        maxiter = maxiterP.getValue();
-      }
-
-      LongParameter seedP = new LongParameter(SEED_ID, true);
-      if(config.grab(seedP)) {
-        seed = seedP.getValue();
-      }
-    }
-
-    @Override
-    protected KMeans<V, D> makeInstance() {
-      return new KMeans<V, D>(distanceFunction, k, maxiter, seed);
-    }
-  }
-}
-\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
index 24985e24..2244b07b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
index c233963d..d6c5872a 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSTypeAlgorithm.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
index f7bd10c7..41e48b89 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/OPTICSXi.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
index e1329888..45b12c43 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SLINK.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -133,18 +133,18 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
   public Result run(Database database, Relation<O> relation) {
     DistanceQuery<O, D> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
     Class<D> distCls = (Class<D>) getDistanceFunction().getDistanceFactory().getClass();
-    WritableRecordStore store = DataStoreUtil.makeRecordStorage(distQuery.getRelation().getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBID.class, distCls);
+    WritableRecordStore store = DataStoreUtil.makeRecordStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, DBID.class, distCls);
     pi = store.getStorage(0, DBID.class);
     lambda = store.getStorage(1, distCls);
     // Temporary storage for m.
-    WritableDataStore<D> m = DataStoreUtil.makeStorage(distQuery.getRelation().getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
+    WritableDataStore<D> m = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, distCls);
 
-    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustering", distQuery.getRelation().size(), logger) : null;
+    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustering", relation.size(), logger) : null;
     // has to be an array for monotonicity reasons!
-    ModifiableDBIDs processedIDs = DBIDUtil.newArray(distQuery.getRelation().size());
+    ModifiableDBIDs processedIDs = DBIDUtil.newArray(relation.size());
 
     // apply the algorithm
-    for(DBID id : distQuery.getRelation().iterDBIDs()) {
+    for(DBID id : relation.iterDBIDs()) {
       step1(id);
       step2(id, processedIDs, distQuery, m);
       step3(id, processedIDs, m);
@@ -168,8 +168,8 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
     BasicResult result = null;
 
     // Build clusters identified by their target object
-    int minc = minclusters != null ? minclusters : distQuery.getRelation().size();
-    result = extractClusters(distQuery.getRelation().getDBIDs(), pi, lambda, minc);
+    int minc = minclusters != null ? minclusters : relation.size();
+    result = extractClusters(relation.getDBIDs(), pi, lambda, minc);
 
     result.addChildResult(new MaterializedRelation<DBID>("SLINK pi", "slink-order", TypeUtil.DBID, pi, processedIDs));
     result.addChildResult(new MaterializedRelation<D>("SLINK lambda", "slink-order", new SimpleTypeInformation<D>(distCls), lambda, processedIDs));
@@ -288,7 +288,7 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
     D stopdist = null;
     // sort by lambda
     ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
-    Collections.sort(order, new CompareByLambda<D>(lambda));
+    order.sort(new CompareByLambda<D>(lambda));
     int index = ids.size() - minclusters - 1;
     while(index >= 0) {
       if(lambda.get(order.get(index)).equals(lambda.get(order.get(index + 1)))) {
@@ -458,7 +458,7 @@ public class SLINK<O, D extends Distance<D>> extends AbstractDistanceBasedAlgori
     // extract a hierarchical clustering
     ArrayModifiableDBIDs order = DBIDUtil.newArray(ids);
     // sort by lambda
-    Collections.sort(order, new CompareByLambda<D>(lambda));
+    order.sort(new CompareByLambda<D>(lambda));
     D curdist = null;
 
     D stopdist = null;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
index 3bde2932..7c3a13c9 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/SNNClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -25,7 +25,6 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering;
 
 import java.util.ArrayList;
 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 
 import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
@@ -36,6 +35,7 @@ import de.lmu.ifi.dbs.elki.data.model.Model;
 import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
 import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
 import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs;
 import de.lmu.ifi.dbs.elki.database.ids.DBID;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
 import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
@@ -200,8 +200,8 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
    * @return the shared nearest neighbors of the specified query object in the
    *         given database
    */
-  protected List<DBID> findSNNNeighbors(SimilarityQuery<O, IntegerDistance> snnInstance, DBID queryObject) {
-    List<DBID> neighbors = new LinkedList<DBID>();
+  protected ArrayModifiableDBIDs findSNNNeighbors(SimilarityQuery<O, IntegerDistance> snnInstance, DBID queryObject) {
+    ArrayModifiableDBIDs neighbors = DBIDUtil.newArray();
     for(DBID id : snnInstance.getRelation().iterDBIDs()) {
       if(snnInstance.similarity(queryObject, id).compareTo(epsilon) >= 0) {
         neighbors.add(id);
@@ -222,7 +222,7 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
    *        clustering
    */
   protected void expandCluster(SimilarityQuery<O, IntegerDistance> snnInstance, DBID startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
-    List<DBID> seeds = findSNNNeighbors(snnInstance, startObjectID);
+    ArrayModifiableDBIDs seeds = findSNNNeighbors(snnInstance, startObjectID);
 
     // startObject is no core-object
     if(seeds.size() < minpts) {
@@ -247,11 +247,10 @@ public class SNNClustering<O> extends AbstractAlgorithm<Clustering<Model>> imple
         noise.remove(seed);
       }
     }
-    seeds.remove(0);
 
     while(seeds.size() > 0) {
-      DBID o = seeds.remove(0);
-      List<DBID> neighborhood = findSNNNeighbors(snnInstance, o);
+      DBID o = seeds.remove(seeds.size() - 1);
+      ArrayModifiableDBIDs neighborhood = findSNNNeighbors(snnInstance, o);
 
       if(neighborhood.size() >= minpts) {
         for(DBID p : neighborhood) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
index 4a0b391c..b877415e 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/CASH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -55,7 +55,7 @@ import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
 import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
 import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
 import de.lmu.ifi.dbs.elki.database.relation.Relation;
-import de.lmu.ifi.dbs.elki.datasource.filter.NonNumericFeaturesException;
+import de.lmu.ifi.dbs.elki.datasource.filter.normalization.NonNumericFeaturesException;
 import de.lmu.ifi.dbs.elki.distance.distancefunction.WeightedDistanceFunction;
 import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
 import de.lmu.ifi.dbs.elki.logging.Logging;
@@ -84,7 +84,12 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
 
 /**
  * Provides the CASH algorithm, an subspace clustering algorithm based on the
- * hough transform.
+ * Hough transform.
+ * 
+ * <b>Note:</b> CASH requires explicitly setting the input parser other than default to
+ * {@link de.lmu.ifi.dbs.elki.datasource.parser.ParameterizationFunctionLabelParser}:
+ * (in the MiniGui, set option: dbc.parser ParameterizationFunctionLabelParser).
+ * 
  * <p>
  * Reference: E. Achtert, C. Böhm, J. David, P. Kröger, A. Zimek: Robust
  * clustering in arbitrarily oriented subspaces. <br>
@@ -99,7 +104,7 @@ import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
  */
 // todo elke hierarchy (later)
 @Title("CASH: Robust clustering in arbitrarily oriented subspaces")
-@Description("Subspace clustering algorithm based on the hough transform.")
+@Description("Subspace clustering algorithm based on the Hough transform.")
 @Reference(authors = "E. Achtert, C. Böhm, J. David, P. Kröger, A. Zimek", title = "Robust clustering in arbitraily oriented subspaces", booktitle = "Proc. 8th SIAM Int. Conf. on Data Mining (SDM'08), Atlanta, GA, 2008", url = "http://www.siam.org/proceedings/datamining/2008/dm08_69_AchtertBoehmDavidKroegerZimek.pdf")
 public class CASH extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
   /**
@@ -349,7 +354,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
         res.addCluster(c);
         noiseIDs.removeDBIDs(interval.getIDs());
         clusterIDs.addDBIDs(interval.getIDs());
-        processedIDs.addAll(interval.getIDs());
+        processedIDs.addDBIDs(interval.getIDs());
       }
 
       // Rebuild heap
@@ -372,13 +377,13 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
       if(dim == noiseDim) {
         Cluster<Model> c = new Cluster<Model>(noiseIDs, true, ClusterModel.CLUSTER);
         res.addCluster(c);
-        processedIDs.addAll(noiseIDs);
+        processedIDs.addDBIDs(noiseIDs);
       }
       else if(noiseIDs.size() >= minPts) {
         LinearEquationSystem les = runDerivator(fulldatabase, dim - 1, noiseIDs);
         Cluster<Model> c = new Cluster<Model>(noiseIDs, true, new LinearEquationModel(les));
         res.addCluster(c);
-        processedIDs.addAll(noiseIDs);
+        processedIDs.addDBIDs(noiseIDs);
       }
     }
 
@@ -521,7 +526,7 @@ public class CASH extends AbstractAlgorithm<Clustering<Model>> implements Cluste
   private ParameterizationFunction project(Matrix basis, ParameterizationFunction f) {
     // Matrix m = new Matrix(new
     // double[][]{f.getPointCoordinates()}).times(basis);
-    Matrix m = f.getRowVector().times(basis);
+    Matrix m = f.getColumnVector().transposeTimes(basis);
     ParameterizationFunction f_t = new ParameterizationFunction(m.getColumnPackedCopy());
     return f_t;
   }
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
index 8fc30b3d..575bf117 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/COPAC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
index 75633853..af4f677f 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ERiC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -244,7 +244,7 @@ public class ERiC<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Cluste
         }
         else {
           ModifiableDBIDs merged = DBIDUtil.newHashSet(noise.getIDs());
-          merged.addAll(clus.getIDs().asCollection());
+          merged.addDBIDs(clus.getIDs());
           noise.setIDs(merged);
         }
       }
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
index 93d0cc99..98761962 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/FourC.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
index 92723428..1065682c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/HiCO.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
new file mode 100644
index 00000000..41ee1f69
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/LMCLUS.java
@@ -0,0 +1,566 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;
+import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
+import de.lmu.ifi.dbs.elki.math.MeanVariance;
+import de.lmu.ifi.dbs.elki.math.histograms.FlexiHistogram;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.UnableToComplyException;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.pairs.DoubleObjPair;
+
+/**
+ * Linear manifold clustering in high dimensional spaces by stochastic search.
+ * 
+ * Reference:
+ * <p>
+ * Robert Haralick, Rave Harpaz<br />
+ * Linear manifold clustering in high dimensional spaces by stochastic search<br/>
+ * In: Pattern Recognition volume 40, Issue 10
+ * </p>
+ * 
+ * Implementation note: the LMCLUS algorithm seems to lack good stopping
+ * criterions. We can't entirely reproduce the good results from the original
+ * publication, in particular not on noisy data. But the questionable parts are
+ * as in the original publication, associated thesis and published source code.
+ * The minimum cluster size however can serve as a hidden stopping criterion.
+ * 
+ * @author Ernst Waas
+ * @author Erich Schubert
+ */
+@Reference(authors = "Robert Haralick, Rave Harpaz", title = "Linear manifold clustering in high dimensional spaces by stochastic search", booktitle = "Pattern Recognition volume 40, Issue 10", url = "http://dx.doi.org/10.1016/j.patcog.2007.01.020")
+public class LMCLUS extends AbstractAlgorithm<Clustering<Model>> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(LMCLUS.class);
+
+  /**
+   * Epsilon
+   */
+  private final static double NOT_FROM_ONE_CLUSTER_PROBABILITY = 0.2;
+
+  /**
+   * Histogram resolution
+   */
+  private final static int BINS = 50;
+
+  /**
+   * The current threshold value calculated by the findSeperation Method.
+   */
+  private final double sensitivityThreshold;
+
+  /**
+   * Maximum cluster dimensionality
+   */
+  private final int maxLMDim;
+
+  /**
+   * Minimum cluster size
+   */
+  private final int minsize;
+
+  /**
+   * Number of sampling rounds to find a good split
+   */
+  private final int samplingLevel;
+
+  /**
+   * Constructor.
+   * 
+   * @param maxdim Maximum dimensionality
+   * @param minsize Minimum cluster size
+   * @param samplingLevel Sampling level
+   * @param sensitivityThreshold Threshold
+   */
+  public LMCLUS(int maxdim, int minsize, int samplingLevel, double sensitivityThreshold) {
+    super();
+    this.maxLMDim = maxdim;
+    this.minsize = minsize;
+    this.samplingLevel = samplingLevel;
+    this.sensitivityThreshold = sensitivityThreshold;
+  }
+
+  /**
+   * The main LMCLUS (Linear manifold clustering algorithm) is processed in this
+   * method.
+   * 
+   * <PRE>
+   * The algorithm samples random linear manifolds and tries to find clusters in it.
+   * It calculates a distance histogram searches for a threshold and partitions the
+   * points in two groups the ones in the cluster and everything else.
+   * Then the best fitting linear manifold is searched and registered as a cluster.
+   * The process is started over until all points are clustered.
+   * The last cluster should contain all the outliers. (or the whole data if no clusters have been found.)
+   * For details see {@link LMCLUS}.
+   * </PRE>
+   * 
+   * @param database The database to operate on
+   * @param relation Relation
+   * @return Clustering result
+   * @throws de.lmu.ifi.dbs.elki.utilities.UnableToComplyException
+   */
+  public Clustering<Model> run(Database database, Relation<NumberVector<?, ?>> relation) throws UnableToComplyException {
+    Clustering<Model> ret = new Clustering<Model>("LMCLUS Clustering", "lmclus-clustering");
+    FiniteProgress progress = logger.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), logger) : null;
+    IndefiniteProgress cprogress = logger.isVerbose() ? new IndefiniteProgress("Clusters found", logger) : null;
+    ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
+
+    final int maxdim = Math.min(maxLMDim, DatabaseUtil.dimensionality(relation));
+    int cnum = 0;
+    while(unclustered.size() > minsize) {
+      DBIDs current = unclustered;
+      int lmDim = 1;
+      for(int k = 1; k <= maxdim; k++) {
+        // Implementation note: this while loop is from the original publication
+        // and the published LMCLUS source code. It doesn't make sense to me -
+        // it is lacking a stop criterion other than "cluster is too small" and
+        // "cluster is inseparable"! Additionally, there is good criterion for
+        // stopping at the appropriate dimensionality either.
+        while(true) {
+          Separation separation = findSeparation(relation, current, k);
+          // logger.verbose("k: " + k + " goodness: " + separation.goodness +
+          // " threshold: " + separation.threshold);
+          if(separation.goodness <= sensitivityThreshold) {
+            break;
+          }
+          ModifiableDBIDs subset = DBIDUtil.newArray(current.size());
+          for(DBID id : current) {
+            if(deviation(relation.get(id).getColumnVector().minusEquals(separation.originV), separation.basis) < separation.threshold) {
+              subset.add(id);
+            }
+          }
+          // logger.verbose("size:"+subset.size());
+          if(subset.size() < minsize) {
+            break;
+          }
+          current = subset;
+          lmDim = k;
+          // System.out.println("Partition: " + subset.size());
+        }
+      }
+      // No more clusters found
+      if(current.size() < minsize || current == unclustered) {
+        break;
+      }
+      // New cluster found
+      // TODO: annotate cluster with dimensionality
+      final Cluster<Model> cluster = new Cluster<Model>(current);
+      cluster.setName("Cluster_" + lmDim + "d_" + cnum);
+      cnum++;
+      ret.addCluster(cluster);
+      // Remove from main working set.
+      unclustered.removeDBIDs(current);
+      if(progress != null) {
+        progress.setProcessed(relation.size() - unclustered.size(), logger);
+      }
+      if(cprogress != null) {
+        cprogress.setProcessed(cnum, logger);
+      }
+    }
+    // Remaining objects are noise
+    if(unclustered.size() > 0) {
+      ret.addCluster(new Cluster<Model>(unclustered, true));
+    }
+    if(progress != null) {
+      progress.setProcessed(relation.size(), logger);
+      progress.ensureCompleted(logger);
+    }
+    if(cprogress != null) {
+      cprogress.setCompleted(logger);
+    }
+    return ret;
+  }
+
+  /**
+   * Deviation from a manifold described by beta.
+   * 
+   * @param delta Delta from origin vector
+   * @param beta Manifold
+   * @return Deviation score
+   */
+  private double deviation(Vector delta, Matrix beta) {
+    double a = delta.euclideanLength();
+    double b = beta.transposeTimes(delta).euclideanLength();
+    return Math.sqrt((a * a) - (b * b));
+  }
+
+  /**
+   * This method samples a number of linear manifolds an tries to determine
+   * which the one with the best cluster is.
+   * 
+   * <PRE>
+   * A number of sample points according to the dimension of the linear manifold are taken.
+   * The basis (B) and the origin(o) of the manifold are calculated.
+   * A distance histogram using  the distance function ||x-o|| -||B^t*(x-o)|| is generated.
+   * The best threshold is searched using the elevate threshold function.
+   * The overall goodness of the threshold is determined.
+   * The process is redone until a specific number of samples is taken.
+   * </PRE>
+   * 
+   * @param relation The vector relation
+   * @param currentids Current DBIDs
+   * @param dimension the dimension of the linear manifold to sample.
+   * @return the overall goodness of the separation. The values origin basis and
+   *         threshold are returned indirectly over class variables.
+   */
+  private Separation findSeparation(Relation<NumberVector<?, ?>> relation, DBIDs currentids, int dimension) {
+    Separation separation = new Separation();
+    // determine the number of samples needed, to secure that with a specific
+    // probability
+    // in at least on sample every sampled point is from the same cluster.
+    int samples = (int) Math.min(Math.log(NOT_FROM_ONE_CLUSTER_PROBABILITY) / (Math.log(1 - Math.pow((1.0d / samplingLevel), dimension))), (double) currentids.size());
+    // System.out.println("Number of samples: " + samples);
+    Random r = new Random();
+    int remaining_retries = 100;
+    for(int i = 1; i <= samples; i++) {
+      DBIDs sample = DBIDUtil.randomSample(currentids, dimension + 1, r.nextLong());
+      final Iterator<DBID> iter = sample.iterator();
+      // Use first as origin
+      DBID origin = iter.next();
+      Vector originV = relation.get(origin).getColumnVector();
+      // Build orthogonal basis from remainder
+      Matrix basis;
+      {
+        List<Vector> vectors = new ArrayList<Vector>(sample.size() - 1);
+        while(iter.hasNext()) {
+          Vector vec = relation.get(iter.next()).getColumnVector();
+          vectors.add(vec.minusEquals(originV));
+        }
+        // generate orthogonal basis
+        basis = generateOrthonormalBasis(vectors);
+        if(basis == null) {
+          // new sample has to be taken.
+          i--;
+          remaining_retries--;
+          if(remaining_retries < 0) {
+            throw new AbortException("Too many retries in sampling, and always a linear dependant data set.");
+          }
+          continue;
+        }
+      }
+      // Generate and fill a histogram.
+      FlexiHistogram<Double, Double> histogram = FlexiHistogram.DoubleSumHistogram(BINS);
+      double w = 1.0 / currentids.size();
+      for(DBID point : currentids) {
+        // Skip sampled points
+        if(sample.contains(point)) {
+          continue;
+        }
+        Vector vec = relation.get(point).getColumnVector().minusEquals(originV);
+        final double distance = deviation(vec, basis);
+        histogram.aggregate(distance, w);
+      }
+      double[] th = findAndEvaluateThreshold(histogram); // evaluate threshold
+      if(th[1] > separation.goodness) {
+        separation.goodness = th[1];
+        separation.threshold = th[0];
+        separation.originV = originV;
+        separation.basis = basis;
+      }
+    }
+    return separation;
+  }
+
+  /**
+   * This Method generates an orthonormal basis from a set of Vectors. It uses
+   * the established Gram-Schmidt algorithm for orthonormalisation:
+   * 
+   * <PRE>
+   * u_1 = v_1
+   * u_k = v_k -proj_u1(v_k)...proj_u(k-1)(v_k);
+   * 
+   * Where proj_u(v) = <v,u>/<u,u> *u
+   * </PRE>
+   * 
+   * @param vectors The set of vectors to generate the orthonormal basis from
+   * @return the orthonormal basis generated by this method.
+   * @throws RuntimeException if the given vectors are not linear independent.
+   */
+  private Matrix generateOrthonormalBasis(List<Vector> vectors) {
+    Vector first = vectors.get(0);
+    first = first.times(1.0 / first.euclideanLength());
+    Matrix ret = new Matrix(first.getDimensionality(), vectors.size());
+    ret.setCol(0, first);
+    for(int i = 1; i < vectors.size(); i++) {
+      // System.out.println("Matrix:" + ret);
+      Vector v_i = vectors.get(i);
+      Vector u_i = v_i.copy();
+      // System.out.println("Vector " + i + ":" + partialSol);
+      for(int j = 0; j < i; j++) {
+        Vector v_j = ret.getCol(j);
+        double f = v_i.transposeTimes(v_j) / v_j.transposeTimes(v_j);
+        if(Double.isNaN(f)) {
+          if(logger.isDebuggingFine()) {
+            logger.debugFine("Zero vector encountered? " + v_j);
+          }
+          return null;
+        }
+        u_i.minusTimesEquals(v_j, f);
+      }
+      // check if the vectors weren't independent
+      final double len_u_i = u_i.euclideanLength();
+      if(len_u_i == 0.0) {
+        if(logger.isDebuggingFine()) {
+          logger.debugFine("Points not independent - no orthonormalization.");
+        }
+        return null;
+      }
+      // System.out.println("Vector " + i + ":" + partialSol);
+      u_i.timesEquals(1 / len_u_i);
+      ret.setCol(i, u_i);
+    }
+    return ret;
+  }
+
+  /**
+   * Evaluate the histogram to find a suitable threshold
+   * 
+   * @param histogram Histogram to evaluate
+   * @return Position and goodness
+   */
+  private double[] findAndEvaluateThreshold(FlexiHistogram<Double, Double> histogram) {
+    int n = histogram.getNumBins();
+    double[] p1 = new double[n];
+    double[] p2 = new double[n];
+    double[] mu1 = new double[n];
+    double[] mu2 = new double[n];
+    double[] sigma1 = new double[n];
+    double[] sigma2 = new double[n];
+    double[] jt = new double[n];
+    // Forward pass
+    {
+      MeanVariance mv = new MeanVariance();
+      Iterator<DoubleObjPair<Double>> forward = histogram.iterator();
+      for(int i = 0; forward.hasNext(); i++) {
+        DoubleObjPair<Double> pair = forward.next();
+        p1[i] = pair.second + ((i > 0) ? p1[i - 1] : 0);
+        mv.put(i, pair.second);
+        mu1[i] = mv.getMean();
+        sigma1[i] = mv.getNaiveStddev();
+      }
+    }
+    // Backwards pass
+    {
+      MeanVariance mv = new MeanVariance();
+      Iterator<DoubleObjPair<Double>> backwards = histogram.reverseIterator();
+      for(int j = n - 1; backwards.hasNext(); j--) {
+        DoubleObjPair<Double> pair = backwards.next();
+        p2[j] = pair.second + ((j + 1 < n) ? p2[j + 1] : 0);
+        mv.put(j, pair.second);
+        mu2[j] = mv.getMean();
+        sigma2[j] = mv.getNaiveStddev();
+      }
+    }
+
+    for(int i = 0; i < n; i++) {
+      jt[i] = 1.0 + 2 * (p1[i] * (Math.log(sigma1[i]) - Math.log(p1[i])) + p2[i] * (Math.log(sigma2[i]) - Math.log(p2[i])));
+    }
+
+    int bestpos = -1;
+    double bestgoodness = Double.NEGATIVE_INFINITY;
+
+    double devPrev = jt[1] - jt[0];
+    for(int i = 1; i < jt.length - 1; i++) {
+      double devCur = jt[i + 1] - jt[i];
+      // System.out.println(p1[i]);
+      // System.out.println(jt[i + 1]);
+      // System.out.println(jt[i]);
+      // System.out.println(devCur);
+      // Local minimum found - calculate depth
+      if(devCur >= 0 && devPrev <= 0) {
+        double lowestMaxima = Double.POSITIVE_INFINITY;
+        for(int j = i - 1; j > 0; j--) {
+          if(jt[j - 1] < jt[j]) {
+            lowestMaxima = Math.min(lowestMaxima, jt[j]);
+            break;
+          }
+        }
+        for(int j = i + 1; j < n - 2; j++) {
+          if(jt[j + 1] < jt[j]) {
+            lowestMaxima = Math.min(lowestMaxima, jt[j]);
+            break;
+          }
+        }
+        double localDepth = lowestMaxima - jt[i];
+
+        final double mud = mu1[i] - mu2[i];
+        double discriminability = mud * mud / (sigma1[i] * sigma1[i] + sigma2[i] * sigma2[i]);
+        if(Double.isNaN(discriminability)) {
+          discriminability = -1;
+        }
+        double goodness = localDepth * discriminability;
+        if(goodness > bestgoodness) {
+          bestgoodness = goodness;
+          bestpos = i;
+        }
+      }
+      devPrev = devCur;
+    }
+    return new double[] { histogram.getBinMax(bestpos), bestgoodness };
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return logger;
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+  }
+
+  /**
+   * Class to represent a linear manifold separation
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  private static class Separation {
+    /**
+     * Goodness of separation
+     */
+    double goodness = Double.NEGATIVE_INFINITY;
+
+    /**
+     * Threshold
+     */
+    double threshold = Double.NEGATIVE_INFINITY;
+
+    /**
+     * Basis of manifold
+     */
+    Matrix basis = null;
+
+    /**
+     * Origin vector
+     */
+    Vector originV = null;
+  }
+
+  /**
+   * Parameterization class
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    /**
+     * Parameter with the maximum dimension to search for
+     */
+    public static final OptionID MAXDIM_ID = OptionID.getOrCreateOptionID("lmclus.maxdim", "Maximum linear manifold dimension to search.");
+
+    /**
+     * Parameter for the minimum cluster size
+     */
+    public static final OptionID MINSIZE_ID = OptionID.getOrCreateOptionID("lmclus.minsize", "Minimum cluster size to allow.");
+
+    /**
+     * Sampling intensity level
+     */
+    public static final OptionID SAMPLINGL_ID = OptionID.getOrCreateOptionID("lmclus.sampling-level", "A number used to determine how many samples are taken in each search.");
+
+    /**
+     * Global significance threshold
+     */
+    public static final OptionID THRESHOLD_ID = OptionID.getOrCreateOptionID("lmclus.threshold", "Threshold to determine if a cluster was found.");
+
+    /**
+     * Maximum dimensionality to search for
+     */
+    private int maxdim = Integer.MAX_VALUE;
+
+    /**
+     * Minimum cluster size.
+     */
+    private int minsize;
+
+    /**
+     * Sampling level
+     */
+    private int samplingLevel;
+
+    /**
+     * Threshold
+     */
+    private double threshold;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntParameter maxLMDimP = new IntParameter(MAXDIM_ID, new GreaterEqualConstraint(1), true);
+      if(config.grab(maxLMDimP)) {
+        maxdim = maxLMDimP.getValue();
+      }
+      IntParameter minsizeP = new IntParameter(MINSIZE_ID, new GreaterEqualConstraint(1));
+      if(config.grab(minsizeP)) {
+        minsize = minsizeP.getValue();
+      }
+      IntParameter samplingLevelP = new IntParameter(SAMPLINGL_ID, 100);
+      if(config.grab(samplingLevelP)) {
+        samplingLevel = samplingLevelP.getValue();
+      }
+      DoubleParameter sensivityThresholdP = new DoubleParameter(THRESHOLD_ID);
+      if(config.grab(sensivityThresholdP)) {
+        threshold = sensivityThresholdP.getValue();
+      }
+    }
+
+    @Override
+    protected LMCLUS makeInstance() {
+      return new LMCLUS(maxdim, minsize, samplingLevel, threshold);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
index 924e1786..eb5608fc 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/ORCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -50,6 +50,7 @@ import de.lmu.ifi.dbs.elki.logging.Logging;
 import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.SortedEigenPairs;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCAResult;
 import de.lmu.ifi.dbs.elki.math.linearalgebra.pca.PCARunner;
 import de.lmu.ifi.dbs.elki.utilities.ClassGenericsUtil;
@@ -420,14 +421,14 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
   /**
    * Returns the union of the two specified clusters.
    * 
-   * @param database the database holding the objects
+   * @param relation the database holding the objects
    * @param distFunc the distance function
    * @param c1 the first cluster
    * @param c2 the second cluster
    * @param dim the dimensionality of the union cluster
    * @return the union of the two specified clusters
    */
-  private ORCLUSCluster union(Relation<V> database, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster c1, ORCLUSCluster c2, int dim) {
+  private ORCLUSCluster union(Relation<V> relation, DistanceQuery<V, DoubleDistance> distFunc, ORCLUSCluster c1, ORCLUSCluster c2, int dim) {
     ORCLUSCluster c = new ORCLUSCluster();
 
     c.objectIDs = DBIDUtil.newHashSet(c1.objectIDs);
@@ -436,11 +437,13 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
     c.objectIDs = DBIDUtil.newArray(c.objectIDs);
 
     if(c.objectIDs.size() > 0) {
-      c.centroid = DatabaseUtil.centroid(database, c.objectIDs);
-      c.basis = findBasis(database, distFunc, c, dim);
+      c.centroid = DatabaseUtil.centroid(relation, c.objectIDs);
+      c.basis = findBasis(relation, distFunc, c, dim);
     }
     else {
-      c.centroid = c1.centroid.plus(c2.centroid).multiplicate(0.5);
+      V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+      Vector cent = c1.centroid.getColumnVector().plusEquals(c2.centroid.getColumnVector()).timesEquals(0.5); 
+      c.centroid = factory.newNumberVector(cent.getArrayRef());
       double[][] doubles = new double[c1.basis.getRowDimensionality()][dim];
       for(int i = 0; i < dim; i++) {
         doubles[i][i] = 1;
@@ -460,9 +463,9 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
    * @return the projection of double vector o in the subspace of cluster c
    */
   private V projection(ORCLUSCluster c, V o, V factory) {
-    Matrix o_proj = o.getRowVector().times(c.basis);
+    Matrix o_proj = o.getColumnVector().transposeTimes(c.basis);
     double[] values = o_proj.getColumnPackedCopy();
-    return factory.newInstance(values);
+    return factory.newNumberVector(values);
   }
 
   @Override
@@ -523,7 +526,7 @@ public class ORCLUS<V extends NumberVector<V, ?>> extends AbstractProjectedClust
       for(int d = 1; d <= o.getDimensionality(); d++) {
         values[d - 1] = o.doubleValue(d);
       }
-      this.centroid = factory.newInstance(values);
+      this.centroid = factory.newNumberVector(values);
     }
   }
 
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
index 6c5db740..46112498 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHInterval.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
index 62ff658f..86e045cb 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/CASHIntervalSplit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
index 8cd156e8..8b6d104c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/cash/package-info.java
@@ -7,7 +7,7 @@
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
index 82a1f1e1..665de632 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/correlation/package-info.java
@@ -7,7 +7,7 @@
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
new file mode 100644
index 00000000..d3c73b53
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeans.java
@@ -0,0 +1,310 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Abstract base class for k-means implementations.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+public abstract class AbstractKMeans<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm<NumberVector<?, ?>, D, Clustering<MeanModel<V>>> {
+  /**
+   * Parameter to specify the number of clusters to find, must be an integer
+   * greater than 0.
+   */
+  public static final OptionID K_ID = OptionID.getOrCreateOptionID("kmeans.k", "The number of clusters to find.");
+
+  /**
+   * Parameter to specify the number of clusters to find, must be an integer
+   * greater or equal to 0, where 0 means no limit.
+   */
+  public static final OptionID MAXITER_ID = OptionID.getOrCreateOptionID("kmeans.maxiter", "The maximum number of iterations to do. 0 means no limit.");
+
+  /**
+   * Parameter to specify the random generator seed.
+   */
+  public static final OptionID SEED_ID = OptionID.getOrCreateOptionID("kmeans.seed", "The random number generator seed.");
+
+  /**
+   * Parameter to specify the initialization method
+   */
+  public static final OptionID INIT_ID = OptionID.getOrCreateOptionID("kmeans.initialization", "Method to choose the initial means.");
+
+  /**
+   * Holds the value of {@link #K_ID}.
+   */
+  protected int k;
+
+  /**
+   * Holds the value of {@link #MAXITER_ID}.
+   */
+  protected int maxiter;
+
+  /**
+   * Method to choose initial means.
+   */
+  protected KMeansInitialization<V> initializer;
+
+  /**
+   * Constructor.
+   * 
+   * @param distanceFunction distance function
+   * @param k k parameter
+   * @param maxiter Maxiter parameter
+   */
+  public AbstractKMeans(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+    super(distanceFunction);
+    this.k = k;
+    this.maxiter = maxiter;
+    this.initializer = initializer;
+  }
+
+  /**
+   * Returns a list of clusters. The k<sup>th</sup> cluster contains the ids of
+   * those FeatureVectors, that are nearest to the k<sup>th</sup> mean.
+   * 
+   * @param relation the database to cluster
+   * @param means a list of k means
+   * @param clusters cluster assignment
+   * @return true when the object was reassigned
+   */
+  protected boolean assignToNearestCluster(Relation<V> relation, List<Vector> means, List<? extends ModifiableDBIDs> clusters) {
+    boolean changed = false;
+
+    if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+      @SuppressWarnings("unchecked")
+      final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+      for(DBID id : relation.iterDBIDs()) {
+        double mindist = Double.POSITIVE_INFINITY;
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          double dist = df.doubleDistance(fv, means.get(i));
+          if(dist < mindist) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        if(clusters.get(minIndex).add(id)) {
+          changed = true;
+          // Remove from previous cluster
+          // TODO: keep a list of cluster assignments to save this search?
+          for(int i = 0; i < k; i++) {
+            if(i != minIndex) {
+              if(clusters.get(i).remove(id)) {
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+    else {
+      final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+      for(DBID id : relation.iterDBIDs()) {
+        D mindist = df.getDistanceFactory().infiniteDistance();
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          D dist = df.distance(fv, means.get(i));
+          if(dist.compareTo(mindist) < 0) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        if(clusters.get(minIndex).add(id)) {
+          changed = true;
+          // Remove from previous cluster
+          // TODO: keep a list of cluster assignments to save this search?
+          for(int i = 0; i < k; i++) {
+            if(i != minIndex) {
+              if(clusters.get(i).remove(id)) {
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+    return changed;
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+  }
+
+  /**
+   * Returns the mean vectors of the given clusters in the given database.
+   * 
+   * @param clusters the clusters to compute the means
+   * @param means the recent means
+   * @param database the database containing the vectors
+   * @return the mean vectors of the given clusters in the given database
+   */
+  protected List<Vector> means(List<? extends ModifiableDBIDs> clusters, List<Vector> means, Relation<V> database) {
+    List<Vector> newMeans = new ArrayList<Vector>(k);
+    for(int i = 0; i < k; i++) {
+      ModifiableDBIDs list = clusters.get(i);
+      Vector mean = null;
+      for(Iterator<DBID> clusterIter = list.iterator(); clusterIter.hasNext();) {
+        if(mean == null) {
+          mean = database.get(clusterIter.next()).getColumnVector();
+        }
+        else {
+          mean.plusEquals(database.get(clusterIter.next()).getColumnVector());
+        }
+      }
+      if(list.size() > 0) {
+        assert mean != null;
+        mean.timesEquals(1.0 / list.size());
+      }
+      else {
+        mean = means.get(i);
+      }
+      newMeans.add(mean);
+    }
+    return newMeans;
+  }
+
+  /**
+   * Compute an incremental update for the mean
+   * 
+   * @param mean Mean to update
+   * @param vec Object vector
+   * @param newsize (New) size of cluster
+   * @param op Cluster size change / Weight change
+   */
+  protected void incrementalUpdateMean(Vector mean, V vec, int newsize, double op) {
+    if(newsize == 0) {
+      return; // Keep old mean
+    }
+    Vector delta = vec.getColumnVector();
+    // Compute difference from mean
+    delta.minusEquals(mean);
+    delta.timesEquals(op / newsize);
+    mean.plusEquals(delta);
+  }
+
+  /**
+   * Perform a MacQueen style iteration.
+   * 
+   * @param relation Relation
+   * @param means Means
+   * @param clusters Clusters
+   * @return true when the means have changed
+   */
+  protected boolean macQueenIterate(Relation<V> relation, List<Vector> means, List<ModifiableDBIDs> clusters) {
+    boolean changed = false;
+  
+    if(getDistanceFunction() instanceof PrimitiveDoubleDistanceFunction) {
+      // Raw distance function
+      @SuppressWarnings("unchecked")
+      final PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>> df = (PrimitiveDoubleDistanceFunction<? super NumberVector<?, ?>>) getDistanceFunction();
+  
+      // Incremental update
+      for(DBID id : relation.iterDBIDs()) {
+        double mindist = Double.POSITIVE_INFINITY;
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          double dist = df.doubleDistance(fv, means.get(i));
+          if(dist < mindist) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        // Update the cluster mean incrementally:
+        for(int i = 0; i < k; i++) {
+          ModifiableDBIDs ci = clusters.get(i);
+          if(i == minIndex) {
+            if(ci.add(id)) {
+              incrementalUpdateMean(means.get(i), relation.get(id), ci.size(), +1);
+              changed = true;
+            }
+          }
+          else if(ci.remove(id)) {
+            incrementalUpdateMean(means.get(i), relation.get(id), ci.size() + 1, -1);
+            changed = true;
+          }
+        }
+      }
+    }
+    else {
+      // Raw distance function
+      final PrimitiveDistanceFunction<? super NumberVector<?, ?>, D> df = getDistanceFunction();
+  
+      // Incremental update
+      for(DBID id : relation.iterDBIDs()) {
+        D mindist = df.getDistanceFactory().infiniteDistance();
+        V fv = relation.get(id);
+        int minIndex = 0;
+        for(int i = 0; i < k; i++) {
+          D dist = df.distance(fv, means.get(i));
+          if(dist.compareTo(mindist) < 0) {
+            minIndex = i;
+            mindist = dist;
+          }
+        }
+        // Update the cluster mean incrementally:
+        for(int i = 0; i < k; i++) {
+          ModifiableDBIDs ci = clusters.get(i);
+          if(i == minIndex) {
+            if(ci.add(id)) {
+              incrementalUpdateMean(means.get(i), relation.get(id), ci.size(), +1);
+              changed = true;
+            }
+          }
+          else if(ci.remove(id)) {
+            incrementalUpdateMean(means.get(i), relation.get(id), ci.size() + 1, -1);
+            changed = true;
+          }
+        }
+      }
+    }
+    return changed;
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
new file mode 100644
index 00000000..b5f088fb
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/AbstractKMeansInitialization.java
@@ -0,0 +1,71 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.LongParameter;
+
+/**
+ * Abstract base class for common k-means initializations.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public abstract class AbstractKMeansInitialization<V extends NumberVector<V, ?>> implements KMeansInitialization<V> {
+  /**
+   * Holds the value of {@link KMeansLloyd#SEED_ID}.
+   */
+  protected Long seed;
+
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public AbstractKMeansInitialization(Long seed) {
+    this.seed = seed;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public abstract static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+    protected Long seed;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      LongParameter seedP = new LongParameter(AbstractKMeans.SEED_ID, true);
+      if(config.grab(seedP)) {
+        seed = seedP.getValue();
+      }
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
new file mode 100644
index 00000000..78ccd426
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/FirstKInitialMeans.java
@@ -0,0 +1,74 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+
+/**
+ * Initialize K-means by using the first k objects as initial means.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public class FirstKInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   */
+  public FirstKInitialMeans() {
+    super(null);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    Iterator<DBID> iter = relation.iterDBIDs();
+    List<Vector> means = new ArrayList<Vector>(k);
+    for(int i = 0; i < k && iter.hasNext(); i++) {
+      means.add(relation.get(iter.next()).getColumnVector());
+    }
+    return means;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractParameterizer {
+    @Override
+    protected FirstKInitialMeans<V> makeInstance() {
+      return new FirstKInitialMeans<V>();
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
new file mode 100644
index 00000000..f4c0d9c7
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansInitialization.java
@@ -0,0 +1,49 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Interface for initializing K-Means
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public interface KMeansInitialization<V extends NumberVector<V, ?>> {
+  /**
+   * Choose initial means
+   * 
+   * @param relation Relation
+   * @param k Parameter k
+   * @param distanceFunction Distance function 
+   * @return List of chosen means for k-means
+   */
+  public abstract List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction);
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
new file mode 100644
index 00000000..fda1d6c0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansLloyd.java
@@ -0,0 +1,176 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Provides the k-means algorithm, using Lloyd-style bulk iterations.
+ * 
+ * <p>
+ * Reference:<br />
+ * S. Lloyd<br/>
+ * Least squares quantization in PCM<br/>
+ * IEEE Transactions on Information Theory 28 (2)<br/>
+ * previously published as Bell Telephone Laboratories Paper
+ * </p>
+ * 
+ * @author Arthur Zimek
+ * 
+ * @apiviz.has MeanModel
+ * 
+ * @param <V> vector datatype
+ * @param <D> distance value type
+ */
+@Title("K-Means")
+@Description("Finds a partitioning into k clusters.")
+@Reference(authors = "S. Lloyd", title = "Least squares quantization in PCM", booktitle = "IEEE Transactions on Information Theory 28 (2): 129–137.", url = "http://dx.doi.org/10.1109/TIT.1982.1056489")
+public class KMeansLloyd<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(KMeansLloyd.class);
+
+  /**
+   * Constructor.
+   * 
+   * @param distanceFunction distance function
+   * @param k k parameter
+   * @param maxiter Maxiter parameter
+   */
+  public KMeansLloyd(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+    super(distanceFunction, k, maxiter, initializer);
+  }
+
+  /**
+   * Run k-means
+   * 
+   * @param database Database
+   * @param relation relation to use
+   * @return result
+   * @throws IllegalStateException
+   */
+  public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
+    if(relation.size() <= 0) {
+      return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    }
+    // Choose initial means
+    List<Vector> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+    // Setup cluster assignment store
+    List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+    for(int i = 0; i < k; i++) {
+      clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+    }
+
+    for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+      if(logger.isVerbose()) {
+        logger.verbose("K-Means iteration " + (iteration + 1));
+      }
+      boolean changed = assignToNearestCluster(relation, means, clusters);
+      // Stop if no cluster assignment changed.
+      if(!changed) {
+        break;
+      }
+      // Recompute means.
+      means = means(clusters, means, relation);
+    }
+    // Wrap result
+    final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+    Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    for(int i = 0; i < clusters.size(); i++) {
+      MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
+      result.addCluster(new Cluster<MeanModel<V>>(clusters.get(i), model));
+    }
+    return result;
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return logger;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+    protected int k;
+
+    protected int maxiter;
+
+    protected KMeansInitialization<V> initializer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
+      if(config.grab(kP)) {
+        k = kP.getValue();
+      }
+
+      ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+      if(config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+
+      IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
+      if(config.grab(maxiterP)) {
+        maxiter = maxiterP.getValue();
+      }
+    }
+
+    @Override
+    protected AbstractKMeans<V, D> makeInstance() {
+      return new KMeansLloyd<V, D>(distanceFunction, k, maxiter, initializer);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
new file mode 100644
index 00000000..56492dd0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansMacQueen.java
@@ -0,0 +1,177 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractPrimitiveDistanceBasedAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.Distance;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.GreaterEqualConstraint;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Provides the k-means algorithm, using MacQueen style incremental updates.
+ * 
+ * <p>
+ * Reference:<br />
+ * J. MacQueen: Some Methods for Classification and Analysis of Multivariate
+ * Observations. <br />
+ * In 5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297.
+ * </p>
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.has MeanModel
+ * 
+ * @param <V> vector type to use
+ * @param <D> distance function value type
+ */
+@Title("K-Means")
+@Description("Finds a partitioning into k clusters.")
+@Reference(authors = "J. MacQueen", title = "Some Methods for Classification and Analysis of Multivariate Observations", booktitle = "5th Berkeley Symp. Math. Statist. Prob., Vol. 1, 1967, pp 281-297", url = "http://projecteuclid.org/euclid.bsmsp/1200512992")
+public class KMeansMacQueen<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractKMeans<V, D> implements ClusteringAlgorithm<Clustering<MeanModel<V>>> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(KMeansMacQueen.class);
+
+  /**
+   * Constructor.
+   * 
+   * @param distanceFunction distance function
+   * @param k k parameter
+   * @param maxiter Maxiter parameter
+   */
+  public KMeansMacQueen(PrimitiveDistanceFunction<NumberVector<?, ?>, D> distanceFunction, int k, int maxiter, KMeansInitialization<V> initializer) {
+    super(distanceFunction, k, maxiter, initializer);
+  }
+
+  /**
+   * Run k-means
+   * 
+   * @param database Database
+   * @param relation relation to use
+   * @return result
+   * @throws IllegalStateException
+   */
+  public Clustering<MeanModel<V>> run(Database database, Relation<V> relation) throws IllegalStateException {
+    if(relation.size() <= 0) {
+      return new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    }
+    // Choose initial means
+    List<Vector> means = initializer.chooseInitialMeans(relation, k, getDistanceFunction());
+    // Initialize cluster and assign objects
+    List<ModifiableDBIDs> clusters = new ArrayList<ModifiableDBIDs>();
+    for(int i = 0; i < k; i++) {
+      clusters.add(DBIDUtil.newHashSet(relation.size() / k));
+    }
+    assignToNearestCluster(relation, means, clusters);
+    // Initial recomputation of the means.
+    means = means(clusters, means, relation);
+
+    // Refine result
+    for(int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
+      if(logger.isVerbose()) {
+        logger.verbose("K-Means iteration " + (iteration + 1));
+      }
+      boolean changed = macQueenIterate(relation, means, clusters);
+      if(!changed) {
+        break;
+      }
+    }
+    final V factory = DatabaseUtil.assumeVectorField(relation).getFactory();
+    Clustering<MeanModel<V>> result = new Clustering<MeanModel<V>>("k-Means Clustering", "kmeans-clustering");
+    for(int i = 0; i < clusters.size(); i++) {
+      DBIDs ids = clusters.get(i);
+      MeanModel<V> model = new MeanModel<V>(factory.newNumberVector(means.get(i).getArrayRef()));
+      result.addCluster(new Cluster<MeanModel<V>>(ids, model));
+    }
+    return result;
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return logger;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends Distance<D>> extends AbstractPrimitiveDistanceBasedAlgorithm.Parameterizer<NumberVector<?, ?>, D> {
+    protected int k;
+
+    protected int maxiter;
+
+    protected KMeansInitialization<V> initializer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntParameter kP = new IntParameter(K_ID, new GreaterConstraint(0));
+      if(config.grab(kP)) {
+        k = kP.getValue();
+      }
+
+      ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<KMeansInitialization<V>>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+      if(config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+
+      IntParameter maxiterP = new IntParameter(MAXITER_ID, new GreaterEqualConstraint(0), 0);
+      if(config.grab(maxiterP)) {
+        maxiter = maxiterP.getValue();
+      }
+    }
+
+    @Override
+    protected AbstractKMeans<V, D> makeInstance() {
+      return new KMeansMacQueen<V, D>(distanceFunction, k, maxiter, initializer);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
new file mode 100644
index 00000000..c7a2fa1d
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/KMeansPlusPlusInitialMeans.java
@@ -0,0 +1,213 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.query.distance.DistanceQuery;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDoubleDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancevalue.NumberDistance;
+import de.lmu.ifi.dbs.elki.logging.LoggingUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException;
+
+/**
+ * K-Means++ initialization for k-means.
+ * 
+ * Reference:
+ * <p>
+ * D. Arthur, S. Vassilvitskii<br />
+ * k-means++: the advantages of careful seeding<br />
+ * In: Proc. of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms,
+ * SODA 2007
+ * </p>
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ * @param <D> Distance type
+ */
+@Reference(authors = "D. Arthur, S. Vassilvitskii", title = "k-means++: the advantages of careful seeding", booktitle = "Proc. of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2007", url = "http://dx.doi.org/10.1145/1283383.1283494")
+public class KMeansPlusPlusInitialMeans<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public KMeansPlusPlusInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    // Get a distance query
+    if(!(distanceFunction.getDistanceFactory() instanceof NumberDistance)) {
+      throw new AbortException("K-Means++ initialization can only be used with numerical distances.");
+    }
+    @SuppressWarnings("unchecked")
+    final PrimitiveDistanceFunction<? super V, D> distF = (PrimitiveDistanceFunction<? super V, D>) distanceFunction;
+    DistanceQuery<V, D> distQ = relation.getDatabase().getDistanceQuery(relation, distF);
+
+    // Chose first mean
+    List<Vector> means = new ArrayList<Vector>(k);
+
+    Random random = (seed != null) ? new Random(seed) : new Random();
+    DBID first = DBIDUtil.randomSample(relation.getDBIDs(), 1, random.nextLong()).iterator().next();
+    means.add(relation.get(first).getColumnVector());
+
+    ModifiableDBIDs chosen = DBIDUtil.newHashSet(k);
+    chosen.add(first);
+    ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
+    // Initialize weights
+    double[] weights = new double[ids.size()];
+    double weightsum = initialWeights(weights, ids, first, distQ);
+    while(means.size() < k) {
+      if(weightsum > Double.MAX_VALUE) {
+        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - too many data points, too large squared distances?");
+      }
+      if(weightsum < Double.MIN_NORMAL) {
+        LoggingUtil.warning("Could not choose a reasonable mean for k-means++ - to few data points?");
+      }
+      double r = random.nextDouble() * weightsum;
+      int pos = 0;
+      while(r > 0 && pos < weights.length) {
+        r -= weights[pos];
+        pos++;
+      }
+      // Add new mean:
+      DBID newmean = ids.get(pos);
+      means.add(relation.get(newmean).getColumnVector());
+      chosen.add(newmean);
+      // Update weights:
+      weights[pos] = 0.0;
+      // Choose optimized version for double distances, if applicable.
+      if (distF instanceof PrimitiveDoubleDistanceFunction) {
+        @SuppressWarnings("unchecked")
+        PrimitiveDoubleDistanceFunction<V> ddist = (PrimitiveDoubleDistanceFunction<V>) distF;
+        weightsum = updateWeights(weights, ids, newmean, ddist, relation);
+      } else {
+        weightsum = updateWeights(weights, ids, newmean, distQ);
+      }
+    }
+
+    return means;
+  }
+
+  /**
+   * Initialize the weight list.
+   * 
+   * @param weights Weight list
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distQ Distance query
+   * @return Weight sum
+   */
+  protected double initialWeights(double[] weights, ArrayDBIDs ids, DBID latest, DistanceQuery<V, D> distQ) {
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      if(latest.equals(id)) {
+        weights[i] = 0.0;
+      }
+      else {
+        double d = distQ.distance(latest, id).doubleValue();
+        weights[i] = d * d;
+      }
+      weightsum += weights[i];
+    }
+    return weightsum;
+  }
+
+  /**
+   * Update the weight list.
+   * 
+   * @param weights Weight list
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distQ Distance query
+   * @return Weight sum
+   */
+  protected double updateWeights(double[] weights, ArrayDBIDs ids, DBID latest, DistanceQuery<V, D> distQ) {
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      if(weights[i] > 0.0) {
+        double d = distQ.distance(latest, id).doubleValue();
+        weights[i] = Math.min(weights[i], d * d);
+        weightsum += weights[i];
+      }
+    }
+    return weightsum;
+  }
+
+  /**
+   * Update the weight list.
+   * 
+   * @param weights Weight list
+   * @param ids IDs
+   * @param latest Added ID
+   * @param distF Distance function
+   * @return Weight sum
+   */
+  protected double updateWeights(double[] weights, ArrayDBIDs ids, DBID latest, PrimitiveDoubleDistanceFunction<V> distF, Relation<V> rel) {
+    final V lv = rel.get(latest);
+    double weightsum = 0.0;
+    DBIDIter it = ids.iter();
+    for(int i = 0; i < weights.length; i++, it.advance()) {
+      DBID id = it.getDBID();
+      if(weights[i] > 0.0) {
+        double d = distF.doubleDistance(lv, rel.get(id));
+        weights[i] = Math.min(weights[i], d * d);
+        weightsum += weights[i];
+      }
+    }
+    return weightsum;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>, D extends NumberDistance<D, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+    @Override
+    protected KMeansPlusPlusInitialMeans<V, D> makeInstance() {
+      return new KMeansPlusPlusInitialMeans<V, D>(seed);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
new file mode 100644
index 00000000..30e59453
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyChosenInitialMeans.java
@@ -0,0 +1,78 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Initialize K-means by randomly choosing k exsiting elements as cluster
+ * centers.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public class RandomlyChosenInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public RandomlyChosenInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), k, seed);
+    List<Vector> means = new ArrayList<Vector>(k);
+    for(DBID id : ids) {
+      means.add(relation.get(id).getColumnVector());
+    }
+    return means;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+
+    @Override
+    protected RandomlyChosenInitialMeans<V> makeInstance() {
+      return new RandomlyChosenInitialMeans<V>(seed);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
new file mode 100644
index 00000000..e8a466dd
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/RandomlyGeneratedInitialMeans.java
@@ -0,0 +1,87 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+import de.lmu.ifi.dbs.elki.utilities.DatabaseUtil;
+import de.lmu.ifi.dbs.elki.utilities.pairs.Pair;
+
+/**
+ * Initialize k-means by generating random vectors (within the data sets value
+ * range).
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public class RandomlyGeneratedInitialMeans<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization<V> {
+  /**
+   * Constructor.
+   * 
+   * @param seed Random seed.
+   */
+  public RandomlyGeneratedInitialMeans(Long seed) {
+    super(seed);
+  }
+
+  @Override
+  public List<Vector> chooseInitialMeans(Relation<V> relation, int k, PrimitiveDistanceFunction<? super V, ?> distanceFunction) {
+    final int dim = DatabaseUtil.dimensionality(relation);
+    Pair<V, V> minmax = DatabaseUtil.computeMinMax(relation);
+    List<Vector> means = new ArrayList<Vector>(k);
+    final Random random = (this.seed != null) ? new Random(this.seed) : new Random();
+    for(int i = 0; i < k; i++) {
+      double[] r = MathUtil.randomDoubleArray(dim, random);
+      // Rescale
+      for(int d = 0; d < dim; d++) {
+        r[d] = minmax.first.doubleValue(d + 1) + (minmax.second.doubleValue(d + 1) - minmax.first.doubleValue(d + 1)) * r[d];
+      }
+      means.add(new Vector(r));
+    }
+    return means;
+  }
+
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector<V, ?>> extends AbstractKMeansInitialization.Parameterizer<V> {
+
+    @Override
+    protected RandomlyGeneratedInitialMeans<V> makeInstance() {
+      return new RandomlyGeneratedInitialMeans<V>(seed);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
new file mode 100644
index 00000000..2ce625b0
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/kmeans/package-info.java
@@ -0,0 +1,26 @@
+/**
+ * <p>K-means clustering and variations.</p>
+ */
+/*
+This file is part of ELKI:
+Environment for Developing KDD-Applications Supported by Index-Structures
+
+Copyright (C) 2012
+Ludwig-Maximilians-Universität München
+Lehr- und Forschungseinheit für Datenbanksysteme
+ELKI Development Team
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans;
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
index 660a7a4f..eed031df 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/package-info.java
@@ -1,5 +1,5 @@
 /**
- * <p>Clustering algorithms</p>
+ * <p>Clustering algorithms.</p>
  * 
  * Clustering algorithms are supposed to implement the {@link de.lmu.ifi.dbs.elki.algorithm.Algorithm}-Interface.
  * The more specialized interface {@link de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm}
@@ -15,7 +15,7 @@
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
index dfc4e1cd..e3b274a6 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/CLIQUE.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
index 987c7eda..c4c1687b 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/DiSH.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
index 36473cc0..40ab60a8 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/HiSC.java
@@ -3,7 +3,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
index 92c2248c..3f16e907 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PROCLUS.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
index 22e9c150..4ca5a564 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/PreDeCon.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
index c0edb2ea..963c0922 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/SUBCLU.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -44,7 +44,7 @@ import de.lmu.ifi.dbs.elki.database.ProxyDatabase;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
 import de.lmu.ifi.dbs.elki.database.relation.Relation;
 import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.AbstractDimensionsSelectingDoubleDistanceFunction;
-import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.DimensionsSelectingEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction;
 import de.lmu.ifi.dbs.elki.distance.distancevalue.DoubleDistance;
 import de.lmu.ifi.dbs.elki.logging.Logging;
 import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;
@@ -94,7 +94,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
   /**
    * The distance function to determine the distance between database objects.
    * <p>
-   * Default value: {@link DimensionsSelectingEuclideanDistanceFunction}
+   * Default value: {@link SubspaceEuclideanDistanceFunction}
    * </p>
    * <p>
    * Key: {@code -subclu.distancefunction}
@@ -477,7 +477,7 @@ public class SUBCLU<V extends NumberVector<V, ?>> extends AbstractAlgorithm<Clus
     @Override
     protected void makeOptions(Parameterization config) {
       super.makeOptions(config);
-      ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>> param = new ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>>(DISTANCE_FUNCTION_ID, AbstractDimensionsSelectingDoubleDistanceFunction.class, DimensionsSelectingEuclideanDistanceFunction.class);
+      ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>> param = new ObjectParameter<AbstractDimensionsSelectingDoubleDistanceFunction<V>>(DISTANCE_FUNCTION_ID, AbstractDimensionsSelectingDoubleDistanceFunction.class, SubspaceEuclideanDistanceFunction.class);
       if(config.grab(param)) {
         distance = param.instantiateClass(config);
       }
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
index 1874c9e8..eff71a35 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUESubspace.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
index 4b6fa9ad..db687567 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/CLIQUEUnit.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.subspace.clique;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -34,6 +34,7 @@ import de.lmu.ifi.dbs.elki.data.NumberVector;
 import de.lmu.ifi.dbs.elki.database.ids.DBID;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
 import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
+import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
 import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
 
 /**
@@ -265,7 +266,7 @@ public class CLIQUEUnit<V extends NumberVector<V, ?>> {
     resultIntervals.add(this.intervals.last());
     resultIntervals.add(other.intervals.last());
 
-    ModifiableDBIDs resultIDs = DBIDUtil.newHashSet(this.ids);
+    HashSetModifiableDBIDs resultIDs = DBIDUtil.newHashSet(this.ids);
     resultIDs.retainAll(other.ids);
 
     if(resultIDs.size() / all >= tau) {
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
index 444cb0e6..7a686190 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/clique/package-info.java
@@ -7,7 +7,7 @@
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
index 168ceadb..2a1eb930 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/subspace/package-info.java
@@ -10,7 +10,7 @@
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
index 02350db3..43c6a218 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
@@ -85,9 +85,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
   public static final OptionID MULTIPLE_ID = OptionID.getOrCreateOptionID("bylabelclustering.multiple", "Flag to indicate that only subspaces with large coverage " + "(i.e. the fraction of the database that is covered by the dense units) " + "are selected, the rest will be pruned.");
 
   /**
-   * Flag to indicate that multiple cluster assignment is possible. If an
-   * assignment to multiple clusters is desired, the labels indicating the
-   * clusters need to be separated by blanks.
+   * Pattern to recognize noise clusters by.
    */
   public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("bylabelclustering.noise", "Pattern to recognize noise classes by their label.");
 
@@ -144,7 +142,7 @@ public class ByLabelClustering extends AbstractAlgorithm<Clustering<Model>> impl
     ModifiableDBIDs noiseids = DBIDUtil.newArray();
     Clustering<Model> result = new Clustering<Model>("By Label Clustering", "bylabel-clustering");
     for(Entry<String, ModifiableDBIDs> entry : labelMap.entrySet()) {
-      ModifiableDBIDs ids = labelMap.get(entry.getKey());
+      ModifiableDBIDs ids = entry.getValue();
       if(ids.size() <= 1) {
         noiseids.addDBIDs(ids);
         continue;
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
index 5b8041d7..228cc7e7 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByLabelHierarchicalClustering.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
new file mode 100644
index 00000000..cd45cda2
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/ByModelClustering.java
@@ -0,0 +1,163 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2012
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.regex.Pattern;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.model.Model;
+import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorInterface;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.ids.DBID;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.PatternParameter;
+
+/**
+ * Pseudo clustering using annotated models.
+ * 
+ * This "algorithm" puts elements into the same cluster when they agree in their
+ * model. I.e. it just uses a predefined clustering, and is mostly useful for
+ * testing and evaluation (e.g. comparing the result of a real algorithm to the
+ * reference result / golden standard used by the generator).
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.uses Model
+ */
+@Title("Clustering by model")
+@Description("Cluster points by a (pre-assigned!) model. For comparing results with a reference clustering.")
+public class ByModelClustering extends AbstractAlgorithm<Clustering<Model>> implements ClusteringAlgorithm<Clustering<Model>> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging logger = Logging.getLogger(ByModelClustering.class);
+
+  /**
+   * Pattern to recognize noise clusters with
+   */
+  public static final OptionID NOISE_ID = OptionID.getOrCreateOptionID("bymodel.noise", "Pattern to recognize noise models by their label.");
+
+  /**
+   * Holds the value of {@link #NOISE_ID}.
+   */
+  private Pattern noisepattern = null;
+
+  /**
+   * Constructor.
+   * 
+   * @param noisepattern Noise pattern
+   */
+  public ByModelClustering(Pattern noisepattern) {
+    super();
+    this.noisepattern = noisepattern;
+  }
+
+  /**
+   * Constructor without parameters
+   */
+  public ByModelClustering() {
+    this(null);
+  }
+
+  /**
+   * Run the actual clustering algorithm.
+   * 
+   * @param relation The data input we use
+   */
+  public Clustering<Model> run(Relation<Model> relation) {
+    // Build model mapping
+    HashMap<Model, ModifiableDBIDs> modelMap = new HashMap<Model, ModifiableDBIDs>();
+    for(DBID id : relation.iterDBIDs()) {
+      Model model = relation.get(id);
+      ModifiableDBIDs modelids = modelMap.get(model);
+      if(modelids == null) {
+        modelids = DBIDUtil.newHashSet();
+        modelMap.put(model, modelids);
+      }
+      modelids.add(id);
+    }
+
+    Clustering<Model> result = new Clustering<Model>("By Model Clustering", "bymodel-clustering");
+    for(Entry<Model, ModifiableDBIDs> entry : modelMap.entrySet()) {
+      final Model model = entry.getKey();
+      final ModifiableDBIDs ids = entry.getValue();
+      final String name = (model instanceof GeneratorInterface) ? ((GeneratorInterface) model).getName() : model.toString();
+      Cluster<Model> c = new Cluster<Model>(name, ids, model);
+      if(noisepattern != null && noisepattern.matcher(name).find()) {
+        c.setNoise(true);
+      }
+      result.addCluster(c);
+    }
+    return result;
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(TypeUtil.MODEL);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return logger;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer extends AbstractParameterizer {
+    protected Pattern noisepat;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      PatternParameter noisepatP = new PatternParameter(NOISE_ID, true);
+      if(config.grab(noisepatP)) {
+        noisepat = noisepatP.getValue();
+      }
+    }
+
+    @Override
+    protected ByModelClustering makeInstance() {
+      return new ByModelClustering(noisepat);
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
index a316ce57..2e7d006d 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllInOne.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
index b85f5445..c497632c 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/TrivialAllNoise.java
@@ -4,7 +4,7 @@ package de.lmu.ifi.dbs.elki.algorithm.clustering.trivial;
  This file is part of ELKI:
  Environment for Developing KDD-Applications Supported by Index-Structures
 
- Copyright (C) 2011
+ Copyright (C) 2012
  Ludwig-Maximilians-Universität München
  Lehr- und Forschungseinheit für Datenbanksysteme
  ELKI Development Team
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
index 5629855c..5870a736 100644
--- a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/trivial/package-info.java
@@ -7,7 +7,7 @@
 This file is part of ELKI:
 Environment for Developing KDD-Applications Supported by Index-Structures
 
-Copyright (C) 2011
+Copyright (C) 2012
 Ludwig-Maximilians-Universität München
 Lehr- und Forschungseinheit für Datenbanksysteme
 ELKI Development Team