11 files changed, 1530 insertions, 0 deletions
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/AbstractEMModelFactory.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/AbstractEMModelFactory.java
new file mode 100644
index 00000000..605c3aed
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/AbstractEMModelFactory.java
@@ -0,0 +1,89 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.RandomlyGeneratedInitialMeans;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Abstract base class for initializing EM.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ * @param <M> Model type
+ */
+public abstract class AbstractEMModelFactory<V extends NumberVector, M extends MeanModel> implements EMClusterModelFactory<V, M> {
+  /**
+   * Class to choose the initial means
+   */
+  protected KMeansInitialization<V> initializer;
+
+  /**
+   * Constructor.
+   * 
+   * @param initializer Class for choosing the initial seeds.
+   */
+  public AbstractEMModelFactory(KMeansInitialization<V> initializer) {
+    super();
+    this.initializer = initializer;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   * 
+   * @param <V> vector type
+   */
+  public abstract static class Parameterizer<V extends NumberVector> extends AbstractParameterizer {
+    /**
+     * Parameter to specify the cluster center initialization.
+     */
+    public static final OptionID INIT_ID = new OptionID("em.centers", //
+    "Method to choose the initial cluster centers.");
+
+    /**
+     * Initialization method
+     */
+    protected KMeansInitialization<V> initializer;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      ObjectParameter<KMeansInitialization<V>> initialP = new ObjectParameter<>(INIT_ID, KMeansInitialization.class, RandomlyGeneratedInitialMeans.class);
+      if(config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+    }
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModel.java
new file mode 100644
index 00000000..4ba9290f
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModel.java
@@ -0,0 +1,200 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import java.util.Arrays;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.EMModel;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Simpler model for a single Gaussian cluster, without covariances.
+ * 
+ * @author Erich Schubert
+ */
+public class DiagonalGaussianModel implements EMClusterModel<EMModel> {
+  /**
+   * Class logger.
+   */
+  private static Logging LOG = Logging.getLogger(DiagonalGaussianModel.class);
+
+  /**
+   * Mean vector.
+   */
+  Vector mean;
+
+  /**
+   * Per-dimension variances.
+   */
+  double[] variances;
+
+  /**
+   * Temporary storage, to avoid reallocations.
+   */
+  double[] nmea, mref;
+
+  /**
+   * Normalization factor.
+   */
+  double norm, normDistrFactor;
+
+  /**
+   * Weight aggregation sum
+   */
+  double weight, wsum;
+
+  /**
+   * Constructor.
+   * 
+   * @param weight Cluster weight
+   * @param mean Initial mean
+   */
+  public DiagonalGaussianModel(double weight, Vector mean) {
+    this(weight, mean, MathUtil.powi(MathUtil.TWOPI, mean.getDimensionality()));
+  }
+
+  /**
+   * Constructor.
+   * 
+   * @param weight Cluster weight
+   * @param mean Initial mean
+   * @param norm Normalization factor.
+   */
+  public DiagonalGaussianModel(double weight, Vector mean, double norm) {
+    this.weight = weight;
+    final int dim = mean.getDimensionality();
+    this.mean = mean;
+    this.norm = norm;
+    this.normDistrFactor = 1. / Math.sqrt(norm); // assume det=1
+    this.mref = mean.getArrayRef();
+    this.nmea = new double[dim];
+    this.variances = new double[dim];
+    Arrays.fill(variances, 1.);
+    this.wsum = 0.;
+  }
+
+  @Override
+  public void beginEStep() {
+    wsum = 0.;
+  }
+
+  @Override
+  public void updateE(NumberVector vec, double wei) {
+    assert (vec.getDimensionality() == mref.length);
+    final double nwsum = wsum + wei;
+    // Compute new means
+    for(int i = 0; i < mref.length; i++) {
+      final double delta = vec.doubleValue(i) - mref[i];
+      final double rval = delta * wei / nwsum;
+      nmea[i] = mref[i] + rval;
+    }
+    // Update variances
+    for(int i = 0; i < mref.length; i++) {
+      // We DO want to use the new mean once and the old mean once!
+      // It does not matter which one is which.
+      variances[i] += (vec.doubleValue(i) - nmea[i]) * (vec.doubleValue(i) - mref[i]) * wei;
+    }
+    // Use new values.
+    wsum = nwsum;
+    System.arraycopy(nmea, 0, mref, 0, nmea.length);
+  }
+
+  @Override
+  public void finalizeEStep() {
+    if(wsum > 0.) {
+      final double s = 1. / wsum;
+      double det = 1.;
+      for(int i = 0; i < variances.length; i++) {
+        double v = variances[i];
+        v = v > 0 ? v * s : Matrix.SINGULARITY_CHEAT;
+        variances[i] = v;
+        det *= v;
+      }
+      normDistrFactor = 1. / Math.sqrt(norm * det);
+    }
+    else {
+      // Degenerate
+      normDistrFactor = 1. / Math.sqrt(norm);
+    }
+  }
+
+  /**
+   * Compute the Mahalanobis distance from the centroid for a given vector.
+   * 
+   * @param vec Vector
+   * @return Mahalanobis distance
+   */
+  public double mahalanobisDistance(Vector vec) {
+    double[] difference = vec.minus(mean).getArrayRef();
+    double agg = 0.;
+    for(int i = 0; i < variances.length; i++) {
+      agg += difference[i] / variances[i] * difference[i];
+    }
+    return agg;
+  }
+
+  /**
+   * Compute the Mahalanobis distance from the centroid for a given vector.
+   * 
+   * @param vec Vector
+   * @return Mahalanobis distance
+   */
+  public double mahalanobisDistance(NumberVector vec) {
+    double[] difference = vec.getColumnVector().minusEquals(mean).getArrayRef();
+    double agg = 0.;
+    for(int i = 0; i < variances.length; i++) {
+      agg += difference[i] / variances[i] * difference[i];
+    }
+    return agg;
+  }
+
+  @Override
+  public double estimateDensity(NumberVector vec) {
+    double power = mahalanobisDistance(vec) * .5;
+    double prob = normDistrFactor * Math.exp(-power);
+    if(!(prob >= 0.)) {
+      LOG.warning("Invalid probability: " + prob + " power: " + power + " factor: " + normDistrFactor);
+      prob = 0.;
+    }
+    return prob * weight;
+  }
+
+  @Override
+  public double getWeight() {
+    return weight;
+  }
+
+  @Override
+  public void setWeight(double weight) {
+    this.weight = weight;
+  }
+
+  @Override
+  public EMModel finalizeCluster() {
+    return new EMModel(mean, Matrix.diagonal(variances));
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModelFactory.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModelFactory.java
new file mode 100644
index 00000000..3ac02dae
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/DiagonalGaussianModelFactory.java
@@ -0,0 +1,88 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.EMModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Factory for EM with multivariate gaussian models using diagonal matrixes.
+ * 
+ * These models have individual variances, but no covariance, so this
+ * corresponds to the {@code 'VVI'} model in Mclust (R).
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.has DiagonalGaussianModel
+ * 
+ * @param <V> vector type
+ */
+public class DiagonalGaussianModelFactory<V extends NumberVector> extends AbstractEMModelFactory<V, EMModel> {
+  /**
+   * Constructor.
+   * 
+   * @param initializer Class for choosing the inital seeds.
+   */
+  public DiagonalGaussianModelFactory(KMeansInitialization<V> initializer) {
+    super(initializer);
+  }
+
+  @Override
+  public List<DiagonalGaussianModel> buildInitialModels(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector> df) {
+    final List<Vector> initialMeans = initializer.chooseInitialMeans(database, relation, k, df, Vector.FACTORY);
+    assert (initialMeans.size() == k);
+    final int dimensionality = initialMeans.get(0).getDimensionality();
+    final double norm = MathUtil.powi(MathUtil.TWOPI, dimensionality);
+    List<DiagonalGaussianModel> models = new ArrayList<>(k);
+    for(Vector nv : initialMeans) {
+      models.add(new DiagonalGaussianModel(1. / k, nv, norm));
+    }
+    return models;
+  }
+
+  /**
+   * Parameterization class
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   * 
+   * @param <V> Vector type
+   */
+  public static class Parameterizer<V extends NumberVector> extends AbstractEMModelFactory.Parameterizer<V> {
+    @Override
+    protected DiagonalGaussianModelFactory<V> makeInstance() {
+      return new DiagonalGaussianModelFactory<>(initializer);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EM.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EM.java
new file mode 100644
index 00000000..076c819b
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EM.java
@@ -0,0 +1,414 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans;
+import de.lmu.ifi.dbs.elki.data.Cluster;
+import de.lmu.ifi.dbs.elki.data.Clustering;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
+import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreFactory;
+import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
+import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
+import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
+import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
+import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.utilities.Alias;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
+import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
+import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
+
+/**
+ * Clustering by expectation maximization (EM-Algorithm), also known as Gaussian
+ * Mixture Modeling (GMM).
+ * 
+ * <p>
+ * Reference: A. P. Dempster, N. M. Laird, D. B. Rubin:<br />
+ * Maximum Likelihood from Incomplete Data via the EM algorithm.<br>
+ * In Journal of the Royal Statistical Society, Series B, 39(1), 1977, pp. 1-31
+ * </p>
+ * 
+ * @author Arthur Zimek
+ * @author Erich Schubert
+ * 
+ * @apiviz.composedOf EMClusterModelFactory
+ * 
+ * @param <V> vector type to analyze
+ * @param <M> model type to produce
+ */
+@Title("EM-Clustering: Clustering by Expectation Maximization")
+@Description("Cluster data via Gaussian mixture modeling and the EM algorithm")
+@Reference(authors = "A. P. Dempster, N. M. Laird, D. B. Rubin", //
+title = "Maximum Likelihood from Incomplete Data via the EM algorithm", //
+booktitle = "Journal of the Royal Statistical Society, Series B, 39(1), 1977, pp. 1-31", //
+url = "http://www.jstor.org/stable/2984875")
+@Alias({ "de.lmu.ifi.dbs.elki.algorithm.clustering.EM", "EM" })
+public class EM<V extends NumberVector, M extends MeanModel> extends AbstractAlgorithm<Clustering<M>> implements ClusteringAlgorithm<Clustering<M>> {
+  /**
+   * The logger for this class.
+   */
+  private static final Logging LOG = Logging.getLogger(EM.class);
+
+  /**
+   * Number of clusters
+   */
+  private int k;
+
+  /**
+   * Delta parameter
+   */
+  private double delta;
+
+  /**
+   * Factory for producing the initial cluster model.
+   */
+  private EMClusterModelFactory<V, M> mfactory;
+
+  /**
+   * Maximum number of iterations to allow
+   */
+  private int maxiter;
+
+  /**
+   * Retain soft assignments.
+   */
+  private boolean soft;
+
+  private static final double MIN_LOGLIKELIHOOD = -100000;
+
+  /**
+   * Soft assignment result type.
+   */
+  public static final SimpleTypeInformation<double[]> SOFT_TYPE = new SimpleTypeInformation<>(double[].class);
+
+  /**
+   * Constructor.
+   * 
+   * @param k k parameter
+   * @param delta delta parameter
+   * @param mfactory EM cluster model factory
+   * @param maxiter Maximum number of iterations
+   * @param soft Include soft assignments
+   */
+  public EM(int k, double delta, EMClusterModelFactory<V, M> mfactory, int maxiter, boolean soft) {
+    super();
+    this.k = k;
+    this.delta = delta;
+    this.mfactory = mfactory;
+    this.maxiter = maxiter;
+    this.setSoft(soft);
+  }
+
+  /**
+   * Performs the EM clustering algorithm on the given database.
+   * <p/>
+   * Finally a hard clustering is provided where each clusters gets assigned the
+   * points exhibiting the highest probability to belong to this cluster. But
+   * still, the database objects hold associated the complete probability-vector
+   * for all models.
+   * 
+   * @param database Database
+   * @param relation Relation
+   * @return Result
+   */
+  public Clustering<M> run(Database database, Relation<V> relation) {
+    if(relation.size() == 0) {
+      throw new IllegalArgumentException("database empty: must contain elements");
+    }
+    // initial models
+    if(LOG.isVerbose()) {
+      LOG.verbose("initializing " + k + " models");
+    }
+    List<? extends EMClusterModel<M>> models = mfactory.buildInitialModels(database, relation, k, SquaredEuclideanDistanceFunction.STATIC);
+    WritableDataStore<double[]> probClusterIGivenX = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_SORTED, double[].class);
+    double emNew = assignProbabilitiesToInstances(relation, models, probClusterIGivenX);
+
+    // iteration unless no change
+    if(LOG.isVerbose()) {
+      LOG.verbose("iterating EM");
+    }
+    if(LOG.isVerbose()) {
+      LOG.verbose("iteration " + 0 + " - expectation value: " + emNew);
+    }
+
+    for(int it = 1; it <= maxiter || maxiter < 0; it++) {
+      final double emOld = emNew;
+      recomputeCovarianceMatrices(relation, probClusterIGivenX, models);
+      // reassign probabilities
+      emNew = assignProbabilitiesToInstances(relation, models, probClusterIGivenX);
+
+      if(LOG.isVerbose()) {
+        LOG.verbose("iteration " + it + " - expectation value: " + emNew);
+      }
+      if(Math.abs(emOld - emNew) <= delta) {
+        break;
+      }
+    }
+
+    if(LOG.isVerbose()) {
+      LOG.verbose("assigning clusters");
+    }
+
+    // fill result with clusters and models
+    List<ModifiableDBIDs> hardClusters = new ArrayList<>(k);
+    for(int i = 0; i < k; i++) {
+      hardClusters.add(DBIDUtil.newHashSet());
+    }
+
+    // provide a hard clustering
+    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+      double[] clusterProbabilities = probClusterIGivenX.get(iditer);
+      int maxIndex = 0;
+      double currentMax = 0.0;
+      for(int i = 0; i < k; i++) {
+        if(clusterProbabilities[i] > currentMax) {
+          maxIndex = i;
+          currentMax = clusterProbabilities[i];
+        }
+      }
+      hardClusters.get(maxIndex).add(iditer);
+    }
+    Clustering<M> result = new Clustering<>("EM Clustering", "em-clustering");
+    // provide models within the result
+    for(int i = 0; i < k; i++) {
+      // TODO: re-do labeling.
+      // SimpleClassLabel label = new SimpleClassLabel();
+      // label.init(result.canonicalClusterLabel(i));
+      Cluster<M> model = new Cluster<>(hardClusters.get(i), models.get(i).finalizeCluster());
+      result.addToplevelCluster(model);
+    }
+    if(isSoft()) {
+      result.addChildResult(new MaterializedRelation<>("cluster assignments", "em-soft-score", SOFT_TYPE, probClusterIGivenX, relation.getDBIDs()));
+    }
+    else {
+      probClusterIGivenX.destroy();
+    }
+    return result;
+  }
+
+  /**
+   * Recompute the covariance matrixes.
+   * 
+   * @param relation Vector data
+   * @param probClusterIGivenX Object probabilities
+   * @param models Cluster models to update
+   */
+  public static void recomputeCovarianceMatrices(Relation<? extends NumberVector> relation, WritableDataStore<double[]> probClusterIGivenX, List<? extends EMClusterModel<?>> models) {
+    for(EMClusterModel<?> m : models) {
+      m.beginEStep();
+    }
+    double[] wsum = new double[models.size()];
+    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+      double[] clusterProbabilities = probClusterIGivenX.get(iditer);
+      NumberVector instance = relation.get(iditer);
+      int i = 0;
+      for(EMClusterModel<?> m : models) {
+        final double prior = clusterProbabilities[i];
+        if(prior > 0.) {
+          m.updateE(instance, prior);
+        }
+        wsum[i] += prior;
+        ++i;
+      }
+    }
+    int i = 0;
+    for(EMClusterModel<?> m : models) {
+      m.finalizeEStep();
+      m.setWeight(wsum[i] / relation.size());
+      i++;
+    }
+  }
+
+  /**
+   * Assigns the current probability values to the instances in the database and
+   * compute the expectation value of the current mixture of distributions.
+   * 
+   * Computed as the sum of the logarithms of the prior probability of each
+   * instance.
+   * 
+   * @param relation the database used for assignment to instances
+   * @param models Cluster models
+   * @param probClusterIGivenX Output storage for cluster probabilities
+   * @return the expectation value of the current mixture of distributions
+   */
+  public static double assignProbabilitiesToInstances(Relation<? extends NumberVector> relation, List<? extends EMClusterModel<?>> models, WritableDataStore<double[]> probClusterIGivenX) {
+    final int k = models.size();
+    double emSum = 0.;
+
+    for(DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
+      NumberVector vec = relation.get(iditer);
+      double[] probabilities = new double[k];
+      {
+        int i = 0;
+        for(EMClusterModel<?> m : models) {
+          probabilities[i] = m.estimateDensity(vec);
+          ++i;
+        }
+      }
+      double priorProbability = 0.;
+      for(int i = 0; i < k; i++) {
+        priorProbability += probabilities[i];
+      }
+      double logP = Math.max(Math.log(priorProbability), MIN_LOGLIKELIHOOD);
+      emSum += (logP == logP) ? logP : 0.; /* avoid NaN */
+
+      double[] clusterProbabilities = new double[k];
+      if(priorProbability > 0.) {
+        for(int i = 0; i < k; i++) {
+          // do not divide by zero!
+          clusterProbabilities[i] = probabilities[i] / priorProbability;
+        }
+      }
+      probClusterIGivenX.put(iditer, clusterProbabilities);
+    }
+
+    return emSum / relation.size();
+  }
+
+  @Override
+  public TypeInformation[] getInputTypeRestriction() {
+    return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
+  }
+
+  @Override
+  protected Logging getLogger() {
+    return LOG;
+  }
+
+  /**
+   * @return the soft
+   */
+  public boolean isSoft() {
+    return soft;
+  }
+
+  /**
+   * @param soft the soft to set
+   */
+  public void setSoft(boolean soft) {
+    this.soft = soft;
+  }
+
+  /**
+   * Parameterization class.
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   */
+  public static class Parameterizer<V extends NumberVector, M extends MeanModel> extends AbstractParameterizer {
+    /**
+     * Parameter to specify the number of clusters to find, must be an integer
+     * greater than 0.
+     */
+    public static final OptionID K_ID = new OptionID("em.k", "The number of clusters to find.");
+
+    /**
+     * Parameter to specify the termination criterion for maximization of E(M):
+     * E(M) - E(M') < em.delta, must be a double equal to or greater than 0.
+     */
+    public static final OptionID DELTA_ID = new OptionID("em.delta", //
+    "The termination criterion for maximization of E(M): " + //
+    "E(M) - E(M') < em.delta");
+
+    /**
+     * Parameter to specify the EM cluster models to use.
+     */
+    public static final OptionID INIT_ID = new OptionID("em.model", //
+    "Model factory.");
+
+    /**
+     * Number of clusters.
+     */
+    protected int k;
+
+    /**
+     * Stopping threshold
+     */
+    protected double delta;
+
+    /**
+     * Initialization method
+     */
+    protected EMClusterModelFactory<V, M> initializer;
+
+    /**
+     * Maximum number of iterations.
+     */
+    protected int maxiter = -1;
+
+    @Override
+    protected void makeOptions(Parameterization config) {
+      super.makeOptions(config);
+      IntParameter kP = new IntParameter(K_ID);
+      kP.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
+      if(config.grab(kP)) {
+        k = kP.getValue();
+      }
+
+      ObjectParameter<EMClusterModelFactory<V, M>> initialP = new ObjectParameter<>(INIT_ID, EMClusterModelFactory.class, MultivariateGaussianModelFactory.class);
+      if(config.grab(initialP)) {
+        initializer = initialP.instantiateClass(config);
+      }
+
+      DoubleParameter deltaP = new DoubleParameter(DELTA_ID, 0.)//
+      .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE);
+      if(config.grab(deltaP)) {
+        delta = deltaP.getValue();
+      }
+
+      IntParameter maxiterP = new IntParameter(KMeans.MAXITER_ID)//
+      .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT) //
+      .setOptional(true);
+      if(config.grab(maxiterP)) {
+        maxiter = maxiterP.getValue();
+      }
+    }
+
+    @Override
+    protected EM<V, M> makeInstance() {
+      return new EM<>(k, delta, initializer, maxiter, false);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModel.java
new file mode 100644
index 00000000..f08e6444
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModel.java
@@ -0,0 +1,81 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+
+/**
+ * Models useable in EM clustering.
+ * 
+ * @author Erich Schubert
+ */
+public interface EMClusterModel<M extends MeanModel> {
+  /**
+   * Begin the E step.
+   */
+  void beginEStep();
+
+  /**
+   * Update the
+   * 
+   * @param vec Vector to process
+   * @param weight Weight
+   */
+  void updateE(NumberVector vec, double weight);
+
+  /**
+   * Finalize the E step.
+   */
+  void finalizeEStep();
+
+  /**
+   * Estimate the likelihood of a vector.
+   * 
+   * @param vec Vector
+   * @return Likelihood.
+   */
+  double estimateDensity(NumberVector vec);
+
+  /**
+   * Finalize a cluster model.
+   * 
+   * @return Cluster model
+   */
+  M finalizeCluster();  
+
+  /**
+   * Get the cluster weight.
+   * 
+   * @return Cluster weight
+   */
+  double getWeight();
+
+  /**
+   * Set the cluster weight.
+   * 
+   * @param weight Cluster weight
+   */
+  void setWeight(double weight);
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModelFactory.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModelFactory.java
new file mode 100644
index 00000000..223ebcd6
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/EMClusterModelFactory.java
@@ -0,0 +1,52 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.MeanModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+
+/**
+ * Factory for initializing the EM models.
+ * 
+ * @author Erich Schubert
+ * 
+ * @param <V> Vector type
+ */
+public interface EMClusterModelFactory<V extends NumberVector, M extends MeanModel> {
+  /**
+   * Build the initial models
+   * 
+   * @param database Database
+   * @param relation Relation
+   * @param k Number of clusters
+   * @param df Distance function
+   * @return Initial models
+   */
+  List<? extends EMClusterModel<M>> buildInitialModels(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector> df);
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModel.java
new file mode 100644
index 00000000..e134edff
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModel.java
@@ -0,0 +1,212 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.EMModel;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.LUDecomposition;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.VMath;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Model for a single Gaussian cluster.
+ * 
+ * @author Erich Schubert
+ */
+public class MultivariateGaussianModel implements EMClusterModel<EMModel> {
+  /**
+   * Class logger.
+   */
+  private static Logging LOG = Logging.getLogger(MultivariateGaussianModel.class);
+
+  /**
+   * Mean vector.
+   */
+  Vector mean;
+
+  /**
+   * Covariance matrix, and inverse.
+   */
+  Matrix covariance, invCovMatr;
+
+  /**
+   * Temporary storage, to avoid reallocations.
+   */
+  double[] nmea, mref;
+
+  /**
+   * Matrix element reference.
+   */
+  double[][] elements;
+
+  /**
+   * Normalization factor.
+   */
+  double norm, normDistrFactor;
+
+  /**
+   * Weight aggregation sum
+   */
+  double weight, wsum;
+
+  /**
+   * Constructor.
+   * 
+   * @param weight Cluster weight
+   * @param mean Initial mean
+   */
+  public MultivariateGaussianModel(double weight, Vector mean) {
+    this(weight, mean, MathUtil.powi(MathUtil.TWOPI, mean.getDimensionality()));
+  }
+
+  /**
+   * Constructor.
+   * 
+   * @param weight Cluster weight
+   * @param mean Initial mean
+   * @param norm Normalization factor.
+   */
+  public MultivariateGaussianModel(double weight, Vector mean, double norm) {
+    this.weight = weight;
+    final int dim = mean.getDimensionality();
+    this.mean = mean;
+    this.norm = norm;
+    this.normDistrFactor = 1. / Math.sqrt(norm); // assume det=1
+    this.mref = mean.getArrayRef();
+    this.nmea = new double[dim];
+    this.covariance = new Matrix(dim, dim);
+    this.elements = this.covariance.getArrayRef();
+    this.wsum = 0.;
+  }
+
+  @Override
+  public void beginEStep() {
+    if(covariance == null) {
+      covariance = new Matrix(mean.getDimensionality(), mean.getDimensionality());
+      return;
+    }
+    wsum = 0.;
+  }
+
+  @Override
+  public void updateE(NumberVector vec, double wei) {
+    assert (vec.getDimensionality() == mref.length);
+    final double nwsum = wsum + wei;
+    // Compute new means
+    for(int i = 0; i < mref.length; i++) {
+      final double delta = vec.doubleValue(i) - mref[i];
+      final double rval = delta * wei / nwsum;
+      nmea[i] = mref[i] + rval;
+    }
+    // Update covariance matrix
+    for(int i = 0; i < mref.length; i++) {
+      for(int j = i; j < mref.length; j++) {
+        // We DO want to use the new mean once and the old mean once!
+        // It does not matter which one is which.
+        double delta = (vec.doubleValue(i) - nmea[i]) * (vec.doubleValue(j) - mref[j]) * wei;
+        elements[i][j] = elements[i][j] + delta;
+        // Optimize via symmetry
+        if(i != j) {
+          elements[j][i] = elements[j][i] + delta;
+        }
+      }
+    }
+    // Use new values.
+    wsum = nwsum;
+    System.arraycopy(nmea, 0, mref, 0, nmea.length);
+  }
+
+  @Override
+  public void finalizeEStep() {
+    final int dim = mean.getDimensionality();
+    // TODO: improve handling of degenerated cases?
+    if(wsum > 0.) {
+      covariance.timesEquals(1. / wsum);
+    }
+    LUDecomposition lu = new LUDecomposition(covariance);
+    double det = lu.det();
+    if(det <= 0.) {
+      // Add a small value to the diagonal
+      covariance.plusDiagonalEquals(Matrix.SINGULARITY_CHEAT);
+      lu = new LUDecomposition(covariance); // Should no longer be zero now.
+      det = lu.det();
+      assert (det > 0) : "Singularity cheat did not resolve zero determinant.";
+    }
+    normDistrFactor = 1. / Math.sqrt(norm * det);
+    invCovMatr = lu.solve(Matrix.identity(dim, dim));
+  }
+
+  /**
+   * Compute the Mahalanobis distance from the centroid for a given vector.
+   * 
+   * @param vec Vector
+   * @return Mahalanobis distance
+   */
+  public double mahalanobisDistance(Vector vec) {
+    if (invCovMatr != null) {
+      return VMath.mahalanobisDistance(invCovMatr.getArrayRef(), vec.getArrayRef(), mref);
+    }
+    Vector difference = vec.minus(mean);
+    return difference.transposeTimes(difference);
+  }
+
+  /**
+   * Compute the Mahalanobis distance from the centroid for a given vector.
+   * 
+   * @param vec Vector
+   * @return Mahalanobis distance
+   */
+  public double mahalanobisDistance(NumberVector vec) {
+    Vector difference = vec.getColumnVector().minusEquals(mean);
+    return (invCovMatr != null) ? difference.transposeTimesTimes(invCovMatr, difference) : difference.transposeTimes(difference);
+  }
+
+  @Override
+  public double estimateDensity(NumberVector vec) {
+    double power = mahalanobisDistance(vec) * .5;
+    double prob = normDistrFactor * Math.exp(-power);
+    if(!(prob >= 0.)) {
+      LOG.warning("Invalid probability: " + prob + " power: " + power + " factor: " + normDistrFactor);
+      prob = 0.;
+    }
+    return prob * weight;
+  }
+
+  @Override
+  public double getWeight() {
+    return weight;
+  }
+
+  @Override
+  public void setWeight(double weight) {
+    this.weight = weight;
+  }
+
+  @Override
+  public EMModel finalizeCluster() {
+    return new EMModel(mean, covariance);
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModelFactory.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModelFactory.java
new file mode 100644
index 00000000..ca0b7db4
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/MultivariateGaussianModelFactory.java
@@ -0,0 +1,89 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.EMModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Factory for EM with multivariate gaussian models (with covariance; also known
+ * as Gaussian Mixture Modeling, GMM).
+ * 
+ * These models have individual covariance matrixes, so this corresponds to the
+ * {@code 'VVV'} model in Mclust (R).
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.has MultivariateGaussianModel
+ * 
+ * @param <V> vector type
+ */
+public class MultivariateGaussianModelFactory<V extends NumberVector> extends AbstractEMModelFactory<V, EMModel> {
+  /**
+   * Constructor.
+   * 
+   * @param initializer Class for choosing the inital seeds.
+   */
+  public MultivariateGaussianModelFactory(KMeansInitialization<V> initializer) {
+    super(initializer);
+  }
+
+  @Override
+  public List<MultivariateGaussianModel> buildInitialModels(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector> df) {
+    final List<Vector> initialMeans = initializer.chooseInitialMeans(database, relation, k, df, Vector.FACTORY);
+    assert (initialMeans.size() == k);
+    final int dimensionality = initialMeans.get(0).getDimensionality();
+    final double norm = MathUtil.powi(MathUtil.TWOPI, dimensionality);
+    List<MultivariateGaussianModel> models = new ArrayList<>(k);
+    for(Vector nv : initialMeans) {
+      models.add(new MultivariateGaussianModel(1. / k, nv, norm));
+    }
+    return models;
+  }
+
+  /**
+   * Parameterization class
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   * 
+   * @param <V> Vector type
+   */
+  public static class Parameterizer<V extends NumberVector> extends AbstractEMModelFactory.Parameterizer<V> {
+    @Override
+    protected MultivariateGaussianModelFactory<V> makeInstance() {
+      return new MultivariateGaussianModelFactory<>(initializer);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModel.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModel.java
new file mode 100644
index 00000000..b1d62a17
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModel.java
@@ -0,0 +1,190 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.EMModel;
+import de.lmu.ifi.dbs.elki.logging.Logging;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Simple spherical Gaussian cluster.
+ * 
+ * @author Erich Schubert
+ */
+public class SphericalGaussianModel implements EMClusterModel<EMModel> {
+  /**
+   * Class logger.
+   */
+  private static Logging LOG = Logging.getLogger(SphericalGaussianModel.class);
+
+  /**
+   * Mean vector.
+   */
+  Vector mean;
+
+  /**
+   * Variances.
+   */
+  double variance;
+
+  /**
+   * Temporary storage, to avoid reallocations.
+   */
+  double[] nmea, mref;
+
+  /**
+   * Normalization factor.
+   */
+  double norm, normDistrFactor;
+
+  /**
+   * Weight aggregation sum
+   */
+  double weight, wsum;
+
+  /**
+   * Constructor.
+   * 
+   * @param weight Cluster weight
+   * @param mean Initial mean
+   */
+  public SphericalGaussianModel(double weight, Vector mean) {
+    this(weight, mean, MathUtil.powi(MathUtil.TWOPI, mean.getDimensionality()));
+  }
+
+  /**
+   * Constructor.
+   * 
+   * @param weight Cluster weight
+   * @param mean Initial mean
+   * @param norm Normalization factor.
+   */
+  public SphericalGaussianModel(double weight, Vector mean, double norm) {
+    this.weight = weight;
+    final int dim = mean.getDimensionality();
+    this.mean = mean;
+    this.norm = norm;
+    this.normDistrFactor = 1. / Math.sqrt(norm); // assume det=1
+    this.mref = mean.getArrayRef();
+    this.nmea = new double[dim];
+    this.variance = 1.;
+    this.wsum = 0.;
+  }
+
+  @Override
+  public void beginEStep() {
+    wsum = 0.;
+  }
+
+  @Override
+  public void updateE(NumberVector vec, double wei) {
+    assert (vec.getDimensionality() == mref.length);
+    final double nwsum = wsum + wei;
+    // Compute new means
+    for(int i = 0; i < mref.length; i++) {
+      final double delta = vec.doubleValue(i) - mref[i];
+      final double rval = delta * wei / nwsum;
+      nmea[i] = mref[i] + rval;
+    }
+    // Update variances
+    for(int i = 0; i < mref.length; i++) {
+      // We DO want to use the new mean once and the old mean once!
+      // It does not matter which one is which.
+      variance += (vec.doubleValue(i) - nmea[i]) * (vec.doubleValue(i) - mref[i]) * wei;
+    }
+    // Use new values.
+    wsum = nwsum;
+    System.arraycopy(nmea, 0, mref, 0, nmea.length);
+  }
+
+  @Override
+  public void finalizeEStep() {
+    if(wsum > 0.) {
+      variance = variance / (wsum * mref.length);
+      normDistrFactor = 1. / Math.sqrt(norm * variance);
+    }
+    else {
+      // Degenerate
+      normDistrFactor = 1. / Math.sqrt(norm);
+    }
+  }
+
+  /**
+   * Compute the Mahalanobis distance from the centroid for a given vector.
+   * 
+   * @param vec Vector
+   * @return Mahalanobis distance
+   */
+  public double mahalanobisDistance(Vector vec) {
+    double[] difference = vec.minus(mean).getArrayRef();
+    double agg = 0.;
+    for(int i = 0; i < difference.length; i++) {
+      agg += difference[i] / variance * difference[i];
+    }
+    return agg;
+  }
+
+  /**
+   * Compute the Mahalanobis distance from the centroid for a given vector.
+   * 
+   * @param vec Vector
+   * @return Mahalanobis distance
+   */
+  public double mahalanobisDistance(NumberVector vec) {
+    double[] difference = vec.getColumnVector().minusEquals(mean).getArrayRef();
+    double agg = 0.;
+    for(int i = 0; i < difference.length; i++) {
+      agg += difference[i] / variance * difference[i];
+    }
+    return agg;
+  }
+
+  @Override
+  public double estimateDensity(NumberVector vec) {
+    double power = mahalanobisDistance(vec) * .5;
+    double prob = normDistrFactor * Math.exp(-power);
+    if(!(prob >= 0.)) {
+      LOG.warning("Invalid probability: " + prob + " power: " + power + " factor: " + normDistrFactor);
+      prob = 0.;
+    }
+    return prob * weight;
+  }
+
+  @Override
+  public double getWeight() {
+    return weight;
+  }
+
+  @Override
+  public void setWeight(double weight) {
+    this.weight = weight;
+  }
+
+  @Override
+  public EMModel finalizeCluster() {
+    return new EMModel(mean, Matrix.identity(nmea.length, nmea.length).timesEquals(variance));
+  }
+}
+\ No newline at end of file
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModelFactory.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModelFactory.java
new file mode 100644
index 00000000..7de77188
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/SphericalGaussianModelFactory.java
@@ -0,0 +1,88 @@
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.initialization.KMeansInitialization;
+import de.lmu.ifi.dbs.elki.data.NumberVector;
+import de.lmu.ifi.dbs.elki.data.model.EMModel;
+import de.lmu.ifi.dbs.elki.database.Database;
+import de.lmu.ifi.dbs.elki.database.relation.Relation;
+import de.lmu.ifi.dbs.elki.distance.distancefunction.PrimitiveDistanceFunction;
+import de.lmu.ifi.dbs.elki.math.MathUtil;
+import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
+
+/**
+ * Factory for EM with multivariate gaussian models using diagonal matrixes.
+ * 
+ * These models have individual variances, but no covariance, so this
+ * corresponds to the {@code 'VVI'} model in Mclust (R).
+ * 
+ * @author Erich Schubert
+ * 
+ * @apiviz.has SphericalGaussianModel
+ * 
+ * @param <V> vector type
+ */
+public class SphericalGaussianModelFactory<V extends NumberVector> extends AbstractEMModelFactory<V, EMModel> {
+  /**
+   * Constructor.
+   * 
+   * @param initializer Class for choosing the inital seeds.
+   */
+  public SphericalGaussianModelFactory(KMeansInitialization<V> initializer) {
+    super(initializer);
+  }
+
+  @Override
+  public List<SphericalGaussianModel> buildInitialModels(Database database, Relation<V> relation, int k, PrimitiveDistanceFunction<? super NumberVector> df) {
+    final List<Vector> initialMeans = initializer.chooseInitialMeans(database, relation, k, df, Vector.FACTORY);
+    assert (initialMeans.size() == k);
+    final int dimensionality = initialMeans.get(0).getDimensionality();
+    final double norm = MathUtil.powi(MathUtil.TWOPI, dimensionality);
+    List<SphericalGaussianModel> models = new ArrayList<>(k);
+    for(Vector nv : initialMeans) {
+      models.add(new SphericalGaussianModel(1. / k, nv, norm));
+    }
+    return models;
+  }
+
+  /**
+   * Parameterization class
+   * 
+   * @author Erich Schubert
+   * 
+   * @apiviz.exclude
+   * 
+   * @param <V> Vector type
+   */
+  public static class Parameterizer<V extends NumberVector> extends AbstractEMModelFactory.Parameterizer<V> {
+    @Override
+    protected SphericalGaussianModelFactory<V> makeInstance() {
+      return new SphericalGaussianModelFactory<>(initializer);
+    }
+  }
+}
diff --git a/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/package-info.java b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/package-info.java
new file mode 100644
index 00000000..22f0b669
--- /dev/null
+++ b/src/de/lmu/ifi/dbs/elki/algorithm/clustering/em/package-info.java
@@ -0,0 +1,27 @@
+/**
+ * Expectation-Maximization clustering algorithm.
+ */
+
+/*
+ This file is part of ELKI:
+ Environment for Developing KDD-Applications Supported by Index-Structures
+
+ Copyright (C) 2014
+ Ludwig-Maximilians-Universität München
+ Lehr- und Forschungseinheit für Datenbanksysteme
+ ELKI Development Team
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
+\ No newline at end of file