package de.lmu.ifi.dbs.elki.algorithm.clustering.em;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2014
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
import java.util.Arrays;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.EMModel;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.math.MathUtil;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Matrix;
import de.lmu.ifi.dbs.elki.math.linearalgebra.Vector;
/**
* Simpler model for a single Gaussian cluster, without covariances.
*
* @author Erich Schubert
*/
public class DiagonalGaussianModel implements EMClusterModel {
/**
* Class logger.
*/
private static Logging LOG = Logging.getLogger(DiagonalGaussianModel.class);
/**
* Mean vector.
*/
Vector mean;
/**
* Per-dimension variances.
*/
double[] variances;
/**
* Temporary storage, to avoid reallocations.
*/
double[] nmea, mref;
/**
* Normalization factor.
*/
double norm, normDistrFactor;
/**
* Weight aggregation sum
*/
double weight, wsum;
/**
* Constructor.
*
* @param weight Cluster weight
* @param mean Initial mean
*/
public DiagonalGaussianModel(double weight, Vector mean) {
this(weight, mean, MathUtil.powi(MathUtil.TWOPI, mean.getDimensionality()));
}
/**
* Constructor.
*
* @param weight Cluster weight
* @param mean Initial mean
* @param norm Normalization factor.
*/
public DiagonalGaussianModel(double weight, Vector mean, double norm) {
this.weight = weight;
final int dim = mean.getDimensionality();
this.mean = mean;
this.norm = norm;
this.normDistrFactor = 1. / Math.sqrt(norm); // assume det=1
this.mref = mean.getArrayRef();
this.nmea = new double[dim];
this.variances = new double[dim];
Arrays.fill(variances, 1.);
this.wsum = 0.;
}
@Override
public void beginEStep() {
wsum = 0.;
}
@Override
public void updateE(NumberVector vec, double wei) {
assert (vec.getDimensionality() == mref.length);
final double nwsum = wsum + wei;
// Compute new means
for(int i = 0; i < mref.length; i++) {
final double delta = vec.doubleValue(i) - mref[i];
final double rval = delta * wei / nwsum;
nmea[i] = mref[i] + rval;
}
// Update variances
for(int i = 0; i < mref.length; i++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
variances[i] += (vec.doubleValue(i) - nmea[i]) * (vec.doubleValue(i) - mref[i]) * wei;
}
// Use new values.
wsum = nwsum;
System.arraycopy(nmea, 0, mref, 0, nmea.length);
}
@Override
public void finalizeEStep() {
if(wsum > 0.) {
final double s = 1. / wsum;
double det = 1.;
for(int i = 0; i < variances.length; i++) {
double v = variances[i];
v = v > 0 ? v * s : Matrix.SINGULARITY_CHEAT;
variances[i] = v;
det *= v;
}
normDistrFactor = 1. / Math.sqrt(norm * det);
}
else {
// Degenerate
normDistrFactor = 1. / Math.sqrt(norm);
}
}
/**
* Compute the Mahalanobis distance from the centroid for a given vector.
*
* @param vec Vector
* @return Mahalanobis distance
*/
public double mahalanobisDistance(Vector vec) {
double[] difference = vec.minus(mean).getArrayRef();
double agg = 0.;
for(int i = 0; i < variances.length; i++) {
agg += difference[i] / variances[i] * difference[i];
}
return agg;
}
/**
* Compute the Mahalanobis distance from the centroid for a given vector.
*
* @param vec Vector
* @return Mahalanobis distance
*/
public double mahalanobisDistance(NumberVector vec) {
double[] difference = vec.getColumnVector().minusEquals(mean).getArrayRef();
double agg = 0.;
for(int i = 0; i < variances.length; i++) {
agg += difference[i] / variances[i] * difference[i];
}
return agg;
}
@Override
public double estimateDensity(NumberVector vec) {
double power = mahalanobisDistance(vec) * .5;
double prob = normDistrFactor * Math.exp(-power);
if(!(prob >= 0.)) {
LOG.warning("Invalid probability: " + prob + " power: " + power + " factor: " + normDistrFactor);
prob = 0.;
}
return prob * weight;
}
@Override
public double getWeight() {
return weight;
}
@Override
public void setWeight(double weight) {
this.weight = weight;
}
@Override
public EMModel finalizeCluster() {
return new EMModel(mean, Matrix.diagonal(variances));
}
}