package de.lmu.ifi.dbs.elki.math.linearalgebra;
/*
This file is part of ELKI:
Environment for Developing KDD-Applications Supported by Index-Structures
Copyright (C) 2013
Ludwig-Maximilians-Universität München
Lehr- und Forschungseinheit für Datenbanksysteme
ELKI Development Team
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.database.relation.RelationUtil;
/**
* Class for computing covariance matrixes using stable mean and variance
* computations.
*
* This class encapsulates the mathematical aspects of computing this matrix.
*
* See {@link de.lmu.ifi.dbs.elki.utilities.DatabaseUtil DatabaseUtil} for
* easier to use APIs.
*
* For use in algorithms, it is more appropriate to use
* {@link de.lmu.ifi.dbs.elki.math.linearalgebra.pca.StandardCovarianceMatrixBuilder
* StandardCovarianceMatrixBuilder} since this class can be overridden with a
* stabilized covariance matrix builder!
*
* @author Erich Schubert
*
* @apiviz.uses Vector oneway
* @apiviz.uses NumberVector oneway
* @apiviz.has Matrix oneway - - «produces»
*/
public class CovarianceMatrix {
/**
* Error message reported when too little data (weight <= 1) in matrix.
*/
public static final String ERR_TOO_LITTLE_WEIGHT = "Too few elements (too little total weight) used to obtain a valid covariance matrix.";
/**
* The means.
*/
double[] mean;
/**
* The covariance matrix.
*/
double[][] elements;
/**
* Temporary storage, to avoid reallocations.
*/
double[] nmea;
/**
* The current weight.
*/
protected double wsum;
/**
* Constructor.
*
* @param dim Dimensionality
*/
public CovarianceMatrix(int dim) {
super();
this.mean = new double[dim];
this.nmea = new double[dim];
this.elements = new double[dim][dim];
this.wsum = 0.0;
}
/**
* Add a single value with weight 1.0.
*
* @param val Value
*/
public void put(double[] val) {
assert (val.length == mean.length);
final double nwsum = wsum + 1.0;
// Compute new means
for (int i = 0; i < mean.length; i++) {
final double delta = val[i] - mean[i];
nmea[i] = mean[i] + delta / nwsum;
}
// Update covariance matrix
for (int i = 0; i < mean.length; i++) {
for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val[i] - nmea[i]) * (val[j] - mean[j]);
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
}
// Use new values.
wsum = nwsum;
System.arraycopy(nmea, 0, mean, 0, nmea.length);
}
/**
* Add data with a given weight.
*
* @param val data
* @param weight weight
*/
public void put(double[] val, double weight) {
assert (val.length == mean.length);
final double nwsum = wsum + weight;
// Compute new means
for (int i = 0; i < mean.length; i++) {
final double delta = val[i] - mean[i];
final double rval = delta * weight / nwsum;
nmea[i] = mean[i] + rval;
}
// Update covariance matrix
for (int i = 0; i < mean.length; i++) {
for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val[i] - nmea[i]) * (val[j] - mean[j]) * weight;
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
}
// Use new values.
wsum = nwsum;
System.arraycopy(nmea, 0, mean, 0, nmea.length);
}
/**
* Add a single value with weight 1.0.
*
* @param val Value
*/
public final void put(Vector val) {
put(val.getArrayRef());
}
/**
* Add data with a given weight.
*
* @param val data
* @param weight weight
*/
public final void put(Vector val, double weight) {
put(val.getArrayRef(), weight);
}
/**
* Add a single value with weight 1.0.
*
* @param val Value
*/
public void put(NumberVector> val) {
assert (val.getDimensionality() == mean.length);
final double nwsum = wsum + 1.0;
// Compute new means
for (int i = 0; i < mean.length; i++) {
final double delta = val.doubleValue(i) - mean[i];
nmea[i] = mean[i] + delta / nwsum;
}
// Update covariance matrix
for (int i = 0; i < mean.length; i++) {
for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val.doubleValue(i) - nmea[i]) * (val.doubleValue(j) - mean[j]);
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
}
// Use new values.
wsum = nwsum;
System.arraycopy(nmea, 0, mean, 0, nmea.length);
}
/**
* Get the weight sum, to test whether the covariance matrix can be
* materialized.
*
* @return Weight sum.
*/
public double getWeight() {
return wsum;
}
/**
* Add data with a given weight.
*
* @param val data
* @param weight weight
*/
public void put(NumberVector> val, double weight) {
assert (val.getDimensionality() == mean.length);
final double nwsum = wsum + weight;
// Compute new means
for (int i = 0; i < mean.length; i++) {
final double delta = val.doubleValue(i) - mean[i];
final double rval = delta * weight / nwsum;
nmea[i] = mean[i] + rval;
}
// Update covariance matrix
for (int i = 0; i < mean.length; i++) {
for (int j = i; j < mean.length; j++) {
// We DO want to use the new mean once and the old mean once!
// It does not matter which one is which.
double delta = (val.doubleValue(i) - nmea[i]) * (val.doubleValue(j) - mean[j]) * weight;
elements[i][j] = elements[i][j] + delta;
// Optimize via symmetry
if (i != j) {
elements[j][i] = elements[j][i] + delta;
}
}
}
// Use new values.
wsum = nwsum;
System.arraycopy(nmea, 0, mean, 0, nmea.length);
}
/**
* Get the mean as vector.
*
* @return Mean vector
*/
public Vector getMeanVector() {
return new Vector(mean);
}
/**
* Get the mean as vector.
*
* @param relation Data relation
* @param vector type
* @return Mean vector
*/
public > F getMeanVector(Relation extends F> relation) {
return RelationUtil.getNumberVectorFactory(relation).newNumberVector(mean);
}
/**
* Obtain the covariance matrix according to the sample statistics: (n-1)
* degrees of freedom.
*
* This method duplicates the matrix contents, so it does allow further
* updates. Use {@link #destroyToSampleMatrix()} if you do not need further
* updates.
*
* @return New matrix
*/
public Matrix makeSampleMatrix() {
if (wsum <= 1.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements);
return mat.times(1.0 / (wsum - 1));
}
/**
* Obtain the covariance matrix according to the population statistics: n
* degrees of freedom.
*
* This method duplicates the matrix contents, so it does allow further
* updates. Use {@link #destroyToNaiveMatrix()} if you do not need further
* updates.
*
* @return New matrix
*/
public Matrix makeNaiveMatrix() {
if (wsum <= 0.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements);
return mat.times(1.0 / wsum);
}
/**
* Obtain the covariance matrix according to the sample statistics: (n-1)
* degrees of freedom.
*
* This method doesn't require matrix duplication, but will not allow further
* updates, the object should be discarded. Use {@link #makeSampleMatrix()} if
* you want to perform further updates.
*
* @return New matrix
*/
public Matrix destroyToSampleMatrix() {
if (wsum <= 1.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements).timesEquals(1.0 / (wsum - 1));
this.elements = null;
return mat;
}
/**
* Obtain the covariance matrix according to the population statistics: n
* degrees of freedom.
*
* This method doesn't require matrix duplication, but will not allow further
* updates, the object should be discarded. Use {@link #makeNaiveMatrix()} if
* you want to perform further updates.
*
* @return New matrix
*/
public Matrix destroyToNaiveMatrix() {
if (wsum <= 0.0) {
throw new IllegalStateException(ERR_TOO_LITTLE_WEIGHT);
}
Matrix mat = new Matrix(elements).timesEquals(1.0 / wsum);
this.elements = null;
return mat;
}
/**
* Static Constructor.
*
* @param mat Matrix to use the columns of
* @return Covariance matrix
*/
public static CovarianceMatrix make(Matrix mat) {
CovarianceMatrix c = new CovarianceMatrix(mat.getRowDimensionality());
int n = mat.getColumnDimensionality();
for (int i = 0; i < n; i++) {
// TODO: avoid constructing the vector objects?
c.put(mat.getCol(i));
}
return c;
}
/**
* Static Constructor from a full relation.
*
* @param relation Relation to use.
* @return Covariance matrix
*/
public static CovarianceMatrix make(Relation extends NumberVector>> relation) {
CovarianceMatrix c = new CovarianceMatrix(RelationUtil.dimensionality(relation));
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
c.put(relation.get(iditer));
}
return c;
}
/**
* Static Constructor from a full relation.
*
* @param relation Relation to use.
* @param ids IDs to add
* @return Covariance matrix
*/
public static CovarianceMatrix make(Relation extends NumberVector>> relation, DBIDs ids) {
CovarianceMatrix c = new CovarianceMatrix(RelationUtil.dimensionality(relation));
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
c.put(relation.get(iter));
}
return c;
}
}